Repository: snipsco/tract Branch: main Commit: d5e7f043c6d3 Files: 1571 Total size: 10.3 MB Directory structure: gitextract_37zz2va9/ ├── .all_crates.sh ├── .change_crate_dep.sh ├── .clang-format ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── asan.yml │ ├── binaries.yml │ ├── cost_model.yml │ ├── crates.yml │ ├── cross-platform.yml │ ├── examples.yml │ ├── full.yml │ ├── large_models.yml │ ├── pydoc.yml │ ├── release.yml │ ├── tract-ci-bench.yml │ ├── wheels.yml │ └── windows.yml ├── .gitignore ├── .travis/ │ ├── README.md │ ├── android-ndk.sh │ ├── asan.sh │ ├── bundle-entrypoint.sh │ ├── cache_file.sh │ ├── cargo-deny-check.sh │ ├── ci-system-setup.sh │ ├── cli-tests.sh │ ├── cost_model_task_build.sh │ ├── cross.sh │ ├── debug-tests.sh │ ├── docker-debian-stretch/ │ │ ├── Dockerfile │ │ └── sources.list │ ├── examples.sh │ ├── llm-expectations-541 │ ├── make_bundle.sh │ ├── minion.sh │ ├── minionrc │ ├── native.sh │ ├── onnx-tests.sh │ ├── regular-tests.sh │ ├── run-bundle.sh │ ├── run_all.sh │ ├── setup-sccache.sh │ ├── test-harness.sh │ ├── test-llm.sh │ ├── test-published-crates.sh │ ├── test-rt.sh │ ├── tf.sh │ ├── tflite/ │ │ ├── Dockerfile.tensorflow-aarch64 │ │ ├── Dockerfile.tensorflow-official-rpi │ │ ├── Dockerfile.tensorflow-rpitools │ │ ├── build_tflite_aarch64.sh │ │ ├── build_tflite_raspbian.sh │ │ ├── convert_all.sh │ │ ├── linux_makefile.inc │ │ └── run_all.sh │ ├── tflite.sh │ └── travis.sh ├── .travis.yml ├── .vim/ │ └── coc-settings.json ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── api/ │ ├── .gitignore │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── c/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── grace_hopper_3_224_224.f32.raw │ │ └── mobilenet.c │ ├── ffi/ │ │ ├── Cargo.toml │ │ ├── cbindgen.toml │ │ └── src/ │ │ └── lib.rs │ ├── generate-tract-h.sh │ ├── proxy/ │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── LICENSE-APACHE │ │ ├── LICENSE-MIT │ │ ├── ci.sh │ 
│ ├── src/ │ │ │ └── lib.rs │ │ ├── sys/ │ │ │ ├── Cargo.toml │ │ │ ├── build.rs │ │ │ ├── src/ │ │ │ │ └── lib.rs │ │ │ └── tract.h │ │ └── tests/ │ │ └── mobilenet.rs │ ├── py/ │ │ ├── .gitignore │ │ ├── MANIFEST.in │ │ ├── _static/ │ │ │ ├── redirect-index.html │ │ │ └── version-switcher.js │ │ ├── conf.py │ │ ├── docs/ │ │ │ ├── fact.md │ │ │ ├── index.md │ │ │ ├── inference_model.md │ │ │ ├── model.md │ │ │ ├── nnef.md │ │ │ ├── onnx.md │ │ │ ├── runnable.md │ │ │ └── tensor.md │ │ ├── grace_hopper_1x3x224x244.npy │ │ ├── pyproject.toml │ │ ├── requirements-docs.txt │ │ ├── requirements.txt │ │ ├── setup.py │ │ ├── tests/ │ │ │ └── mobilenet_onnx_test.py │ │ └── tract/ │ │ ├── __init__.py │ │ ├── bindings.py │ │ ├── dim.py │ │ ├── fact.py │ │ ├── inference_model.py │ │ ├── model.py │ │ ├── nnef.py │ │ ├── onnx.py │ │ ├── runnable.py │ │ ├── runtime.py │ │ ├── state.py │ │ ├── tensor.py │ │ └── transform.py │ ├── rs/ │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── LICENSE-APACHE │ │ ├── LICENSE-MIT │ │ ├── src/ │ │ │ └── lib.rs │ │ └── tests/ │ │ └── mobilenet.rs │ ├── src/ │ │ ├── lib.rs │ │ ├── macros.rs │ │ └── transform.rs │ └── tests/ │ ├── grace_hopper_3_224_224.f32.raw │ └── mobilenet/ │ └── mod.rs ├── ci/ │ └── tract-ci-minion/ │ ├── .gitignore │ ├── Cargo.toml │ ├── minion.toml.example │ └── src/ │ └── main.rs ├── cli/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── bench.rs │ ├── compare.rs │ ├── cost.rs │ ├── dump.rs │ ├── hwbench.rs │ ├── llm.rs │ ├── macros.rs │ ├── main.rs │ ├── memory_arena.rs │ ├── model.rs │ ├── params.rs │ ├── plan_options.rs │ ├── run.rs │ ├── runtimes.rs │ ├── tensor.rs │ └── utils.rs ├── core/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── src/ │ │ ├── axes/ │ │ │ ├── mapping.rs │ │ │ ├── mod.rs │ │ │ └── model.rs │ │ ├── broadcast.rs │ │ ├── floats.rs │ │ ├── framework.rs │ │ ├── late_bind.rs │ │ ├── lib.rs │ │ ├── macros.rs │ │ ├── model/ │ │ │ ├── fact.rs │ │ 
│ ├── graph.rs │ │ │ ├── helpers.rs │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ ├── node.rs │ │ │ ├── order.rs │ │ │ ├── patch.rs │ │ │ ├── rewriter.rs │ │ │ ├── translator.rs │ │ │ └── typed.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── broadcast.rs │ │ │ │ ├── concat.rs │ │ │ │ ├── dyn_slice.rs │ │ │ │ ├── gather.rs │ │ │ │ ├── gather_elements.rs │ │ │ │ ├── gather_nd.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── range.rs │ │ │ │ ├── reshape.rs │ │ │ │ ├── scatter_elements.rs │ │ │ │ ├── scatter_nd.rs │ │ │ │ ├── slice.rs │ │ │ │ ├── strided_slice.rs │ │ │ │ ├── tile.rs │ │ │ │ ├── topk.rs │ │ │ │ └── trilu.rs │ │ │ ├── binary.rs │ │ │ ├── cast.rs │ │ │ ├── change_axes.rs │ │ │ ├── cnn/ │ │ │ │ ├── conv/ │ │ │ │ │ ├── block_quant.rs │ │ │ │ │ ├── conv.rs │ │ │ │ │ ├── depth_wise.rs │ │ │ │ │ ├── im2col.rs │ │ │ │ │ ├── lazy_im2col.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── q_sum_b.rs │ │ │ │ ├── deconv/ │ │ │ │ │ ├── deconv.rs │ │ │ │ │ ├── deconv_sum.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── maxpool.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── padding.rs │ │ │ │ ├── patch_axis.rs │ │ │ │ ├── patches.rs │ │ │ │ ├── pools.rs │ │ │ │ └── sumpool.rs │ │ │ ├── downsample/ │ │ │ │ ├── array.rs │ │ │ │ ├── conv.rs │ │ │ │ ├── mod.rs │ │ │ │ └── scan.rs │ │ │ ├── dummy.rs │ │ │ ├── einsum/ │ │ │ │ ├── as_blas.rs │ │ │ │ ├── einsum_matmul.rs │ │ │ │ ├── eval.rs │ │ │ │ ├── kernel_selection.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── prefix_matmul.rs │ │ │ │ └── proptest.rs │ │ │ ├── element_wise.rs │ │ │ ├── fft.rs │ │ │ ├── identity.rs │ │ │ ├── konst.rs │ │ │ ├── logic/ │ │ │ │ ├── comparison.rs │ │ │ │ └── ite.rs │ │ │ ├── logic.rs │ │ │ ├── macros.rs │ │ │ ├── math/ │ │ │ │ ├── complex.rs │ │ │ │ └── mod.rs │ │ │ ├── matmul/ │ │ │ │ ├── de_block_quant.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── optimized.rs │ │ │ │ ├── pack.rs │ │ │ │ └── quant.rs │ │ │ ├── memory/ │ │ │ │ ├── force_eval.rs │ │ │ │ ├── load.rs │ │ │ │ ├── mod.rs │ │ │ │ └── store.rs │ │ │ ├── mod.rs │ │ │ ├── nn/ 
│ │ │ │ ├── data_formats.rs │ │ │ │ ├── gelu_approximate.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── reduce.rs │ │ │ │ ├── rms_norm.rs │ │ │ │ ├── silu.rs │ │ │ │ └── softmax/ │ │ │ │ ├── fixedpoint.rs │ │ │ │ ├── math.rs │ │ │ │ └── mod.rs │ │ │ ├── quant.rs │ │ │ ├── scan/ │ │ │ │ ├── decluttered.rs │ │ │ │ ├── mod.rs │ │ │ │ └── optimized.rs │ │ │ ├── source.rs │ │ │ ├── submodel.rs │ │ │ └── unimpl.rs │ │ ├── optim/ │ │ │ ├── change_axes.rs │ │ │ ├── concat_then_einsum.rs │ │ │ ├── mod.rs │ │ │ ├── op_optim.rs │ │ │ ├── prop_const.rs │ │ │ ├── propagate_roi.rs │ │ │ ├── push_split_down.rs │ │ │ ├── slice.rs │ │ │ └── uniform_mask.rs │ │ ├── plan.rs │ │ ├── runtime.rs │ │ ├── transform.rs │ │ └── value.rs │ └── test_data/ │ └── test_data.cfg ├── cuda/ │ ├── Cargo.toml │ ├── benches/ │ │ └── cuda_flash.rs │ └── src/ │ ├── context.rs │ ├── kernels/ │ │ ├── array/ │ │ │ ├── cast.rs │ │ │ ├── copy.rs │ │ │ ├── dispatch.rs │ │ │ ├── mod.rs │ │ │ └── rotate_half.rs │ │ ├── binary.rs │ │ ├── conv.rs │ │ ├── conv_cudnn.rs │ │ ├── cu/ │ │ │ ├── array.cu │ │ │ ├── binary.cu │ │ │ ├── cnn.cu │ │ │ ├── common.cuh │ │ │ ├── element_wise.cu │ │ │ ├── flash_attn.cu │ │ │ ├── ggml_flash_attn.cu │ │ │ ├── mm_mv.cu │ │ │ ├── mm_mv_q.cu │ │ │ ├── nn.cu │ │ │ └── quantize.cu │ │ ├── element_wise.rs │ │ ├── flash_attn.rs │ │ ├── ggml_flash_attn.rs │ │ ├── iff.rs │ │ ├── launch_args.rs │ │ ├── matmul/ │ │ │ ├── mod.rs │ │ │ └── quant_act_q81.rs │ │ ├── mod.rs │ │ ├── nn/ │ │ │ ├── apply_rope.rs │ │ │ ├── gelu_approximate.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── mod.rs │ │ │ ├── reduce.rs │ │ │ ├── rms_norm.rs │ │ │ ├── scaled_masked_softmax.rs │ │ │ └── softmax.rs │ │ └── utils.rs │ ├── lib.rs │ ├── ops/ │ │ ├── conv.rs │ │ ├── flash_attn.rs │ │ ├── fused_axis_op.rs │ │ ├── gemm.rs │ │ ├── ggml_flash_attn.rs │ │ ├── iff.rs │ │ ├── mod.rs │ │ └── quant_q81.rs │ ├── rewrite_rules/ │ │ ├── add_matmul_broadcast.rs │ │ ├── fuse_axis_op.rs │ │ ├── mod.rs │ │ ├── pad_q40_weights.rs │ │ └── 
untranspose_matmul_output.rs │ ├── tensor.rs │ ├── transform.rs │ └── utils.rs ├── data/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ ├── stack_tensors.rs │ │ └── tensor_from_datum.rs │ └── src/ │ ├── blob.rs │ ├── datum.rs │ ├── dim/ │ │ ├── assertion.rs │ │ ├── mod.rs │ │ ├── parse.rs │ │ ├── resolve.rs │ │ ├── sym.rs │ │ └── tree.rs │ ├── exotic.rs │ ├── lib.rs │ ├── macros.rs │ ├── scatter.rs │ ├── tensor/ │ │ ├── litteral.rs │ │ ├── plain_view.rs │ │ ├── storage.rs │ │ └── view.rs │ └── tensor.rs ├── deny.toml ├── doc/ │ ├── README.md │ ├── cli-recipe.md │ ├── graph.md │ ├── intro.md │ ├── kernel-notes.md │ ├── nnef/ │ │ ├── tract-core.nnef │ │ ├── tract-onnx.nnef │ │ ├── tract-pulse.nnef │ │ └── tract-resource.nnef │ └── op.md ├── examples/ │ ├── .gitignore │ ├── causal_llm/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── scripts/ │ │ │ └── generate_ci_llm_assets.sh │ │ └── src/ │ │ ├── bin/ │ │ │ ├── client.rs │ │ │ ├── common/ │ │ │ │ └── mod.rs │ │ │ ├── complete.rs │ │ │ └── serve.rs │ │ └── lib.rs │ ├── face_detection_yolov8onnx_example/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ └── main.rs │ ├── face_similarity_arcface_onnx/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ ├── arc_face.rs │ │ ├── main.rs │ │ └── yolo_face.rs │ ├── keras-tract-tf2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh.nope │ │ ├── example.py │ │ ├── io.npz │ │ ├── requirements.txt │ │ └── src/ │ │ └── main.rs │ ├── nemo-nemotron-asr/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── nemotron.py │ │ └── src/ │ │ └── main.rs │ ├── nemo-parakeet-asr/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── parakeet.py │ │ └── src/ │ │ └── main.rs │ ├── nnef-dump-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ └── src/ │ │ └── main.rs │ ├── nnef-mobilenet-v2/ │ │ ├── 
.gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ ├── nnef-mobilenet-v2-api/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ ├── onnx-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ ├── bin/ │ │ │ └── dyn-shape.rs │ │ └── main.rs │ ├── pytorch-albert-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── export.py │ │ └── src/ │ │ └── main.rs │ ├── pytorch-resnet/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── export.py │ │ ├── requirements.txt │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci-gpu.sh │ │ ├── export.py │ │ ├── reference.py │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion-3/ │ │ ├── Cargo.toml │ │ ├── export.py │ │ ├── reference.py │ │ ├── runme.sh │ │ └── src/ │ │ └── main.rs │ ├── stable-diffusion-xl/ │ │ ├── Cargo.toml │ │ ├── ci-gpu.sh │ │ ├── export.py │ │ ├── reference.py │ │ └── src/ │ │ └── main.rs │ ├── tensorflow-mobilenet-v2/ │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── ci.sh │ │ ├── imagenet_slim_labels.txt │ │ └── src/ │ │ └── main.rs │ └── tflite-mobilenet-v3/ │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── ci.sh │ ├── imagenet_slim_labels.txt │ └── src/ │ └── main.rs ├── extra/ │ ├── Cargo.toml │ └── src/ │ ├── exp_unit_norm.rs │ └── lib.rs ├── gpu/ │ ├── Cargo.toml │ └── src/ │ ├── device.rs │ ├── fact.rs │ ├── lib.rs │ ├── memory/ │ │ ├── mod.rs │ │ ├── pool.rs │ │ └── schema.rs │ ├── ops/ │ │ ├── RECIPE.md │ │ ├── apply_rope.rs │ │ ├── binary.rs │ │ ├── broadcast.rs │ │ ├── cast.rs │ │ ├── change_axes.rs │ │ ├── concat.rs │ │ ├── copy_based.rs │ │ ├── dyn_kv_cache.rs │ │ ├── element_wise.rs │ │ ├── gelu_approximate.rs │ │ ├── iff.rs │ │ ├── leaky_relu.rs │ │ ├── mod.rs │ │ ├── pulse.rs │ 
│ ├── reduce.rs │ │ ├── rms_norm.rs │ │ ├── rotate_half.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── slice.rs │ │ └── softmax.rs │ ├── rewrite_rules/ │ │ ├── mod.rs │ │ ├── rewire_sdpa.rs │ │ ├── rewire_syncs.rs │ │ └── rms_norm.rs │ ├── session_handler.rs │ ├── sync.rs │ ├── tensor/ │ │ ├── arena_view.rs │ │ ├── mod.rs │ │ └── owned.rs │ └── utils.rs ├── harness/ │ ├── core-proptest-pulse/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── conv_plus_conv.rs │ │ ├── deconv.rs │ │ ├── delay_plus_downsample.rs │ │ ├── delay_plus_pool.rs │ │ ├── einsum.rs │ │ ├── lib.rs │ │ └── pad_plus_conv.rs │ ├── nemotron-speech-streaming-en-0.6b/ │ │ └── ci.sh │ ├── nnef-inceptionv3/ │ │ ├── Cargo.toml │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ ├── nnef-test-cases/ │ │ ├── .gitignore │ │ ├── conv-bias/ │ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── conv-q40/ │ │ │ ├── conv2d/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel1/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel3/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_base_kernel9/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation4/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_dilation8/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_groups2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_groups4/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_insize128/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_insize64/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── conv_stride2/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ └── 
conv_stride3/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── conv-with-batch/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── debox/ │ │ │ ├── debox_base/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ └── debox_high_dim/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── dyn_slice/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── fixed_roll/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── memory-arena/ │ │ │ ├── expected.json │ │ │ └── runme.sh │ │ ├── pool-padding/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── q40_linear_followed_slice/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── qmul/ │ │ │ ├── graph.nnef │ │ │ ├── graph.quant │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── range-slice-dyn-tile/ │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── reshape/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── reshape_with_bc/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── sdpa/ │ │ │ ├── simple-causal-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-f16/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-grouped-query-att-f32/ │ │ │ │ ├── io.npz │ │ │ │ ├── model.nnef.tgz │ │ │ │ └── runme.sh │ │ │ ├── simple-mask-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ ├── simple-non-causal-f32/ │ │ │ │ ├── graph.nnef │ │ │ │ ├── io.npz │ │ │ │ └── runme.sh │ │ │ └── simple-scale-f32/ │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ ├── slice-over-slice-optim-loop/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── softmax/ │ │ │ ├── softmax-change-axis/ │ │ │ │ ├── expected │ │ │ │ ├── graph.nnef │ │ │ │ └── runme.sh │ │ │ ├── softmax-change-axis-1/ │ │ │ │ ├── expected │ │ │ │ ├── graph.nnef │ │ │ │ └── runme.sh │ │ │ └── softmax-quant/ │ │ │ ├── expected/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ ├── model/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ └── runme.sh │ │ ├── submodel/ 
│ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ ├── graph.quant │ │ │ ├── nnet2/ │ │ │ │ ├── graph.nnef │ │ │ │ └── graph.quant │ │ │ └── runme.sh │ │ ├── tdim-cmp/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── test_all_reduce/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_any_reduce/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_manage_gru_states/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_stft_smaller_win/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── test_upcast_f32_attn/ │ │ │ ├── io.npz │ │ │ ├── model.nnef.tgz │ │ │ └── runme.sh │ │ ├── tile-with-tdim/ │ │ │ ├── graph.nnef │ │ │ └── runme.sh │ │ ├── uniform-mul/ │ │ │ ├── expected │ │ │ ├── graph.nnef │ │ │ ├── io.npz │ │ │ └── runme.sh │ │ └── variable-in-fragment/ │ │ ├── graph.nnef │ │ └── runme.sh │ ├── parakeet-tdt-600m-v3/ │ │ └── ci.sh │ ├── pre-optimized-graphes/ │ │ ├── .gitignore │ │ ├── hey_snips_v4_model17/ │ │ │ ├── expected │ │ │ └── runme.sh │ │ └── mdl-en-2019-Q3-librispeech/ │ │ ├── expected │ │ └── runme.sh │ ├── tf-inceptionv3/ │ │ ├── Cargo.toml │ │ ├── benches/ │ │ │ └── inceptionv3.rs │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ ├── tf-mobilenet-v2/ │ │ ├── Cargo.toml │ │ ├── download.sh │ │ └── src/ │ │ └── lib.rs │ └── tfl-mobilenet-v2-q/ │ ├── Cargo.toml │ ├── download.sh │ └── src/ │ └── lib.rs ├── hir/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── framework.rs │ ├── infer/ │ │ ├── analyser.rs │ │ ├── fact.rs │ │ ├── factoid.rs │ │ ├── helpers.rs │ │ ├── mod.rs │ │ ├── model.rs │ │ ├── ops.rs │ │ ├── optim.rs │ │ └── rules/ │ │ ├── cache.rs │ │ ├── expr.rs │ │ ├── mod.rs │ │ ├── path.rs │ │ ├── proxies.rs │ │ └── solver.rs │ ├── lib.rs │ ├── macros.rs │ └── ops/ │ ├── activations.rs │ ├── array/ │ │ ├── add_dims.rs │ │ ├── array_feature_extractor.rs │ │ ├── broadcast.rs │ │ ├── concat.rs │ │ ├── constant_like.rs │ │ ├── 
constant_of_shape.rs │ │ ├── crop.rs │ │ ├── dyn_slice.rs │ │ ├── flatten.rs │ │ ├── gather.rs │ │ ├── gather_elements.rs │ │ ├── gather_nd.rs │ │ ├── mod.rs │ │ ├── pad.rs │ │ ├── permute_axes.rs │ │ ├── range.rs │ │ ├── reshape.rs │ │ ├── rm_dims.rs │ │ ├── scatter_elements.rs │ │ ├── scatter_nd.rs │ │ ├── shape.rs │ │ ├── size.rs │ │ ├── slice.rs │ │ ├── split.rs │ │ ├── squeeze.rs │ │ ├── strided_slice.rs │ │ └── tile.rs │ ├── binary.rs │ ├── cast.rs │ ├── cnn/ │ │ ├── conv.rs │ │ ├── mod.rs │ │ └── pools.rs │ ├── downsample.rs │ ├── dummy.rs │ ├── element_wise.rs │ ├── expandable.rs │ ├── identity.rs │ ├── konst.rs │ ├── logic.rs │ ├── matmul.rs │ ├── mod.rs │ ├── nn/ │ │ ├── global_pools.rs │ │ ├── layer_max.rs │ │ ├── mod.rs │ │ ├── reduce.rs │ │ └── softmax.rs │ ├── quant.rs │ ├── scan.rs │ ├── source.rs │ └── unimpl.rs ├── libcli/ │ ├── Cargo.toml │ ├── src/ │ │ ├── annotations.rs │ │ ├── display_params.rs │ │ ├── draw.rs │ │ ├── export.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── profile.rs │ │ ├── tensor.rs │ │ ├── terminal.rs │ │ └── time.rs │ └── validate_wires.py ├── linalg/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── README.md │ ├── arm32/ │ │ ├── armv7neon/ │ │ │ ├── armv7neon_mmm_f32_32x1_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x1_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x4_core.tmpl │ │ │ ├── armv7neon_mmm_f32_8x6_core.tmpl │ │ │ ├── armv7neon_mmm_f32_per_cols.tmpliq │ │ │ ├── armv7neon_mmm_f32_per_rows.tmpliq │ │ │ ├── armv7neon_mmm_f32_scalars.tmpliq │ │ │ ├── armv7neon_mmm_i32_32x1.tmpl │ │ │ ├── armv7neon_mmm_i32_8x4.tmpl │ │ │ ├── armv7neon_mmm_i32_per_cols.tmpliq │ │ │ ├── armv7neon_mmm_i32_per_rows.tmpliq │ │ │ ├── armv7neon_mmm_i32_scalars.tmpliq │ │ │ ├── armv7neon_mmm_i32_scale_q8_q15.tmpliq │ │ │ ├── armv7neon_mmm_q_per_col.tmpliq │ │ │ ├── armv7neon_mmm_q_per_row.tmpliq │ │ │ ├── armv7neon_mmm_q_scalar.tmpliq │ │ │ ├── armv7neon_prefetch.tmpl │ │ │ ├── armv7neon_sigmoid_f32_4n.tmpl │ │ │ ├── 
armv7neon_tanh_f32_4n.tmpl │ │ │ └── dispatcher.tmpliq │ │ └── armvfpv2/ │ │ ├── armvfpv2_mmm_f32_4x4.tmpl │ │ └── dispatcher.tmpliq │ ├── arm64/ │ │ ├── apple_amx/ │ │ │ ├── apple_amx_mmm_f16_64x1.tmpl │ │ │ ├── apple_amx_mmm_f16_64x32.tmpl │ │ │ ├── apple_amx_mmm_f32_32x1.tmpl │ │ │ ├── apple_amx_mmm_f32_32x32.tmpl │ │ │ ├── dispatcher.tmpliq │ │ │ └── instructions.rs │ │ ├── arm64fp16/ │ │ │ ├── arm64fp16_leaky_relu_f16_8n.tmpl │ │ │ ├── arm64fp16_mmm_8h_per_col.tmpliq │ │ │ ├── arm64fp16_mmm_8h_per_row.tmpliq │ │ │ ├── arm64fp16_mmm_8h_scalar.tmpliq │ │ │ ├── arm64fp16_mmm_f16_128x1/ │ │ │ │ ├── loop1/ │ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_128x1_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_16x8/ │ │ │ │ ├── loop1/ │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_16x8_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_32x4/ │ │ │ │ ├── loop1/ │ │ │ │ │ └── naive.tmpli │ │ │ │ └── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ ├── arm64fp16_mmm_f16_32x4_core.tmpl │ │ │ ├── arm64fp16_mmm_f16_32x6.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_64x1.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_64x3.core.tmpl │ │ │ ├── arm64fp16_mmm_f16_per_cols.tmpliq │ │ │ ├── arm64fp16_mmm_f16_per_rows.tmpliq │ │ │ ├── arm64fp16_mmm_f16_scalars.tmpliq │ │ │ ├── arm64fp16_mmm_load_tile.tmpliq │ │ │ ├── arm64fp16_sigmoid_f16_8n.tmpl │ │ │ ├── arm64fp16_tanh_f16_8n.tmpl │ │ │ ├── dispatcher.tmpliq │ │ │ ├── dummy_fmla_no_pragma.S │ │ │ └── dummy_fmla_pragma.S │ │ └── arm64simd/ │ │ ├── arm64simd_mmm_4s_per_col.tmpliq │ │ ├── arm64simd_mmm_4s_per_row.tmpliq │ │ ├── arm64simd_mmm_4s_scalar.tmpliq │ │ ├── arm64simd_mmm_f32_12x8/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── ldr_w_no_preload.tmpli │ │ │ │ ├── ldr_w_preload.tmpli │ │ │ │ ├── ldr_x_preload.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_12x8_core.tmpl │ │ 
├── arm64simd_mmm_f32_16x4/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_16x4_core.tmpl │ │ ├── arm64simd_mmm_f32_24x4/ │ │ │ ├── loop2/ │ │ │ │ └── cortex_a55.tmpli │ │ │ └── packed_packed_loop1/ │ │ │ ├── cortex_a53.tmpli │ │ │ ├── cortex_a55.tmpli │ │ │ └── naive.tmpli │ │ ├── arm64simd_mmm_f32_24x4_core.tmpl │ │ ├── arm64simd_mmm_f32_32x1_core.tmpl │ │ ├── arm64simd_mmm_f32_32x3_core.tmpl │ │ ├── arm64simd_mmm_f32_64x1/ │ │ │ ├── loop1/ │ │ │ │ ├── cortex_a53.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── loop2/ │ │ │ ├── cortex_a55.tmpli │ │ │ └── naive.tmpli │ │ ├── arm64simd_mmm_f32_64x1_core.tmpl │ │ ├── arm64simd_mmm_f32_8x8/ │ │ │ ├── packed_packed_loop1/ │ │ │ │ ├── broken_chains.tmpli │ │ │ │ ├── ldr_w_no_preload.tmpli │ │ │ │ ├── ldr_w_preload.tmpli │ │ │ │ ├── ldr_x_no_preload.tmpli │ │ │ │ ├── ldr_x_preload.tmpli │ │ │ │ └── naive.tmpli │ │ │ └── packed_packed_loop2/ │ │ │ ├── broken_chains.tmpli │ │ │ └── cortex_a55.tmpli │ │ ├── arm64simd_mmm_f32_8x8_core.tmpl │ │ ├── arm64simd_mmm_f32_per_cols.tmpliq │ │ ├── arm64simd_mmm_f32_per_rows.tmpliq │ │ ├── arm64simd_mmm_f32_scalars.tmpliq │ │ ├── arm64simd_mmm_i32_64x1.tmpl │ │ ├── arm64simd_mmm_i32_8x8.tmpl │ │ ├── arm64simd_mmm_i32_per_cols.tmpliq │ │ ├── arm64simd_mmm_i32_per_rows.tmpliq │ │ ├── arm64simd_mmm_i32_scalars.tmpliq │ │ ├── arm64simd_mmm_i32_scale_q16_q31.tmpliq │ │ ├── arm64simd_mmm_load_tile.tmpliq │ │ ├── arm64simd_sigmoid_f32_4n.tmpl │ │ ├── arm64simd_tanh_f32_4n.tmpl │ │ └── dispatcher.tmpliq │ ├── benches/ │ │ ├── arm32neon.rs │ │ ├── arm64.rs │ │ ├── arm64simd.rs │ │ ├── intel.rs │ │ ├── leaky_relu.rs │ │ ├── mat_vec.rs │ │ ├── mm_for_asr_am.rs │ │ ├── mm_for_inception.rs │ │ ├── mm_for_wavenet_hw.rs │ │ ├── sigmoid.rs │ │ ├── softmax.rs │ │ ├── utils.rs │ │ ├── virtual_im2col.rs │ │ └── x86_64.rs │ ├── build.rs │ ├── cost_model/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ 
└── main.rs │ │ └── train/ │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── runme.sh │ │ └── train.py │ ├── matmul-bench/ │ │ ├── Cargo.toml │ │ ├── benches/ │ │ │ └── matmul.rs │ │ ├── build.rs │ │ ├── c/ │ │ │ ├── packed_tile_4x4.c │ │ │ ├── packed_tile_8x8.c │ │ │ ├── tile_1x1.c │ │ │ ├── tile_2x2.c │ │ │ ├── tile_4x4.c │ │ │ └── tile_8x8.c │ │ └── src/ │ │ └── lib.rs │ ├── src/ │ │ ├── arm32/ │ │ │ ├── armv7neon.rs │ │ │ ├── armvfpv2.rs │ │ │ ├── cortex_a7.rs │ │ │ ├── cortex_a7.txt │ │ │ ├── cortex_a9.rs │ │ │ └── cortex_a9.txt │ │ ├── arm32.rs │ │ ├── arm64/ │ │ │ ├── apple_amx.rs │ │ │ ├── arm64fp16/ │ │ │ │ ├── by_scalar.rs │ │ │ │ ├── leaky_relu.rs │ │ │ │ ├── max.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── sum.rs │ │ │ │ └── unicast.rs │ │ │ ├── arm64fp16.rs │ │ │ ├── arm64simd/ │ │ │ │ ├── by_scalar.rs │ │ │ │ ├── leaky_relu.rs │ │ │ │ ├── max.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── softmax.rs │ │ │ │ ├── sum.rs │ │ │ │ └── unicast.rs │ │ │ ├── arm64simd.rs │ │ │ ├── cortex_a53.rs │ │ │ ├── cortex_a55.rs │ │ │ ├── cortex_a72.rs │ │ │ └── cortex_a73.rs │ │ ├── arm64.rs │ │ ├── frame/ │ │ │ ├── block_quant/ │ │ │ │ ├── helpers.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── q4_0.rs │ │ │ │ ├── q8_1.rs │ │ │ │ ├── storage.rs │ │ │ │ └── value.rs │ │ │ ├── by_scalar.rs │ │ │ ├── element_wise.rs │ │ │ ├── element_wise_helper.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── lut.rs │ │ │ ├── mmm/ │ │ │ │ ├── cost_model.rs │ │ │ │ ├── fuse.rs │ │ │ │ ├── input_store.rs │ │ │ │ ├── kernel.rs │ │ │ │ ├── macros.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── panel_extract.rs │ │ │ │ ├── scratch.rs │ │ │ │ ├── storage.rs │ │ │ │ └── tests/ │ │ │ │ ├── frame.rs │ │ │ │ ├── fuse.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── packed_packed.rs │ │ │ │ ├── q_scale.rs │ │ │ │ └── store.rs │ │ │ ├── mod.rs │ │ │ ├── pack.rs │ │ │ ├── reduce/ │ │ │ │ ├── max.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── softmax.rs │ │ │ │ └── sum.rs │ │ │ ├── sigmoid.rs │ │ │ ├── tanh.rs │ │ │ ├── unicast.rs │ │ │ └── weights.rs │ │ ├── generic/ │ 
│ │ ├── by_scalar.rs │ │ │ ├── erf.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── lut.rs │ │ │ ├── mmm.rs │ │ │ ├── reduce.rs │ │ │ ├── rounding.rs │ │ │ ├── sigmoid.rs │ │ │ ├── tanh.rs │ │ │ └── unicast.rs │ │ ├── generic.rs │ │ ├── hwbench/ │ │ │ ├── bandwidth.rs │ │ │ ├── mod.rs │ │ │ └── runner.rs │ │ ├── lib.rs │ │ ├── multithread.rs │ │ ├── wasm.rs │ │ ├── x86_64_fma/ │ │ │ ├── by_scalar.rs │ │ │ ├── intel.rs │ │ │ ├── max.rs │ │ │ ├── mmm.rs │ │ │ ├── panel_extract.rs │ │ │ └── softmax.rs │ │ └── x86_64_fma.rs │ ├── tests/ │ │ └── virtual_im2col.rs │ └── x86_64/ │ ├── avx512/ │ │ ├── 10x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 1x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512.tmpli │ │ │ ├── unroll-16.tmpli │ │ │ ├── unroll-4.tmpli │ │ │ ├── unroll-8.tmpli │ │ │ └── unroll.tmpli │ │ ├── 1x12/ │ │ │ └── packed_packed_loop1/ │ │ │ └── avx-512.tmpli │ │ ├── 2x5/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 2x6/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 3x4/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 4x3/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 5x2/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 6x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 6x2/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 7x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 8x1/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 8x2/ │ │ │ └── packed_packed_loop1/ │ │ │ └── avx-512.tmpli │ │ ├── 8x8/ │ │ │ └── packed_packed_loop1/ │ │ │ ├── avx-512-unroll.tmpli │ │ │ └── avx-512.tmpli │ │ ├── 
avx512_mmm_f32_128x1.tmpl │ │ ├── avx512_mmm_f32_16x1.tmpl │ │ ├── avx512_mmm_f32_16x12.tmpl │ │ ├── avx512_mmm_f32_16x8.tmpl │ │ ├── avx512_mmm_f32_32x5.tmpl │ │ ├── avx512_mmm_f32_32x6.tmpl │ │ ├── avx512_mmm_f32_48x4.tmpl │ │ ├── avx512_mmm_f32_64x3.tmpl │ │ ├── avx512_mmm_f32_80x2.tmpl │ │ ├── avx512_mmm_load_tile.tmpliq │ │ ├── dispatcher.tmpliq │ │ ├── f32_per_cols.tmpliq │ │ ├── f32_per_rows.tmpliq │ │ ├── f32_scalars.tmpliq │ │ ├── i32_per_cols.tmpliq │ │ ├── i32_per_rows.tmpliq │ │ ├── i32_scalars.tmpliq │ │ ├── postamble.tmpliq │ │ ├── preamble.tmpliq │ │ ├── sigmoid_f32.tmpl │ │ ├── tanh_f32.tmpl │ │ ├── zmm_per_col.tmpliq │ │ ├── zmm_per_row.tmpliq │ │ └── zmm_scalar.tmpliq │ └── fma/ │ ├── 10x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 2x5/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 2x6/ │ │ └── packed_packed_loop1/ │ │ ├── original-unroll.tmpli │ │ └── original.tmpli │ ├── 3x4/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 4x3/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 5x2/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 6x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 6x2/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 7x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 8x1/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── 8x8/ │ │ └── packed_packed_loop1/ │ │ ├── avx-unroll.tmpli │ │ └── avx.tmpli │ ├── avx2_mmm_i32_8x8.tmpl │ ├── dispatcher.tmpliq │ ├── fma_mmm_f32_16x5.tmpl │ ├── fma_mmm_f32_16x6.tmpl │ ├── fma_mmm_f32_24x4.tmpl │ ├── fma_mmm_f32_32x1.tmpl │ ├── fma_mmm_f32_32x3.tmpl │ ├── fma_mmm_f32_40x2.tmpl │ ├── fma_mmm_f32_64x1.tmpl │ ├── fma_mmm_f32_8x8.tmpl │ ├── fma_mmm_f32_per_cols.tmpliq │ ├── fma_mmm_f32_per_rows.tmpliq │ ├── 
fma_mmm_f32_scalars.tmpliq │ ├── fma_mmm_i32_per_cols.tmpliq │ ├── fma_mmm_i32_per_rows.tmpliq │ ├── fma_mmm_i32_scalars.tmpliq │ ├── fma_mmm_load_tile.tmpliq │ ├── fma_mmm_ymm_per_col.tmpliq │ ├── fma_mmm_ymm_per_row.tmpliq │ ├── fma_mmm_ymm_scalar.tmpliq │ ├── fma_sigmoid_f32.tmpl │ ├── fma_tanh_f32.tmpl │ ├── postamble.tmpliq │ └── preamble.tmpliq ├── metal/ │ ├── Cargo.toml │ ├── README.md │ ├── benches/ │ │ └── metal_gemm.rs │ └── src/ │ ├── command_buffer.rs │ ├── context.rs │ ├── encoder.rs │ ├── func_constants.rs │ ├── kernels/ │ │ ├── array/ │ │ │ ├── array_ops.metal │ │ │ ├── cast.rs │ │ │ ├── copy.rs │ │ │ ├── dispatch.rs │ │ │ ├── mod.rs │ │ │ └── rotate_half.rs │ │ ├── bin_ops.metal │ │ ├── bin_ops.rs │ │ ├── conv.metal │ │ ├── conv.rs │ │ ├── element_wise.metal │ │ ├── element_wise.rs │ │ ├── matmul/ │ │ │ ├── basic/ │ │ │ │ ├── basic_mat_mul.metal │ │ │ │ └── mod.rs │ │ │ ├── ggml_gemm/ │ │ │ │ ├── README.md │ │ │ │ ├── ggml_mm_mv.metal │ │ │ │ └── mod.rs │ │ │ ├── mfa/ │ │ │ │ ├── libMetalFlashAttention-ios.metallib │ │ │ │ ├── libMetalFlashAttention-macos.metallib │ │ │ │ └── mod.rs │ │ │ ├── mlx_gemm/ │ │ │ │ ├── mlx_gemm.metal │ │ │ │ ├── mlx_gemv.metal │ │ │ │ └── mod.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── nn/ │ │ │ ├── apply_rope.rs │ │ │ ├── gelu_approximate.rs │ │ │ ├── leaky_relu.rs │ │ │ ├── mod.rs │ │ │ ├── nn_ops.metal │ │ │ ├── reduce.rs │ │ │ ├── rms_norm.rs │ │ │ ├── scaled_masked_softmax.rs │ │ │ ├── silu.rs │ │ │ └── softmax.rs │ │ └── utils.rs │ ├── lib.rs │ ├── ops/ │ │ ├── conv.rs │ │ ├── fused_axis_op.rs │ │ ├── gemm.rs │ │ └── mod.rs │ ├── rewrite_rules/ │ │ ├── add_matmul_broadcast.rs │ │ ├── fuse_axis_op.rs │ │ ├── mod.rs │ │ └── untranspose_matmul_output.rs │ ├── tensor.rs │ ├── tests.rs │ ├── transform.rs │ └── utils.rs ├── nnef/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── cli/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── nnef-resources/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ ├── 
json_loader.rs │ │ │ └── lib.rs │ │ └── tests/ │ │ ├── nnef_with_json/ │ │ │ ├── graph.nnef │ │ │ └── src_config.json │ │ └── test_json_resource.rs │ ├── src/ │ │ ├── ast/ │ │ │ ├── dump.rs │ │ │ ├── dump_doc.rs │ │ │ ├── parse.rs │ │ │ └── quant.rs │ │ ├── ast.rs │ │ ├── deser.rs │ │ ├── framework.rs │ │ ├── lib.rs │ │ ├── liquid.rs │ │ ├── ops/ │ │ │ ├── core/ │ │ │ │ ├── broadcast.rs │ │ │ │ ├── cast.rs │ │ │ │ ├── complex.rs │ │ │ │ ├── downsample.rs │ │ │ │ ├── dyn_slice.rs │ │ │ │ ├── einsum.rs │ │ │ │ ├── fft.rs │ │ │ │ ├── gather.rs │ │ │ │ ├── gelu_approximate.rs │ │ │ │ ├── is_inf.rs │ │ │ │ ├── matmul.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── qconv.rs │ │ │ │ ├── qmatmul.rs │ │ │ │ ├── range.rs │ │ │ │ ├── reduce.rs │ │ │ │ ├── rms_norm.rs │ │ │ │ ├── scan.rs │ │ │ │ ├── scatter.rs │ │ │ │ ├── shape_of.rs │ │ │ │ ├── silu.rs │ │ │ │ ├── softmax.rs │ │ │ │ ├── source.rs │ │ │ │ ├── submodel.rs │ │ │ │ ├── topk.rs │ │ │ │ └── trilu.rs │ │ │ ├── core.rs │ │ │ ├── mod.rs │ │ │ ├── nnef/ │ │ │ │ ├── deser.rs │ │ │ │ ├── mod.rs │ │ │ │ └── ser.rs │ │ │ └── resource.rs │ │ ├── registry.rs │ │ ├── resource.rs │ │ ├── ser.rs │ │ ├── tensors.rs │ │ └── transform.rs │ ├── stdlib.nnef │ └── tests/ │ ├── alexnet.nnef │ └── parse.rs ├── onnx/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ ├── linear_classifier.rs │ │ └── linear_regressor.rs │ ├── build-proto.rs │ ├── protos/ │ │ └── onnx/ │ │ ├── onnx-operators.proto3 │ │ ├── onnx.proto │ │ └── onnx.proto3 │ ├── src/ │ │ ├── data_resolver.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── compress.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── nonzero.rs │ │ │ │ ├── one_hot.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── shape.rs │ │ │ │ ├── slice.rs │ │ │ │ ├── split.rs │ │ │ │ ├── squeeze.rs │ │ │ │ ├── topk.rs │ │ │ │ ├── trilu.rs │ │ │ │ └── unsqueeze.rs │ │ │ ├── cast.rs │ │ │ ├── cumsum.rs │ │ │ ├── d2s.rs │ │ │ ├── einsum.rs │ │ │ ├── fft.rs │ │ │ ├── grid_sample.rs │ │ │ 
├── logic.rs │ │ │ ├── math/ │ │ │ │ ├── clip.rs │ │ │ │ ├── gemm.rs │ │ │ │ ├── mat_mul_integer.rs │ │ │ │ ├── pow.rs │ │ │ │ └── rem.rs │ │ │ ├── math.rs │ │ │ ├── ml/ │ │ │ │ ├── category_mapper.rs │ │ │ │ ├── linear_classifier.rs │ │ │ │ ├── linear_regressor.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── normalizer.rs │ │ │ │ └── tree_ensemble_classifier.rs │ │ │ ├── mod.rs │ │ │ ├── multinomial.rs │ │ │ ├── nn/ │ │ │ │ ├── batch_norm.rs │ │ │ │ ├── conv_transpose.rs │ │ │ │ ├── dropout.rs │ │ │ │ ├── instance_norm.rs │ │ │ │ ├── layer_norm.rs │ │ │ │ ├── lrn.rs │ │ │ │ ├── mod.rs │ │ │ │ └── reduce.rs │ │ │ ├── non_max_suppression.rs │ │ │ ├── quant.rs │ │ │ ├── random.rs │ │ │ ├── rec/ │ │ │ │ ├── common.rs │ │ │ │ ├── gru.rs │ │ │ │ ├── lstm.rs │ │ │ │ ├── rnn.rs │ │ │ │ └── scan.rs │ │ │ ├── rec.rs │ │ │ ├── resize.rs │ │ │ └── s2d.rs │ │ ├── pb_helpers.rs │ │ ├── prost/ │ │ │ └── onnx.rs │ │ └── tensor.rs │ └── test_cases/ │ ├── byte_sb_bidi_lstm/ │ │ ├── README.md │ │ ├── generate_io.py │ │ ├── io.npz │ │ └── model.onnx │ ├── deconv_group/ │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── lgbm_classifier_tensor/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── lgbm_regressor_tensor/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ ├── linear_classifier/ │ │ └── model.onnx │ ├── linear_regressor/ │ │ └── model.onnx │ ├── qlstm_3-2-3_T3_S1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qrelu_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qrelu_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qsigmoid_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qsigmoid_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qtanh_1/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── 
model.raw │ │ └── vars.sh │ ├── qtanh_2/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── qtdnn_10x5_101_i32_biases/ │ │ ├── final.mdl │ │ ├── io.npz │ │ ├── model.onnx │ │ ├── model.raw │ │ └── vars.sh │ ├── run_all.sh │ ├── tinyyolov2/ │ │ ├── io.npz │ │ └── vars.sh │ ├── transformer-mlm/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ └── vars.sh │ ├── xgboost_classifier_tree/ │ │ ├── generate_io.py │ │ ├── io.npz │ │ ├── model.onnx │ │ └── vars.sh │ └── xgboost_regressor_tree/ │ ├── generate_io.py │ ├── io.npz │ ├── model.onnx │ └── vars.sh ├── onnx-opl/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── grid_sample.rs │ ├── lib.rs │ ├── lrn.rs │ ├── ml/ │ │ ├── category_mapper.rs │ │ ├── mod.rs │ │ ├── tree.rs │ │ └── tree_ensemble_classifier.rs │ ├── multinomial.rs │ ├── non_max_suppression.rs │ ├── random.rs │ └── resize.rs ├── post-release.sh ├── pulse/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── fact.rs │ ├── lib.rs │ ├── macros.rs │ ├── model.rs │ └── ops/ │ ├── array/ │ │ ├── broadcast.rs │ │ ├── concat.rs │ │ ├── mask.rs │ │ ├── mod.rs │ │ ├── pad.rs │ │ └── slice.rs │ ├── cnn/ │ │ ├── conv.rs │ │ ├── deconv.rs │ │ ├── mod.rs │ │ └── pools.rs │ ├── delay.rs │ ├── downsample.rs │ ├── dummy.rs │ ├── fft.rs │ ├── identity.rs │ ├── mask.rs │ ├── mod.rs │ ├── scan.rs │ ├── slice.rs │ └── source.rs ├── pulse-opl/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src/ │ ├── concat.rs │ ├── deconv_delay.rs │ ├── delay.rs │ ├── lib.rs │ ├── mask.rs │ ├── pad.rs │ └── slice.rs ├── release.sh ├── rustfmt.toml ├── tensorflow/ │ ├── Cargo.toml │ ├── LICENSE │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches/ │ │ └── hey_snips_3.pb │ ├── build-proto.rs │ ├── examples/ │ │ └── plus3.rs │ ├── protos/ │ │ └── tensorflow/ │ │ └── core/ │ │ ├── framework/ │ │ │ ├── attr_value.proto │ │ │ ├── function.proto │ │ │ ├── graph.proto │ 
│ │ ├── node_def.proto │ │ │ ├── op_def.proto │ │ │ ├── resource_handle.proto │ │ │ ├── tensor.proto │ │ │ ├── tensor_shape.proto │ │ │ ├── types.proto │ │ │ ├── variable.proto │ │ │ └── versions.proto │ │ └── protobuf/ │ │ ├── meta_graph.proto │ │ ├── saved_model.proto │ │ ├── saved_object_graph.proto │ │ ├── saver.proto │ │ ├── struct.proto │ │ └── trackable_object_graph.proto │ ├── src/ │ │ ├── conform/ │ │ │ ├── mod.rs │ │ │ └── tf.rs │ │ ├── lib.rs │ │ ├── model.rs │ │ ├── ops/ │ │ │ ├── array/ │ │ │ │ ├── concatv2.rs │ │ │ │ ├── expand_dims.rs │ │ │ │ ├── fill.rs │ │ │ │ ├── gather_nd.rs │ │ │ │ ├── gather_v2.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── pack.rs │ │ │ │ ├── pad.rs │ │ │ │ ├── squeeze.rs │ │ │ │ └── transpose.rs │ │ │ ├── control_flow.rs │ │ │ ├── logic.rs │ │ │ ├── math/ │ │ │ │ └── reduce.rs │ │ │ ├── math.rs │ │ │ ├── mod.rs │ │ │ ├── nn/ │ │ │ │ ├── conv2d.rs │ │ │ │ ├── dw_conv2d.rs │ │ │ │ ├── fused_batch_norm.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── pools.rs │ │ │ │ └── s2b/ │ │ │ │ ├── mod.rs │ │ │ │ ├── raw.rs │ │ │ │ └── unary.rs │ │ │ ├── quant.rs │ │ │ ├── random/ │ │ │ │ ├── mod.rs │ │ │ │ ├── philox.rs │ │ │ │ └── random_uniform.rs │ │ │ └── rec/ │ │ │ ├── block_lstm.rs │ │ │ └── mod.rs │ │ ├── prost/ │ │ │ ├── google.protobuf.rs │ │ │ └── tensorflow.rs │ │ ├── tensor.rs │ │ └── tfpb.rs │ └── tests/ │ ├── models/ │ │ └── plus3.pb │ ├── ops_array_pack.rs │ ├── ops_array_strided_slice.proptest-regressions │ ├── ops_array_strided_slice.rs │ ├── ops_fake_quant_with_min_max_vars.rs │ ├── ops_nn_conv2d.proptest-regressions │ ├── ops_nn_conv2d.rs │ ├── ops_nn_dwconv2d.proptest-regressions │ ├── ops_nn_dwconv2d.rs │ ├── ops_nn_pools.proptest-regressions │ ├── ops_nn_pools.rs │ ├── ops_nn_space_to_batch.proptest-regressions │ ├── ops_nn_space_to_batch.rs │ ├── ops_random_uniform.rs │ └── utils/ │ └── mod.rs ├── test-rt/ │ ├── infra/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── suite-onnx/ │ │ ├── Cargo.toml │ │ ├── node.txt │ │ ├── 
pytorch-converted.txt │ │ ├── pytorch-operator.txt │ │ ├── simple.txt │ │ └── src/ │ │ └── lib.rs │ ├── suite-unit/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── apply_rope.rs │ │ ├── bin_einsum.rs │ │ ├── binary.rs │ │ ├── conv_f16.rs │ │ ├── conv_f32.rs │ │ ├── conv_q.rs │ │ ├── deconv.rs │ │ ├── downsample.rs │ │ ├── elmwise.rs │ │ ├── gelu_approximate.rs │ │ ├── lib.rs │ │ ├── matmul_q40.rs │ │ ├── q_binary.rs │ │ ├── q_elmwise.rs │ │ ├── q_flavours.rs │ │ ├── q_helpers.rs │ │ ├── rms_norm.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── sdpa.rs │ │ ├── silu.rs │ │ └── slice.rs │ ├── test-blas/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-cuda/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-f16/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-metal/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── ggml_suite.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-nnef-cycle/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ └── lib.rs │ │ └── suite.rs │ ├── test-onnx-core/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── debug-utils/ │ │ │ ├── Cargo.toml │ │ │ ├── README.md │ │ │ ├── save_all.py │ │ │ └── src/ │ │ │ └── main.rs │ │ ├── include-passing-ignored.sh │ │ └── src/ │ │ ├── bin/ │ │ │ └── reset-test-list.rs │ │ └── lib.rs │ ├── test-tflite/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── src/ │ │ │ ├── lib.rs │ │ │ └── tflite_runtime.rs │ │ └── suite.rs │ └── test-unit-core/ │ ├── Cargo.toml │ ├── build.rs │ └── src/ │ ├── lib.rs │ └── main.rs ├── test-suite.sh ├── tflite/ │ ├── Cargo.toml │ ├── Readme.md │ ├── schema/ │ │ └── tflite.fbs │ └── src/ │ ├── lib.rs │ ├── model.rs │ ├── ops/ │ │ ├── array.rs │ │ ├── cnn.rs │ │ ├── element_wise.rs │ │ ├── math.rs │ │ ├── mod.rs │ │ └── nn.rs │ ├── registry.rs │ ├── rewriter.rs │ ├── ser.rs │ ├── tensors.rs │ └── tflite_generated.rs ├── transformers/ │ ├── Cargo.toml │ └── src/ │ ├── 
lib.rs │ ├── ops/ │ │ ├── apply_rope.rs │ │ ├── dyn_kv_cache.rs │ │ ├── flash_sdpa.rs │ │ ├── mod.rs │ │ ├── scaled_masked_softmax.rs │ │ ├── sdpa.rs │ │ └── streamed_sdpa.rs │ └── rewriter.rs └── yank.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .all_crates.sh ================================================ ALL_CRATES_PATH="data linalg core nnef nnef/nnef-resources pulse-opl pulse extra transformers hir tflite tensorflow onnx-opl onnx gpu metal cuda libcli api api/rs api/ffi api/proxy/sys api/proxy cli" ================================================ FILE: .change_crate_dep.sh ================================================ #!/bin/bash crate=$1 version=$2 perl -pi -e "s/^($crate = {.*version *= *)\"([^\"]*)\"(.*)$/\$1\"=$version\"\$3/" \ `find . -name Cargo.toml \! -path "./target/*" \! -path "./issue*"` ================================================ FILE: .clang-format ================================================ BasedOnStyle: LLVM IndentWidth: 4 TabWidth: 4 UseTab: Never IndentPPDirectives: BeforeHash PPIndentWidth: 4 ColumnLimit: 100 # OneLineFormatOffRegex: '^\s*#\s*pragma\s+unroll\b' ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" day: "monday" groups: actions: patterns: - "*" - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" day: "monday" open-pull-requests-limit: 10 groups: rust-dependencies: patterns: - "*" - package-ecosystem: "pip" directory: "/api/py" ignore: # Only update them manually since updating them might break compatibility - dependency-name: "numpy" schedule: interval: "weekly" day: "monday" ================================================ FILE: .github/workflows/asan.yml 
================================================ name: Sanitized build tests on: workflow_dispatch: schedule: - cron: '0 5 * * MON' env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: sanitizer-address: strategy: fail-fast: false matrix: os: [ ubuntu-latest, macOS-latest ] runs-on: ${{matrix.os}} steps: - uses: actions/checkout@v6 - name: Rustup update run: rustup update - name: Run sanitized tests run: .travis/asan.sh ================================================ FILE: .github/workflows/binaries.yml ================================================ on: release: types: - created name: Upload Release Binaries env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: assets: name: Upload Release Binaries strategy: fail-fast: false matrix: os: [ ubuntu-latest, macOS-latest ] arch: [ x86_64, aarch64, armv7 ] include: - os: ubuntu-latest arch: x86_64 target: x86_64-unknown-linux-musl musl: x86_64-linux-musl - os: ubuntu-latest arch: aarch64 target: aarch64-unknown-linux-musl musl: aarch64-linux-musl - os: ubuntu-latest arch: armv7 target: armv7-unknown-linux-musleabihf musl: armv7l-linux-musleabihf - os: macOS-latest arch: x86_64 target: x86_64-apple-darwin - os: macOS-latest arch: aarch64 target: aarch64-apple-darwin exclude: - os: macOS-latest arch: armv7 runs-on: ${{ matrix.os }} steps: - name: Checkout code uses: actions/checkout@v6 - name: Extract version tag id: version run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - name: Build tract run: | set -ex target=${{matrix.target}} version=${{steps.version.outputs.value}} name=${target}-${version} rustup update rustup target add ${target} if [ -n "${{matrix.musl}}" ] then MUSL_TRIPLE=${{matrix.musl}} curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/${MUSL_TRIPLE}-cross.tgz | tar zx MUSL_BIN=`pwd`/${MUSL_TRIPLE}-cross/bin export PATH=$MUSL_BIN:$PATH export TARGET_CC=$MUSL_BIN/${MUSL_TRIPLE}-gcc RUST_TRIPLE_ENV=$(echo 
${target} | tr 'a-z-' 'A-Z_') export CARGO_TARGET_${RUST_TRIPLE_ENV}_CC=$TARGET_CC export CARGO_TARGET_${RUST_TRIPLE_ENV}_LINKER=$TARGET_CC fi cargo build --target ${target} --release -p tract-cli mkdir tract-$name cp target/${target}/release/tract tract-${name} tar czf tract-${name}.tgz tract-${name} - name: Upload asset uses: softprops/action-gh-release@v2 with: files: tract-${{matrix.target}}-${{ steps.version.outputs.value }}.tgz name: ${{ steps.version.outputs.value }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/cost_model.yml ================================================ on: workflow_dispatch: inputs: dataset_id: description: 'dataset identifier' required: true name: Generate cost model analysis dataset env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: build: name: Upload cost model tasks runs-on: ubuntu-latest strategy: fail-fast: false matrix: target: [ "aarch64", "armv7" ] steps: - name: Checkout code uses: actions/checkout@v6 - name: Build and upload run: ./.travis/cost_model_task_build.sh ${{matrix.target}} ${{github.event.inputs.dataset_id}} env: AWS_ACCESS_KEY_ID: ${{secrets.TRACT_CI_AWS_ACCESS_KEY_ID}} AWS_SECRET_ACCESS_KEY: ${{secrets.TRACT_CI_AWS_SECRET_ACCESS_KEY}} AWS_EC2_METADATA_DISABLED: true ================================================ FILE: .github/workflows/crates.yml ================================================ name: Rust crates on: pull_request: schedule: - cron: '0 3 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: prepare-matrix: runs-on: ubuntu-latest outputs: os: ${{steps.set-matrix.outputs.os}} rust: ${{steps.set-matrix.outputs.rust}} steps: - id: set-matrix env: FULL: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} run: | if [ "$FULL" == "true" ] then echo 'os=["ubuntu-latest", "macos-latest"]' >> $GITHUB_OUTPUT echo 'rust=["1.91.0", 
"stable", "beta", "nightly"]' >> $GITHUB_OUTPUT else echo ::notice::Skipping macOS checks on PR and commit. Dispatch workflow manually if needed. echo 'os=["ubuntu-latest"]' >> $GITHUB_OUTPUT echo 'rust=["1.91.0"]' >> $GITHUB_OUTPUT fi crates: name: ${{matrix.os}} / ${{matrix.crate}} / ${{matrix.rust}} needs: prepare-matrix strategy: matrix: os: ${{fromJson(needs.prepare-matrix.outputs.os)}} rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} crate: [ tract-data, tract-linalg, tract-core, tract-nnef, tract-hir, tract-onnx, tract-pulse, tract-onnx-opl, tract-pulse-opl, tract, test-unit-core, test-onnx-core, test-nnef-cycle, test-f16, ] fail-fast: false runs-on: ${{matrix.os}} env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p ${{matrix.crate}} cuda: runs-on: cuda-lovelace needs: prepare-matrix strategy: matrix: rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p tract-cuda -p test-cuda metal: runs-on: macOS needs: prepare-matrix strategy: matrix: rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - name: Cargo test run: cargo test -p tract-metal -p test-metal pedantic: name: fmt, clippy, etc (${{matrix.os}} / ${{matrix.rust}}) needs: prepare-matrix strategy: matrix: os: ${{fromJson(needs.prepare-matrix.outputs.os)}} rust: ${{fromJson(needs.prepare-matrix.outputs.rust)}} fail-fast: false runs-on: ${{matrix.os}} env: RUSTUP_TOOLCHAIN: ${{matrix.rust}} steps: - uses: actions/checkout@v6 - run: rustup component add clippy && cargo clippy - name: fmt run: rustup component add rustfmt && cargo fmt --check - name: Warnings env: RUSTFLAGS: -D warnings run: cargo check cargo-deny: strategy: fail-fast: false runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Install 
cargo-deny run: | curl -L https://github.com/EmbarkStudios/cargo-deny/releases/download/$VERSION/cargo-deny-$VERSION-x86_64-unknown-linux-musl.tar.gz \ | tar -zx --strip-components=1 "cargo-deny-$VERSION-x86_64-unknown-linux-musl/cargo-deny" env: VERSION: 0.18.9 - name: Run cargo-deny run: .travis/cargo-deny-check.sh ================================================ FILE: .github/workflows/cross-platform.yml ================================================ name: Embedded targets on: pull_request: schedule: - cron: '0 5 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true RUSTUP_TOOLCHAIN: 1.91.0 jobs: linux: strategy: fail-fast: false matrix: platform: - raspbian - aarch64-unknown-linux-gnu - aarch64-unknown-linux-gnu-stretch - armv6vfp-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf - armv7-unknown-linux-gnueabihf-stretch - aarch64-unknown-linux-musl - cortexa53-unknown-linux-musl - armv7-unknown-linux-musl - aarch64-linux-android - armv7-linux-androideabi - i686-linux-android - x86_64-linux-android - x86_64-unknown-linux-gnu-stretch - wasm32-unknown-unknown - wasm32-wasi runs-on: ubuntu-latest permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - uses: actions/cache@v5 with: path: | ~/.rustup ~/.cargo/registry ~/.cargo/git # ~/.cache/sccache .cached target key: ${{ runner.os }}-${{matrix.platform}}-${{steps.date.outputs.date}} - name: Setup wasmtime if: ${{ matrix.platform }} == "wasm32-wasi" uses: bytecodealliance/actions/wasmtime/setup@v1 - name: Cross script env: PLATFORM: ${{matrix.platform}} AWS_EC2_METADATA_DISABLED: true run: .travis/cross.sh apple: strategy: fail-fast: false matrix: platform: 
- aarch64-apple-ios - aarch64-apple-darwin runs-on: macos-latest permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: Get current date id: date run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - name: Cross script env: PLATFORM: ${{matrix.platform}} run: .travis/cross.sh ================================================ FILE: .github/workflows/examples.yml ================================================ name: Examples on: schedule: - cron: '0 3 * * *' workflow_dispatch: env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true RUSTUP_TOOLCHAIN: 1.91.0 jobs: examples: runs-on: ubuntu-latest outputs: examples: ${{steps.set-matrix.outputs.examples}} steps: - uses: actions/checkout@v6 - id: set-matrix run: | echo examples=`find examples -name ci.sh | cut -d/ -f 2 | jq -Rsc '. 
/ "\n" - [""]'` >> "$GITHUB_OUTPUT" example: name: ${{ matrix.ex }} runs-on: ubuntu-latest needs: examples strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials # if: github.repository == 'sonos/tract' continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: example tests env: AWS_EC2_METADATA_DISABLED: true timeout-minutes: 30 run: | cd examples/${{matrix.ex}} ./ci.sh build-tract-cli: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - run: cargo build -p tract-cli --profile opt-no-lto - uses: actions/upload-artifact@v7 with: name: tract-cli-x86_64 path: ./target/opt-no-lto/tract build-tract-cli-macos: runs-on: macOS steps: - uses: actions/checkout@v6 - run: cargo build -p tract-cli --profile opt-no-lto - uses: actions/upload-artifact@v7 with: name: tract-cli-aarch64-apple path: ./target/opt-no-lto/tract gpu-examples: runs-on: ubuntu-latest outputs: examples: ${{steps.set-matrix.outputs.examples}} steps: - uses: actions/checkout@v6 - id: set-matrix run: | echo examples=`find examples -name ci-gpu.sh | cut -d/ -f 2 | jq -Rsc '. 
/ "\n" - [""]'` >> "$GITHUB_OUTPUT" gpu-example: name: ${{ matrix.ex }} (CUDA) runs-on: cuda-lovelace needs: [gpu-examples, build-tract-cli] strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.gpu-examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: name: tract-cli-x86_64 path: target/opt-no-lto - run: chmod +x target/opt-no-lto/tract - name: GPU example tests timeout-minutes: 60 run: | cd examples/${{matrix.ex}} ./ci-gpu.sh gpu-example-metal: name: ${{ matrix.ex }} (Metal) runs-on: macOS needs: [gpu-examples, build-tract-cli-macos] strategy: fail-fast: false matrix: ex: ${{fromJSON(needs.gpu-examples.outputs.examples)}} steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: name: tract-cli-aarch64-apple path: target/opt-no-lto - run: chmod +x target/opt-no-lto/tract - name: Metal GPU example tests timeout-minutes: 60 run: | cd examples/${{matrix.ex}} ./ci-gpu.sh ================================================ FILE: .github/workflows/full.yml ================================================ name: Full test harness on: schedule: - cron: '0 3 * * *' workflow_dispatch: inputs: pr_number: description: "Optional PR number to test (from fork ok). Leave empty to run on selected branch." 
required: false type: number env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: prepare: runs-on: ubuntu-latest outputs: test_ref: ${{ steps.set.outputs.test_ref }} steps: - id: set uses: actions/github-script@v8 with: script: | const prInput = context.payload.inputs?.pr_number; core.info(`Fetching PR ${prInput}`); if (!prInput) { // Use the ref the workflow was triggered on (branch/tag/SHA in base repo) core.setOutput('test_ref', process.env.GITHUB_SHA); return; } const pr = await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, pull_number: Number(prInput), }); core.info(pr.data.head.sha); core.setOutput('test_ref', pr.data.head.sha); old-harness: runs-on: ubuntu-latest strategy: fail-fast: false permissions: id-token: write contents: read needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - name: Full test run: .travis/test-harness.sh cli-tests: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test env: AWS_EC2_METADATA_DISABLED: true run: .travis/cli-tests.sh onnx-tests: runs-on: ubuntu-latest needs: prepare strategy: matrix: opset: [1_4_1, 1_5_0, 1_6_0, 1_7_0, 1_8_1, 1_9_0, 1_10_2, 1_11_0, 1_12_0, 1_13_0, 1_14_1, 1_15_0, 1_16_2, 1_17_0, 1_18_0, 1_19_1] steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test run: .travis/onnx-tests.sh ${{ matrix.opset }} tflite: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Full test run: .travis/tflite.sh some-tests-with-paranoid-asserts: runs-on: 
ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: With assertions run: | rustup update cargo test --features tract-core/paranoid_assertions -p test-onnx-core -p test-unit-core without-default-features: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Without default features run: | rustup update cargo check -p tract-cli --no-default-features $CARGO_EXTRA complexes: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: With complexes run: | rustup update cargo check -p tract-nnef --features complex $CARGO_EXTRA check-all-targets: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Check all targets run: | ROOT=$(pwd) ./.travis/ci-system-setup.sh cargo check --all-targets --workspace --exclude test-metal --exclude tract-metal C: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: C smoke tests run: | cd api/c cargo install cbindgen make python: runs-on: ubuntu-latest needs: prepare steps: - uses: actions/checkout@v6 with: ref: ${{ needs.prepare.outputs.test_ref }} fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v6 with: python-version: "3.13" - name: Install uv uses: astral-sh/setup-uv@v7 - name: Pytest bindings timeout-minutes: 60 run: | cd api/py uv venv --python 3.13 source .venv/bin/activate uv pip install -e ".[dev]" pytest . 
================================================ FILE: .github/workflows/large_models.yml ================================================ name: Large models on: pull_request: schedule: - cron: '0 3 * * *' workflow_dispatch: env: LARGE_MODELS: true jobs: cli: name: Build tract on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ macos-latest, ubuntu-latest ] steps: - uses: actions/checkout@v6 - run: | ROOT=. ./.travis/ci-system-setup.sh cargo build -p tract-cli --profile opt-no-lto --no-default-features --features transformers - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/upload-artifact@v7 with: name: tract-cli-${{env.uname}} path: ./target/opt-no-lto/tract foundation-llms: runs-on: ubuntu-latest outputs: models: ${{steps.set-matrix.outputs.models}} q: ${{steps.set-matrix.outputs.q}} steps: - id: set-matrix env: FULL: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} run: | if [ "$FULL" = "true" ] then echo 'models=[ "openelm-270M", "llama-3.2-1B-instruct", "llama-3.2-3B-instruct", "llama-3.1-8B-instruct", "qwen3-1.7B", "qwen3-8B" ]' >> $GITHUB_OUTPUT echo 'q=[ "f16f16", "f32f32", "q40ef16" ]' >> $GITHUB_OUTPUT else echo ::notice::Skipping most checks on PR and commit. Dispatch workflow manually if needed. 
echo 'models=[ "llama-3.2-1B-instruct" ]' >> $GITHUB_OUTPUT echo 'q=[ "f32f32", "q40ef16" ]' >> $GITHUB_OUTPUT fi foundation-llm: name: ${{ matrix.os }} / ${{matrix.rt}} / ${{ matrix.model }} / ${{ matrix.q }} needs: [ cli, foundation-llms ] runs-on: ${{ matrix.os }} strategy: matrix: os: [ macOS, cuda-lovelace ] model: ${{fromJson(needs.foundation-llms.outputs.models)}} q: ${{fromJson(needs.foundation-llms.outputs.q)}} rt: [ cpu, gpu ] exclude: - model: openelm-270M q: f32f32 - model: Llama-3.2-3B-Instruct q: f32f32 - model: Llama-3.2-3B-Instruct q: f32f32 - model: Llama-3.1-8B-Instruct q: f32f32 - model: Qwen3-1.7B q: f32f32 - model: Qwen3-8B q: f32f32 - model: OpenELM-270M q: f32f32 fail-fast: false permissions: id-token: write contents: read steps: - uses: actions/checkout@v6 - name: Configure AWS Credentials continue-on-error: true uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: arn:aws:iam::567805100031:role/github-runner-tract-ci aws-region: us-east-2 - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/download-artifact@v8 with: name: tract-cli-${{env.uname}} path: tract-cli-${{env.uname}} - name: Download and run run: | chmod +x tract-cli-${{env.uname}}/tract export TRACT_RUN=$GITHUB_WORKSPACE/tract-cli-${{env.uname}}/tract if [ "${{matrix.rt}}" = "gpu" ] then case $(uname) in Darwin) RT=metal;; Linux) RT=cuda;; esac fi .travis/test-llm.sh ${{matrix.model}} ${{matrix.q}} $RT parakeet-tdt-600m-v3: name: ${{matrix.os}} / Parakeet TDT 600m v3 needs: [ cli ] strategy: matrix: os: [ macOS, cuda-lovelace ] fail-fast: false permissions: id-token: write contents: read runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - run: echo uname=$(uname) >> $GITHUB_ENV - uses: actions/download-artifact@v8 with: name: tract-cli-${{env.uname}} path: tract-cli-${{env.uname}} - name: Download and run run: | chmod +x tract-cli-${{env.uname}}/tract export TRACT_RUN=$GITHUB_WORKSPACE/tract-cli-${{env.uname}}/tract 
./harness/parakeet-tdt-600m-v3/ci.sh ================================================ FILE: .github/workflows/pydoc.yml ================================================ name: Python gh-pages doc on: pull_request: release: workflow_dispatch: env: CARGO_INCREMENTAL: false jobs: build_doc: name: Build doc runs-on: ubuntu-latest if: github.repository == 'sonos/tract' steps: - uses: actions/checkout@v6 - name: Install Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.12" - name: Extract version tag id: version if: github.event_name == 'release' && github.event.action == 'published' run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - name: Build doc run: | set -ex cd api/py python -m venv pydocs source pydocs/bin/activate pip install -r requirements-docs.txt pip install -e . sphinx-build -b html . _build/html cp _static/redirect-index.html _build/html/index.html - name: Deploy to gh-pages if: github.event_name != 'pull_request' run: | set -ex git config user.name "CI bot" git config user.email ci-bot@tract.rs version="${{ steps.version.outputs.value }}" if [ -z "$version" ]; then version="dev" fi # fetch existing gh-pages into a work directory git fetch origin gh-pages --depth=1 || true workdir=$(mktemp -d) git worktree add "$workdir" gh-pages 2>/dev/null || { git worktree add --orphan "$workdir" gh-pages } # copy new build into the versioned subdirectory rm -rf "$workdir/$version" cp -r api/py/_build/html "$workdir/$version" # regenerate versions.json (mike-compatible format) from directories present cd "$workdir" python3 -c " import json, os, re dirs = sorted( [d for d in os.listdir('.') if os.path.isdir(d) and d != '.git'], key=lambda v: [int(x) if x.isdigit() else x for x in re.split(r'(\d+)', v)], reverse=True, ) versions = [{'version': d, 'title': d, 'aliases': []} for d in dirs] with open('versions.json', 'w') as f: json.dump(versions, f, 
indent=2) " # commit and push git add -A git commit -m "Update Python docs ($version)" || true git push origin gh-pages # clean up worktree cd - git worktree remove "$workdir" ================================================ FILE: .github/workflows/release.yml ================================================ on: push: tags: - 'v*' name: Create release env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: release: name: Create release runs-on: ubuntu-latest steps: - name: Extract version tag id: version run: echo value=$(echo ${{ github.ref }} | cut -f 3 -d / | sed 's/^v//' ) >> $GITHUB_OUTPUT - uses: actions/checkout@v6 - name: Create Release uses: softprops/action-gh-release@v2 with: name: tract ${{ steps.version.outputs.value }} env: GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN_RELEASE }} ================================================ FILE: .github/workflows/tract-ci-bench.yml ================================================ name: Bench with tract-ci-minion on: schedule: - cron: '1 * * * *' # every hour at minute 1 workflow_dispatch: jobs: minion: strategy: fail-fast: false matrix: os: - macOS - cuda-lovelace runs-on: ${{ matrix.os }} steps: - name: Run minion if found run: | if [ -d $HOME/tract-minion ] then echo "Running minion" cd $HOME/tract-minion ./tract-ci-minion --once else echo "Not running minion" fi ================================================ FILE: .github/workflows/wheels.yml ================================================ name: Python wheels on: schedule: - cron: '0 3 * * MON' release: types: [created] workflow_dispatch: inputs: publish: description: force publish to pypi type: boolean pypi_version_override: description: override version id detection type: string env: CARGO_INCREMENTAL: false PYPI_VERSION_OVERRIDE: ${{ inputs.pypi_version_override }} CIBW_ENVIRONMENT_PASS_LINUX: "PYPI_VERSION_OVERRIDE" FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true MACOSX_DEPLOYMENT_TARGET: 10.13 jobs: build_wheels: name: Build wheels on ${{ 
matrix.os }} runs-on: ${{ matrix.os}} strategy: fail-fast: false matrix: os: [ubuntu-22.04, windows-2022, macos-14] steps: - uses: actions/checkout@v6 - name: Setup | Rust uses: dtolnay/rust-toolchain@stable - uses: actions/setup-python@v6 with: python-version: "3.13" - name: Install uv uses: astral-sh/setup-uv@v7 - name: Install rust toolchains if: startsWith(matrix.os, 'macOS') run: rustup target install x86_64-apple-darwin aarch64-apple-darwin - name: Build wheels uses: nick-fields/retry@v4 with: max_attempts: 1 timeout_seconds: 54000 # 15 hours :/ command: uvx cibuildwheel --output-dir wheelhouse api/py - uses: actions/upload-artifact@v7 with: name: wheels-${{github.run_id}}-${{matrix.os}} path: ./wheelhouse/*.whl make_sdist: name: Make SDist runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 - name: Build SDist run: cd api/py && uv build --sdist - uses: actions/upload-artifact@v7 with: name: wheels-${{github.run_id}}-src path: api/py/dist/*.tar.gz upload_all: needs: [build_wheels, make_sdist] runs-on: ubuntu-latest if: (github.event_name == 'release' && github.event.action == 'published') || inputs.publish steps: - uses: actions/download-artifact@v8 with: pattern: wheels-${{github.run_id}}-* merge-multiple: true path: dist - uses: pypa/gh-action-pypi-publish@v1.13.0 with: user: __token__ password: ${{ secrets.PYPI }} verbose: true ================================================ FILE: .github/workflows/windows.yml ================================================ name: Windows unit tests on: # pull_request: workflow_dispatch: schedule: - cron: '0 3 * * *' env: CARGO_INCREMENTAL: false FORCE_JAVASCRIPT_ACTIONS_TO_NODE20: true jobs: windows: strategy: matrix: os: [ windows-2022 ] toolchain: [ gnu, msvc ] fail-fast: false runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - uses: nick-fields/retry@v4 name: Install Rustup using win.rustup.rs with: timeout_minutes: 10 max_attempts: 8 shell: pwsh 
command: | # Disable the download progress bar which can cause perf issues $ProgressPreference = "SilentlyContinue" Invoke-WebRequest https://win.rustup.rs/ -OutFile rustup-init.exe .\rustup-init.exe -y --default-host=x86_64-pc-windows-msvc --profile=minimal - uses: nick-fields/retry@v4 name: Install the target with: timeout_minutes: 10 max_attempts: 8 shell: pwsh command: | rustup toolchain add stable-x86_64-pc-windows-${{matrix.toolchain}} rustup default stable-x86_64-pc-windows-${{matrix.toolchain}} - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v2 with: version: "11.0" - name: debug run: dir "C:\\Program Files\\LLVM" - name: debug lib run: dir "C:\\Program Files\\LLVM\\lib" - name: debug bin run: dir "C:\\Program Files\\LLVM\\bin" - name: top level cargo check run: cargo check --workspace --exclude test-blas --exclude tract-metal --exclude test-metal --exclude causal_llm env: LIBCLANG_PATH: "C:\\Program Files\\LLVM\\bin" - name: data / linalg / core / nnef / onnx / onnx-opl run: cargo test -p tract-data -p tract-linalg -p tract-core -p tract-nnef -p tract-onnx -p tract-onnx-opl - name: Onnx test suite run: | cargo test --release -p test-onnx-core -p test-unit-core env: TRACT_LOG: info ================================================ FILE: .gitignore ================================================ target **/*.rs.bk *.rustfmt *.back Cargo.lock examples/data .idea .cached/** flamegraph.svg perf.data* readings.* metrics tract.out .gdb_history /issue-* /.dinghy.toml .cargo proptest-regressions /tmp wheelhouse target-bisector* /nvidia ================================================ FILE: .travis/README.md ================================================ # Travis & minions test infrastructure ## Principles * travis is triggered on each commit, it will run `./.travis/native.sh` to perform x86_64 builds, plus a series of `./.travis/cross.sh` for as many arm boards configurations. 
* `.travis/cross.sh` pushes a `.tgz` to a s3 bucket for each configuration. The bundle contains a `entrypoint.sh` script and anything it depends on, including the relevant `tract` cli executable. The script is actually names `bundle-entrypoint.sh` in the repository. * devices are running `minion.sh` and will pick the new bundles from the s3 bucket, untar and run the `entrypoint.sh` ## Testing locally ``` cargo build --release -p tract-cli && cargo bench -p tract-linalg --no-run && .travis/run-bundle.sh `.travis/make_bundle.sh` ``` ## minion setup ``` MINION=user@hostname.local scp .travis/minionrc $MINION:.minionrc scp .travis/minion.sh $MINION: ``` also setup aws credentials (.aws/credentials) ``` apt install wget curl perl awscli screen vim netcat ``` On device: `.minioncrc` set a MINION_ID. At this point, running `./minion.sh` should work. ## crontab `crontab -e` ``` */10 * * * * $HOME/minion.sh ``` ## systemd timers in /etc/systemd/system/minion.service ``` [Unit] Description=Travis ci bench minion [Service] User=root Type=oneshot ExecStart=/home/root/minion.sh ``` in /etc/systemd/system/minion.timer ``` [Unit] Description=Run minion.service every 5 minutes [Timer] OnCalendar=*:0/5 [Install] WantedBy=timers.target ``` then ``` systemctl enable minion.timer systemctl start minion.timer ``` # Setup file server (http only) ``` sudo apt install nginx awscli vim ``` * setup aws credentials (.aws/credentials) * in $HOME/sync-data.sh: ``` ``` * chmod +x $HOME/sync-data.sh * run it: ./sync-data.sh * `crontab -e` ``` */5 * * * * $HOME/sync-data.sh ``` * `sudo vi /etc/nginx/sites-available/models` ``` server { root /home/raspbian/models/; location /models { } } ``` * `sudo ln -s /etc/nginx/sites-available/models /etc/nginx/sites-enabled/` * `sudo rm /etc/nginx/sites-enabled/default` * `sudo /etc/init.d/nginx reload` * test : `curl -I http://localhost/hey_snips_v1.pb` ================================================ FILE: .travis/android-ndk.sh 
================================================ #!/bin/sh set -ex which java || sudo apt install -y default-jdk ANDROID_SDK=$HOME/cached/android-sdk if [ ! -d "$ANDROID_SDK" ] then mkdir -p $ANDROID_SDK cd $ANDROID_SDK # ANDROID_SDK_VERSION=4333796 # "https://dl.google.com/android/repository/sdk-tools-linux-${ANDROID_SDK_VERSION}.zip" curl -s -o android-sdk.zip \ https://dl.google.com/android/repository/commandlinetools-linux-8092744_latest.zip unzip -q android-sdk.zip rm android-sdk.zip fi yes | $ANDROID_SDK/cmdline-tools/bin/sdkmanager --sdk_root=$ANDROID_SDK --licenses > /dev/null $ANDROID_SDK/cmdline-tools/bin/sdkmanager --sdk_root=$ANDROID_SDK \ "build-tools;30.0.0" "platform-tools" "platforms;android-31" "tools" "ndk-bundle" \ > /dev/null ================================================ FILE: .travis/asan.sh ================================================ #!/bin/sh set -ex # RUSTFLAGS=-Zsanitizer=address cargo +nightly test -Zbuild-std --target $(rustc -vV | sed -n 's|host: ||p') TARGET=$(rustc -vV | sed -n 's|host: ||p') rustup toolchain add nightly rustup component add rust-src --toolchain nightly-$TARGET export RUSTFLAGS=-Zsanitizer=address export RUSTDOCFLAGS=$RUSTFLAGS export RUSTUP_TOOLCHAIN=nightly export RUST_VERSION=nightly export CARGO_EXTRA="--target $TARGET" cargo -q test -q -p tract-linalg $CARGO_EXTRA # inventory, asan and macos liner are not playing nice, so we have to stop there if [ $(uname) == "Darwin" ] then exit 0 fi cargo -q test -q -p tract-core --features paranoid_assertions $CARGO_EXTRA ./.travis/regular-tests.sh if [ -n "$CI" ] then cargo clean fi ./.travis/onnx-tests.sh if [ -n "$CI" ] then cargo clean fi ./.travis/cli-tests.sh ================================================ FILE: .travis/bundle-entrypoint.sh ================================================ #!/bin/sh set -ex start=$(date +%s) ROOT=`pwd` if [ -n "$TRACT_RUN" ] then TRACT=$TRACT_RUN elif [ -x tract ] then TRACT="./tract" else cargo build -p tract-cli -q --release 
# Benchmark one network with tract and append measurements to the `metrics` file.
# $1: metric name for the net, $2: metric suffix (pass/pulse...), rest: tract CLI args.
net_bench() {
    net=$1
    pb=$2
    shift 2

    # Wall-clock eval time: run the bench and keep the "real" figure (truncated to 9 digits).
    $TRACT "$@" --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    v=`cat tract.out | grep -a real | cut -f 2 -d ' ' | sed 's/\([0-9]\{9,9\}\)[0-9]*/\1/'`
    echo net.$net.evaltime.$pb $v >> metrics

    # Second run with --readings enabled: tract writes per-stage samples to readings.out.
    $TRACT "$@" --readings --readings-heartbeat 1000 --machine-friendly -O bench --allow-random-input $BENCH_OPTS > tract.out
    for stage in model_ready before_optimize
    do
        # Stage names use '_' but readings.out may use '.' or '-': match any separator.
        pattern=$(echo $stage | sed 's/[_-]/./g')
        # Column 1: time to reach the stage.
        v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 1 -d ' ')
        echo net.$net.time_to_$stage.$pb $v >> metrics
        # Column 4: resident size at the stage.
        v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 4 -d ' ')
        echo net.$net.rsz_at_$stage.$pb $v >> metrics
        # Columns 11 and 10: freed and allocated bytes; "active" is their difference.
        f=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 11 -d ' ')
        a=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 10 -d ' ')
        # Guard against empty grep results (same guard as llm_bench): $(($a-$f)) with
        # empty operands is an arithmetic syntax error and would abort under `set -e`.
        if [ -n "$a" -a -n "$f" ]
        then
            echo net.$net.active_at_$stage.$pb $(($a-$f)) >> metrics
        fi
    done
}
--machine-friendly -O llm-bench $BENCH_OPTS > tract.out then cat tract.out echo llm.$net.pp512.$pb $(cat tract.out | grep -a PP512 | cut -f 2 -d ' ') >> metrics echo llm.$net.tg128.$pb $(cat tract.out | grep -a TG128 | cut -f 2 -d ' ') >> metrics fi if $TRACT "$@" --readings --readings-heartbeat 1000 --llm --machine-friendly -O llm-bench $BENCH_OPTS > /dev/null then for stage in model_ready before_optimize do pattern=$(echo $stage | sed 's/[_-]/./g') v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 1 -d ' ') echo llm.$net.time_to_$stage.$pb $v >> metrics v=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 4 -d ' ') echo llm.$net.rsz_at_$stage.$pb $v >> metrics f=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 11 -d ' ') a=$(grep -a $pattern readings.out | sed 's/ */ /g;s/^ *//' | cut -f 10 -d ' ') if [ -n "$a" -a -n "$f" ] then echo llm.$net.active_at_$stage.$pb $(($a-$f)) >> metrics fi done fi } net_bench arm_ml_kws_cnn_m pass $CACHEDIR/ARM-ML-KWS-CNN-M.pb -i 49,10,f32 --partial --input-node Mfcc net_bench hey_snips_v1 400ms $CACHEDIR/hey_snips_v1.pb -i 80,40,f32 net_bench hey_snips_v31 400ms $CACHEDIR/hey_snips_v3.1.pb -i 40,40,f32 net_bench hey_snips_v4_model17 2sec $CACHEDIR/hey_snips_v4_model17.pb -i 200,20,f32 net_bench hey_snips_v4_model17 pulse8 $CACHEDIR/hey_snips_v4_model17.pb -i S,20,f32 --pulse 8 net_bench hey_snips_v4_model17_nnef pulse8 --nnef-tract-pulse $CACHEDIR/hey_snips_v4_model17.alpha1.tar net_bench mobilenet_v1_1 pass $CACHEDIR/mobilenet_v1_1.0_224_frozen.pb -i 1,224,224,3,f32 net_bench mobilenet_v2_1 pass $CACHEDIR/mobilenet_v2_1.4_224_frozen.pb -i 1,224,224,3,f32 net_bench inceptionv1q pass $CACHEDIR/inceptionv1_quant.nnef.tar.gz --nnef-tract-core net_bench inceptionv3 pass $CACHEDIR/inception_v3_2016_08_28_frozen.pb -i 1,299,299,3,f32 net_bench mdl-en-2019-Q3-librispeech_onnx 2600ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i 264,40 net_bench mdl-en-2019-Q3-librispeech_onnx 
pulse_240ms $CACHEDIR/en_libri_real/model.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_lstm_bn_q7 2600ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i 264,40 net_bench en_tdnn_lstm_bn_q7 pulse_240ms $CACHEDIR/en_tdnn_lstm_bn_q7/model.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_8M 2600ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i 264,40 net_bench en_tdnn_8M pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_8M pulse_180ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 18 net_bench en_tdnn_8M pulse_120ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.onnx --output-node output -i S,40 --pulse 12 net_bench en_tdnn_8M_nnef pulse_240ms $CACHEDIR/mdl-en-2019-12-24-aho-corasick-18h01m33s.alpha1.a.tar --nnef-tract-pulse net_bench en_tdnn_15M 2600ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i 264,40 net_bench en_tdnn_15M pulse_240ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 24 net_bench en_tdnn_15M pulse_120ms $CACHEDIR/en_tdnn_15M.onnx --output-node output -i S,40 --pulse 12 net_bench en_tdnn_15M_nnef pulse_240ms $CACHEDIR/en_tdnn_15M.alpha1.tar --nnef-tract-pulse net_bench dummy-conmer-12M pulse_120ms $CACHEDIR/dummy-conmer-12M.nnef.tar --nnef-tract-core --pulse 12 net_bench en_tdnn_pyt_15M pulse_120ms $CACHEDIR/mdl-en-2023-03-27-allen-17h11m50s.nnef.tar --nnef-tract-core --pulse 12 net_bench speaker_id pulse8 $CACHEDIR/speaker-id-2019-03.onnx -i 1,S,40,f32 --output-node 257 --partial --pulse 8 net_bench voicecom_fake_quant 2sec $CACHEDIR/snips-voice-commands-cnn-fake-quant.pb -i 200,10,f32 net_bench voicecom_float 2sec $CACHEDIR/snips-voice-commands-cnn-float.pb -i 200,10,f32 net_bench trunet pulse1_f32 $CACHEDIR/trunet_dummy.nnef.tgz --nnef-tract-core --pulse 1 net_bench trunet pulse1_f16 $CACHEDIR/trunet_dummy.nnef.tgz 
--nnef-tract-core -t f32_to_f16 --pulse 1 . $PRIVATE if [ $(uname) = "Darwin" ] then LLM_BACKENDS="cpu metal" fi if which nvidia-smi then LLM_BACKENDS="cpu cuda" fi if [ -n "$LLM_BACKENDS" ] then for backend in $LLM_BACKENDS do case $backend in cpu) extra="--timeout 180";; metal) extra="--metal --timeout 60" BENCH_OPTS="--warmup-loops 1" ;; cuda) extra="--cuda --timeout 60" BENCH_OPTS="--warmup-loops 1" ;; esac llm_bench llama-3_2-1B-q40ef32-516 $backend $CACHEDIR/Llama-3.2-1B-q40ef32.516.nnef.tgz $extra llm_bench openelm-270M-q40ef16-516 $backend $CACHEDIR/OpenELM-270M-q40ef16.516.nnef.tgz $extra llm_bench llama-3_2-1B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.2-1B-Instruct-q40ef16.541.nnef.tgz $extra llm_bench openelm-270M-q40ef16-541 $backend $CACHEDIR/OpenELM-270M-q40ef16.541.nnef.tgz $extra net_bench parakeet-tdt-600m-v3-f32f32-preprocessor_1s $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.preprocessor.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set A=16000 $extra net_bench parakeet-tdt-600m-v3-f32f32-encoder_1s $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.encoder.p1.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set S=100 $extra net_bench parakeet-tdt-600m-v3-f32f32-decoder_pass $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.decoder.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set T=1 $extra net_bench parakeet-tdt-600m-v3-f32f32-joint_pass $backend $CACHEDIR/parakeet-tdt-0.6b-v3-f32f32.608.joint.nnef.tgz \ -t transformers_detect_all --nnef-tract-transformers --set B=1 --set R=1 --set U=1 $extra if [ "$backend" != "cpu" ] then llm_bench llama-3_2-3B-q40ef32-516 $backend $CACHEDIR/Llama-3.2-3B-q40ef32.516.nnef.tgz $extra llm_bench llama-3_1-8B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.1-8B-Instruct-q40ef16.541.nnef.tgz $extra llm_bench llama-3_2-3B-instruct-q40ef16-541 $backend $CACHEDIR/Llama-3.2-3B-Instruct-q40ef16.541.nnef.tgz $extra 
llm_bench qwen3-1_7B-q40ef16-541 $backend $CACHEDIR/Qwen3-1.7B-q40ef16.541.nnef.tgz $extra fi done fi end=$(date +%s) echo bundle.bench-runtime $(($end - $start)) >> metrics ================================================ FILE: .travis/cache_file.sh ================================================ #!/bin/sh set -e if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi mkdir -p $CACHEDIR cd $CACHEDIR for file in $@ do mkdir -p $(dirname $file) if [ ! -e $file ] then wget --no-verbose https://s3.amazonaws.com/tract-ci-builds/tests/$file -O $file.tmp \ || aws s3 cp s3://tract-ci-builds/tests/$file $file.tmp mv $file.tmp $file fi done exit 0 ================================================ FILE: .travis/cargo-deny-check.sh ================================================ #!/bin/sh if [ -e cargo-deny ] then CARGO_DENY=`pwd`/cargo-deny else CARGO_DENY="cargo deny" fi (cd api/rs ; $CARGO_DENY check) ================================================ FILE: .travis/ci-system-setup.sh ================================================ #!/bin/sh set -e [ -d $ROOT/.travis ] || exit 1 "\$ROOT not set correctly '$ROOT'" if [ -z "$RUSTUP_TOOLCHAIN" ] then export RUSTUP_TOOLCHAIN=1.91.0 fi export RUSTUP_TOOLCHAIN PATH=$PATH:$HOME/.cargo/bin if [ -n "$CI" -a ! -e /tmp/ci-setup-done ] then if [ `uname` = "Darwin" ] then sysctl -n machdep.cpu.brand_string python3 --version brew install coreutils numpy python-setuptools jshon PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH" export PYTHON_BIN_PATH=python3 else if [ "$RUNNER_ENVIRONMENT" != "self-hosted" ] then if [ `whoami` != "root" ] then SUDO=sudo fi $SUDO apt-get update # $SUDO apt-get upgrade -y $SUDO apt-get install -y llvm python3 python3-numpy jshon wget curl build-essential sudo jshon clang if ! 
which aws then curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o awscliv2.zip $SUDO apt-get install -y unzip unzip -q awscliv2.zip $SUDO ./aws/install aws --version fi fi fi which rustup || curl https://sh.rustup.rs -sSf | sh -s -- -y rustup update rustup toolchain add $RUSTUP_TOOLCHAIN [ -n "$GITHUB_PATH" ] && echo $HOME/.cargo/bin >> $GITHUB_PATH touch /tmp/ci-setup-done fi S3=https://s3.amazonaws.com/tract-ci-builds/tests if [ -n "$LARGE_MODELS" ] then export CACHE_FILE=$ROOT/.travis/cache_file.sh export MODELS=$HOME/.cache/models export CACHEDIR=$MODELS mkdir -p $MODELS elif [ -n "$CI" ] then MODELS=$S3 CACHE_FILE=true else CACHE_FILE=$ROOT/.travis/cache_file.sh MODELS=${MODELS:-$ROOT/.cached} mkdir -p $MODELS fi if [ -z "$TRACT_RUN" ] then TRACT_RUN="cargo run -p tract-cli $CARGO_EXTRA --profile opt-no-lto --no-default-features --features transformers,pulse --" export TRACT_RUN fi TRACT_RUNTIMES="-O" if [ "$(uname)" = "Darwin" ] && (system_profiler SPDisplaysDataType | grep -i "Metal") then TRACT_RUNTIMES="$TRACT_RUNTIMES --metal" fi if which nvidia-smi then TRACT_RUNTIMES="$TRACT_RUNTIMES --cuda" fi echo $TRACT_RUNTIMES ================================================ FILE: .travis/cli-tests.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color set -e ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh echo echo $WHITE • build tract $NC echo TRACT_RUN=$(cargo build --message-format json -p tract-cli $CARGO_EXTRA --profile opt-no-lto | jq -r 'select(.target.name == "tract" and .executable).executable') echo TRACT_RUN=$TRACT_RUN export TRACT_RUN echo echo $WHITE • harness/nnef-test-cases $NC echo for t in `find harness/nnef-test-cases -name runme.sh` do echo $WHITE$t$NC $t done echo echo $WHITE • onnx/test_cases $NC echo # ( cd onnx/test_cases ; CACHEDIR=$MODELS ./run_all.sh ) echo echo $WHITE • full models command line test cases $NC echo echo $WHITE image $NC $CACHE_FILE squeezenet.onnx $TRACT_RUN $MODELS/squeezenet.onnx -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1000,1,1,f32 $CACHE_FILE inception_v3_2016_08_28_frozen.pb $TRACT_RUN \ $MODELS/inception_v3_2016_08_28_frozen.pb \ -i 1,299,299,3,f32 -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $TRACT_RUN \ $MODELS/inception_v3_2016_08_28_frozen.pb \ -i 1,299,299,3,f32 -O \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE mobilenet_v1_1.0_224_frozen.pb $TRACT_RUN $MODELS/mobilenet_v1_1.0_224_frozen.pb \ -O -i 1,224,224,3,f32 \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE mobilenet_v2_1.4_224_frozen.pb $TRACT_RUN $MODELS/mobilenet_v2_1.4_224_frozen.pb \ -O -i 1,224,224,3,f32 \ run -q \ --allow-random-input \ --assert-output-fact 1,1001,f32 $CACHE_FILE inceptionv1_quant.nnef.tar.gz inceptionv1_quant.io.npz $TRACT_RUN $MODELS/inceptionv1_quant.nnef.tar.gz \ --nnef-tract-core \ --input-facts-from-bundle $MODELS/inceptionv1_quant.io.npz -O \ run \ --input-from-bundle $MODELS/inceptionv1_quant.io.npz \ --allow-random-input \ --assert-output-bundle $MODELS/inceptionv1_quant.io.npz echo $WHITE audio $NC $CACHE_FILE ARM-ML-KWS-CNN-M.pb $TRACT_RUN $MODELS/ARM-ML-KWS-CNN-M.pb \ -O -i 49,10,f32 --partial \ --input-node Mfcc \ run -q \ --allow-random-input $CACHE_FILE 
#!/bin/sh
# Build the linalg cost-model dataset generator for a musl target and package it
# as a minion task tarball. $1: architecture (aarch64|armv7), $2: task id suffix.

set -ex

ARCH=$1
ID=$2

case $ARCH in
    aarch64)
        MUSL_TRIPLE=aarch64-linux-musl
        RUST_TRIPLE=aarch64-unknown-linux-musl
        PLATFORM=aarch64-unknown-linux-musl
        ;;
    armv7)
        MUSL_TRIPLE=armv7l-linux-musleabihf
        RUST_TRIPLE=armv7-unknown-linux-musleabihf
        PLATFORM=armv7-unknown-linux-musl
        ;;
    *)
        # `exit` only accepts a numeric status; print the message, then fail.
        echo "Can't build with musl for $ARCH" >&2
        exit 1
        ;;
esac

rustup update
rustup target add $RUST_TRIPLE

# Toolchain mirrored on S3 (musl.cc is flaky).
#curl -s https://musl.cc/${MUSL_TRIPLE}-cross.tgz | tar zx
curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/${MUSL_TRIPLE}-cross.tgz | tar zx
MUSL_BIN=`pwd`/${MUSL_TRIPLE}-cross/bin
export PATH=$MUSL_BIN:$PATH
export TARGET_CC=$MUSL_BIN/${MUSL_TRIPLE}-gcc

# Derive the CARGO_TARGET_<TRIPLE>_CC / _LINKER env var names from the triple.
RUST_TRIPLE_ENV=$(echo $RUST_TRIPLE | tr 'a-z-' 'A-Z_')
export CARGO_TARGET_${RUST_TRIPLE_ENV}_CC=$TARGET_CC
export CARGO_TARGET_${RUST_TRIPLE_ENV}_LINKER=$TARGET_CC

( cd linalg/cost_model ; cargo build --target $RUST_TRIPLE --release )

# Assemble the minion task bundle: binary + vars + entrypoint.
TASK_NAME=cost-model-dataset-$ID
mkdir $TASK_NAME
mv linalg/cost_model/target/${RUST_TRIPLE}/release/cost_model $TASK_NAME
echo "export TIMEOUT=$((86400*4))" > $TASK_NAME/vars
echo "#!/bin/sh" > $TASK_NAME/entrypoint.sh
echo "mkdir product" >> $TASK_NAME/entrypoint.sh
echo "./cost_model ds --size 10000 product/$TASK_NAME.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 2000 -k 128 -n 16 product/$TASK_NAME-small-k-tiny-n.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 5000 -m 1-512 -k 16,64,256 -n 1-20 product/$TASK_NAME-multiple-k-tiny-n.txt" >> $TASK_NAME/entrypoint.sh
# echo "./cost_model ds --size 1000 -m 1-512 -k 256,1024 -n 1-512 product/$TASK_NAME-bigmn" >> $TASK_NAME/entrypoint.sh
chmod +x $TASK_NAME/entrypoint.sh
tar czf $TASK_NAME.tgz $TASK_NAME

if [ -n "$AWS_ACCESS_KEY_ID" ]
then
    aws s3 cp $TASK_NAME.tgz s3://tract-ci-builds/tasks/$PLATFORM/$TASK_NAME.tgz
fi
$ROOT/.travis/ci-system-setup.sh which cargo-dinghy || ( mkdir -p /tmp/cargo-dinghy if [ `arch` = x86_64 -o `arch` = i386 -o `arch` = arm64 ] then cd /tmp/cargo-dinghy if [ `uname` = "Darwin" ] then NAME=macos else NAME=linux fi VERSION=0.8.0 wget -q https://github.com/snipsco/dinghy/releases/download/$VERSION/cargo-dinghy-$NAME-$VERSION.tgz -O cargo-dinghy.tgz tar vzxf cargo-dinghy.tgz --strip-components 1 mv cargo-dinghy $HOME/.cargo/bin else cargo install cargo-dinghy fi ) if [ -z "$PLATFORM" -a -n "$1" ] then PLATFORM=$1 fi case "$PLATFORM" in "raspbian") [ -e $HOME/cached/raspitools ] || git clone --depth 1 https://github.com/raspberrypi/tools $HOME/cached/raspitools TOOLCHAIN=$HOME/cached/raspitools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf export RUSTC_TRIPLE=arm-unknown-linux-gnueabihf rustup target add $RUSTC_TRIPLE echo "[platforms.$PLATFORM]\nrustc_triple='$RUSTC_TRIPLE'\ntoolchain='$TOOLCHAIN'" > .dinghy.toml cargo dinghy --platform $PLATFORM build --release -p tract-cli -p example-tensorflow-mobilenet-v2 -p tract-ffi ;; "aarch64-linux-android"|"armv7-linux-androideabi"|"i686-linux-android"|"x86_64-linux-android") case "$PLATFORM" in "aarch64-linux-android") ANDROID_CPU=aarch64 RUSTC_TRIPLE=aarch64-linux-android ;; "armv7-linux-androideabi") ANDROID_CPU=armv7 RUSTC_TRIPLE=armv7-linux-androideabi ;; "i686-linux-android") ANDROID_CPU=i686 RUSTC_TRIPLE=i686-linux-android ;; "x86_64-linux-android") ANDROID_CPU=x86_64 RUSTC_TRIPLE=x86_64-linux-android ;; esac export TARGET_AR=ar if [ -e /usr/local/lib/android/sdk/ndk-bundle ] then export ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk-bundle else export ANDROID_SDK_HOME=$HOME/cached/android-sdk [ -e $ANDROID_SDK_HOME ] || ./.travis/android-ndk.sh fi rustup target add $RUSTC_TRIPLE cargo dinghy --platform auto-android-$ANDROID_CPU build -p tract-linalg -p tract-ffi ;; "aarch64-apple-ios") rustup target add aarch64-apple-ios cargo dinghy --platform auto-ios-aarch64 check -p tract-linalg -p tract-ffi ;; 
"aarch64-apple-darwin" | "x86_64-unknown-linux-gnu") RUSTC_TRIPLE=$PLATFORM rustup target add $RUSTC_TRIPLE cargo build --target $RUSTC_TRIPLE -p tract-cli --release ;; "aarch64-unknown-linux-gnu-stretch" | "armv7-unknown-linux-gnueabihf-stretch" | "x86_64-unknown-linux-gnu-stretch") INNER_PLATFORM=${PLATFORM%-stretch} (cd .travis/docker-debian-stretch; docker build --tag debian-stretch .) docker run -v `pwd`:/tract -w /tract \ -e CI=true \ -e SKIP_QEMU_TEST=skip \ -e PLATFORM=$INNER_PLATFORM debian-stretch \ ./.travis/cross.sh sudo chown -R `whoami` . export RUSTC_TRIPLE=$INNER_PLATFORM ;; "aarch64-unknown-linux-gnu" | "armv6vfp-unknown-linux-gnueabihf" | "armv7-unknown-linux-gnueabihf" | \ "aarch64-unknown-linux-musl" | "armv7-unknown-linux-musl" | "cortexa53-unknown-linux-musl" ) case "$PLATFORM" in "aarch64-unknown-linux-gnu") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export TRACT_CPU_AARCH64_KIND=a55 export RUSTC_TRIPLE=$ARCH-unknown-linux-gnu export DEBIAN_TRIPLE=$ARCH-linux-gnu ;; "armv6vfp-unknown-linux-gnueabihf") export ARCH=armv6vfp export LIBC_ARCH=armhf export QEMU_ARCH=arm export QEMU_OPTS="-cpu cortex-a15" export RUSTC_TRIPLE=arm-unknown-linux-gnueabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf ;; "armv7-unknown-linux-gnueabihf") export ARCH=armv7 export QEMU_ARCH=arm export LIBC_ARCH=armhf export QEMU_OPTS="-cpu cortex-a15" export RUSTC_TRIPLE=armv7-unknown-linux-gnueabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf export TARGET_CC=$DEBIAN_TRIPLE-gcc export TRACT_CPU_ARM32_NEON=true export DINGHY_TEST_ARGS="--env TRACT_CPU_ARM32_NEON=true" ;; "aarch64-unknown-linux-musl") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export RUSTC_TRIPLE=$ARCH-unknown-linux-musl export DEBIAN_TRIPLE=$ARCH-linux-gnu export TRACT_CPU_AARCH64_KIND=a55 export CUSTOM_TC=`pwd`/aarch64-linux-musl-cross [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/aarch64-linux-musl-cross.tgz | tar zx ;; 
"cortexa53-unknown-linux-musl") export ARCH=aarch64 export QEMU_ARCH=aarch64 export LIBC_ARCH=arm64 export RUSTC_TRIPLE=$ARCH-unknown-linux-musl export DEBIAN_TRIPLE=$ARCH-linux-gnu export TRACT_CPU_AARCH64_KIND=a53 export QEMU_OPTS="-cpu cortex-a53" export CUSTOM_TC=`pwd`/aarch64-linux-musl-cross [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/aarch64-linux-musl-cross.tgz | tar zx ;; "armv7-unknown-linux-musl") export ARCH=armv7 export QEMU_ARCH=arm export LIBC_ARCH=armhf export RUSTC_TRIPLE=armv7-unknown-linux-musleabihf export DEBIAN_TRIPLE=arm-linux-gnueabihf export CUSTOM_TC=`pwd`/armv7l-linux-musleabihf-cross export TRACT_CPU_ARM32_NEON=true export DINGHY_TEST_ARGS="--env TRACT_CPU_ARM32_NEON=true" [ -d "$CUSTOM_TC" ] || curl -s https://s3.amazonaws.com/tract-ci-builds/toolchains/armv7l-linux-musleabihf-cross.tgz | tar zx export TARGET_CFLAGS="-mfpu=neon" ;; *) echo "unsupported platform $PLATFORM" exit 1 ;; esac mkdir -p $ROOT/target/$RUSTC_TRIPLE echo "[platforms.$PLATFORM]\nrustc_triple='$RUSTC_TRIPLE'" > .dinghy.toml if [ -n "$DEBIAN_TRIPLE" ] then PACKAGES="$PACKAGES binutils-$DEBIAN_TRIPLE gcc-$DEBIAN_TRIPLE libc6-dev-$LIBC_ARCH-cross" echo "deb_multiarch='$DEBIAN_TRIPLE'" >> .dinghy.toml fi if [ -n "$CUSTOM_TC" ] then echo "toolchain='$CUSTOM_TC'" >> .dinghy.toml fi echo "[script_devices.qemu-$PLATFORM]\nplatform='$PLATFORM'\npath='$ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM'" >> .dinghy.toml echo "#!/bin/sh\nexe=\$1\nshift\n/usr/bin/qemu-$QEMU_ARCH $QEMU_OPTS -L /usr/$DEBIAN_TRIPLE/ \$exe --test-threads 1 \"\$@\"" > $ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM chmod +x $ROOT/target/$RUSTC_TRIPLE/qemu-$PLATFORM DINGHY_TEST_ARGS="$DINGHY_TEST_ARGS --env PROPTEST_MAX_SHRINK_ITERS=100000000" $SUDO apt-get -y install --no-install-recommends qemu-system-arm qemu-user libssl-dev pkg-config $PACKAGES rustup target add $RUSTC_TRIPLE if [ -z "$SKIP_QEMU_TEST" ] then qemu-$QEMU_ARCH --version cargo dinghy --platform $PLATFORM 
$DINGHY_TEST_ARGS test --profile opt-no-lto -p tract-linalg -- --nocapture cargo dinghy --platform $PLATFORM $DINGHY_TEST_ARGS test --profile opt-no-lto -p tract-core fi cargo dinghy --platform $PLATFORM $DINGHY_TEST_ARGS check -p tract-ffi # keep lto for these two are they're going to devices. cargo dinghy --platform $PLATFORM build --release -p tract-cli -p example-tensorflow-mobilenet-v2 ;; wasm32-wasi) PLATFORM=wasm32-wasip1 wasmtime --version rustup target add $PLATFORM cargo check --target $PLATFORM --features getrandom-js -p tract-onnx -p tract-tensorflow RUSTFLAGS='-C target-feature=+simd128' CARGO_TARGET_WASM32_WASIP1_RUNNER=wasmtime \ cargo test --target=$PLATFORM -p tract-linalg -p tract-core -p test-unit-core ;; wasm32-*) rustup target add $PLATFORM cargo check --target $PLATFORM --features getrandom-js -p tract-onnx -p tract-tensorflow ;; *) echo "Don't know what to do for platform: $PLATFORM" exit 2 ;; esac if [ -e "target/$RUSTC_TRIPLE/release/tract" ] then export RUSTC_TRIPLE TASK_NAME=`.travis/make_bundle.sh` echo bench task: $TASK_NAME if [ -n "$AWS_ACCESS_KEY_ID" ] then aws s3 cp $TASK_NAME.tgz s3://tract-ci-builds/tasks/$PLATFORM/$TASK_NAME.tgz fi fi ================================================ FILE: .travis/debug-tests.sh ================================================ #!/bin/sh set -ex if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi # useful as debug_asserts will come into play cargo test -p tract-core cargo test -p test-onnx-core -p test-nnef-cycle -p test-unit-core ================================================ FILE: .travis/docker-debian-stretch/Dockerfile ================================================ FROM debian:stretch COPY sources.list /etc/apt/sources.list ================================================ FILE: .travis/docker-debian-stretch/sources.list ================================================ deb http://archive.debian.org/debian/ stretch contrib main non-free deb http://archive.debian.org/debian 
stretch-backports main deb http://archive.debian.org/debian-security stretch/updates main ================================================ FILE: .travis/examples.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color set -e ROOT=$(dirname $(dirname $(realpath $0))) . $ROOT/.travis/ci-system-setup.sh for t in `find examples -name ci.sh` do df -h ex=$(dirname $t) echo ::group:: $ex echo $WHITE $ex $NC ( cd $ex ; sh ./ci.sh ) if [ -n "$CI" ] then cargo clean fi echo ::endgroup:: done ================================================ FILE: .travis/llm-expectations-541 ================================================ Qwen--Qwen3-1.7B-f16f16.p0s100.arm64.cpu 0.96 Qwen--Qwen3-1.7B-f16f16.p0s100.arm64.metal 0.96 Qwen--Qwen3-1.7B-f16f16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p0s100.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-f16f16.p50s50.arm64.cpu 0.97 Qwen--Qwen3-1.7B-f16f16.p50s50.arm64.metal 0.97 Qwen--Qwen3-1.7B-f16f16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.arm64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.arm64.metal 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-f16f16.p99s1.x86_64.cuda 0.99 Qwen--Qwen3-1.7B-q40ef16.p0s100.arm64.cpu 0.92 Qwen--Qwen3-1.7B-q40ef16.p0s100.arm64.metal 0.98 Qwen--Qwen3-1.7B-q40ef16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p0s100.x86_64.cuda 0.92 Qwen--Qwen3-1.7B-q40ef16.p50s50.arm64.cpu 0.96 Qwen--Qwen3-1.7B-q40ef16.p50s50.arm64.metal 0.99 Qwen--Qwen3-1.7B-q40ef16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p50s50.x86_64.cuda 0.98 Qwen--Qwen3-1.7B-q40ef16.p99s1.arm64.cpu 0.97 Qwen--Qwen3-1.7B-q40ef16.p99s1.arm64.metal 0.99 Qwen--Qwen3-1.7B-q40ef16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-1.7B-q40ef16.p99s1.x86_64.cuda 0.96 Qwen--Qwen3-8B-f16f16.p0s100.arm64.cpu 0.94 Qwen--Qwen3-8B-f16f16.p0s100.arm64.metal 0.95 Qwen--Qwen3-8B-f16f16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p0s100.x86_64.cuda 0.99 
Qwen--Qwen3-8B-f16f16.p50s50.arm64.cpu 0.94 Qwen--Qwen3-8B-f16f16.p50s50.arm64.metal 0.95 Qwen--Qwen3-8B-f16f16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-8B-f16f16.p99s1.arm64.cpu 0.96 Qwen--Qwen3-8B-f16f16.p99s1.arm64.metal 0.99 Qwen--Qwen3-8B-f16f16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-8B-f16f16.p99s1.x86_64.cuda 0.99 Qwen--Qwen3-8B-q40ef16.p0s100.arm64.cpu 0.86 Qwen--Qwen3-8B-q40ef16.p0s100.arm64.metal 0.97 Qwen--Qwen3-8B-q40ef16.p0s100.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p0s100.x86_64.cuda 0.96 Qwen--Qwen3-8B-q40ef16.p50s50.arm64.cpu 0.98 Qwen--Qwen3-8B-q40ef16.p50s50.arm64.metal 0.99 Qwen--Qwen3-8B-q40ef16.p50s50.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p50s50.x86_64.cuda 0.99 Qwen--Qwen3-8B-q40ef16.p99s1.arm64.cpu 0.96 Qwen--Qwen3-8B-q40ef16.p99s1.arm64.metal 0.98 Qwen--Qwen3-8B-q40ef16.p99s1.x86_64.cpu 0.99 Qwen--Qwen3-8B-q40ef16.p99s1.x86_64.cuda 0.96 apple--OpenELM-270M-f16f16.p0s100.arm64.cpu 0.98 apple--OpenELM-270M-f16f16.p0s100.arm64.metal 0.99 apple--OpenELM-270M-f16f16.p0s100.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p0s100.x86_64.cuda 0.98 apple--OpenELM-270M-f16f16.p50s50.arm64.cpu 0.92 apple--OpenELM-270M-f16f16.p50s50.arm64.metal 0.92 apple--OpenELM-270M-f16f16.p50s50.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p50s50.x86_64.cuda 0.99 apple--OpenELM-270M-f16f16.p99s1.arm64.cpu 0.97 apple--OpenELM-270M-f16f16.p99s1.arm64.metal 0.99 apple--OpenELM-270M-f16f16.p99s1.x86_64.cpu 0.99 apple--OpenELM-270M-f16f16.p99s1.x86_64.cuda 0.99 apple--OpenELM-270M-q40ef16.p0s100.arm64.cpu 0.99 apple--OpenELM-270M-q40ef16.p0s100.arm64.metal 0.99 apple--OpenELM-270M-q40ef16.p0s100.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p0s100.x86_64.cuda 0.95 apple--OpenELM-270M-q40ef16.p50s50.arm64.cpu 0.97 apple--OpenELM-270M-q40ef16.p50s50.arm64.metal 0.95 apple--OpenELM-270M-q40ef16.p50s50.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p50s50.x86_64.cuda 0.94 apple--OpenELM-270M-q40ef16.p99s1.arm64.cpu 0.99 
apple--OpenELM-270M-q40ef16.p99s1.arm64.metal 0.99 apple--OpenELM-270M-q40ef16.p99s1.x86_64.cpu 0.99 apple--OpenELM-270M-q40ef16.p99s1.x86_64.cuda 0.89 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.arm64.cpu 0.96 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.arm64.metal 0.92 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.arm64.cpu 0.95 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.arm64.metal 0.95 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.x86_64.cpu 0.98 meta-llama--Llama-3.1-8B-Instruct-f16f16.p50s50.x86_64.cuda 0.98 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.arm64.cpu 0.93 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.x86_64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p0s100.x86_64.cuda 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.arm64.cpu 0.93 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.1-8B-Instruct-q40ef16.p99s1.x86_64.cuda 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 
meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.arm64.cpu 0.96 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.arm64.metal 0.96 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.x86_64.cpu 0.98 meta-llama--Llama-3.2-1B-Instruct-f16f16.p50s50.x86_64.cuda 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.arm64.cpu 0.97 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.arm64.metal 0.96 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.arm64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-f32f32.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.arm64.cpu 0.97 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p0s100.x86_64.cuda 0.98 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.arm64.cpu 0.86 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p50s50.x86_64.cuda 0.94 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.arm64.cpu 0.98 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 
meta-llama--Llama-3.2-1B-Instruct-q40ef16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.arm64.cpu 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.arm64.metal 0.97 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p50s50.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-f16f16.p99s1.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.arm64.cpu 0.96 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.arm64.metal 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p0s100.x86_64.cuda 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.arm64.cpu 0.97 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.arm64.metal 0.98 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p50s50.x86_64.cuda 0.97 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.arm64.cpu 0.93 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.arm64.metal 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.x86_64.cpu 0.99 meta-llama--Llama-3.2-3B-Instruct-q40ef16.p99s1.x86_64.cuda 0.94 ================================================ FILE: .travis/make_bundle.sh ================================================ #!/bin/sh set -ex TRAVIS_COMMIT=${GITHUB_SHA:-dummy-commit-id} BRANCH=$(echo $GITHUB_HEAD_REF | tr '/' '_') BRANCH=${BRANCH:-main} PLATFORM=${PLATFORM:-dummy-platform} dates=`date -u +"%Y%m%dT%H%M%S %s"` date_iso=`echo $dates | cut -f 1 -d ' '` timestamp=`echo 
$dates | cut -f 2 -d ' '` TASK_NAME=tract-$date_iso mkdir -p $TASK_NAME echo "export TASK_NAME=$TASK_NAME" > $TASK_NAME/vars echo "export TRAVIS_COMMIT=$TRAVIS_COMMIT" >> $TASK_NAME/vars TRAVIS_BRANCH_SANE=`echo $BRANCH | tr '/' '_'` echo "export TRAVIS_BRANCH_SANE=$TRAVIS_BRANCH_SANE" >> $TASK_NAME/vars echo "export DATE_ISO=$date_iso" >> $TASK_NAME/vars echo "export TIMESTAMP=$timestamp" >> $TASK_NAME/vars echo "export PLATFORM=$PLATFORM" >> $TASK_NAME/vars if which gstat > /dev/null then STAT=gstat else STAT=stat fi touch sizes for bin in example-tensorflow-mobilenet-v2 tract do if [ -e target/$RUSTC_TRIPLE/release/$bin ] then binary_size_cli=$($STAT -c "%s" target/$RUSTC_TRIPLE/release/$bin) token=$(echo $bin | tr '-' '_') if [ "$bin" = "tract" ] then token=cli fi echo binary_size.$token $binary_size_cli >> sizes fi done cp target/$RUSTC_TRIPLE/release/tract $TASK_NAME cp sizes $TASK_NAME cp .travis/bundle-entrypoint.sh $TASK_NAME/entrypoint.sh tar czf $TASK_NAME.tgz $TASK_NAME/ echo $TASK_NAME ================================================ FILE: .travis/minion.sh ================================================ #!/bin/bash set -ex . $HOME/.minionrc exec 200>$LOCKFILE || exit 1 flock -n 200 || { echo "WARN: flock() failed." >&2; exit 0; } mkdir -p $WORKDIR/taskdone/ for task in `aws s3 ls $S3PATH_TASKS/$PLATFORM/ | awk '{ print $4; }'` do cd $HOME task_name="${task%.tgz}" if [ -e $WORKDIR/taskdone/$task_name ] then continue fi echo considering task $task rm -rf $WORKDIR/current mkdir -p $WORKDIR/current cd $WORKDIR/current aws s3 cp s3://$S3PATH_TASKS/$PLATFORM/$task . tar zxf $task . 
$task_name/vars cd $task_name ( ./entrypoint.sh 2> stderr.log > stdout.log || true ) gzip stderr.log gzip stdout.log aws s3 cp stderr.log.gz s3://$S3PATH_RESULTS/$MINION_ID/$task_name/stderr.log.gz aws s3 cp stdout.log.gz s3://$S3PATH_RESULTS/$MINION_ID/$task_name/stdout.log.gz touch $WORKDIR/taskdone/$task_name cat metrics | sed "s/^/$GRAPHITE_PREFIX.$PLATFORM.$MINION_ID.$TRAVIS_BRANCH_SANE./;s/$/ $TIMESTAMP/" \ | tr '-' '_' > graphite if nc --version then # GNU export GRAPHITE_HOST export GRAPHITE_PORT cat graphite | while read line do echo $line | nc -c -w 1 $GRAPHITE_HOST $GRAPHITE_PORT done else # BSD nc -q 5 $GRAPHITE_HOST $GRAPHITE_PORT < graphite fi done sleep 1 echo "DONE" ================================================ FILE: .travis/minionrc ================================================ MINION_ID= LOCKFILE=/tmp/minion-lock PLATFORM=raspbian GRAPHITE_HOST=graphite-proxy.snips.net GRAPHITE_PORT=2003 GRAPHITE_PREFIX=tract S3PATH_TASKS=tract-ci-builds/tasks S3PATH_LOGS=tract-ci-builds/logs S3PATH_RESULTS=tract-ci-builds/logs WORKDIR=$HOME/tract-minion CACHEDIR=$WORKDIR/cache ================================================ FILE: .travis/native.sh ================================================ #!/bin/sh set -ex if [ -z "$RUSTUP_TOOLCHAIN" ] then export RUSTUP_TOOLCHAIN=1.91.0 fi rustup update cargo update cargo check --all-targets --workspace --exclude test-tflite --exclude test-metal --exclude tract-metal ./.travis/onnx-tests.sh ./.travis/regular-tests.sh ./.travis/test-harness.sh if [ -n "$CI" ] then cargo clean fi if [ `uname` = "Linux" ] then ./.travis/tflite.sh fi if [ -n "$CI" ] then cargo clean fi if nvidia-smi > /dev/null 2>&1 then cargo test -p tract-cuda --lib cargo test -p test-cuda fi ./.travis/cli-tests.sh ================================================ FILE: .travis/onnx-tests.sh ================================================ #!/bin/sh set -ex ROOT=$(dirname $(realpath $0))/.. . 
$ROOT/.travis/ci-system-setup.sh opset=onnx_"${1:-1_13_0}" cargo -q test -p test-unit-core $CARGO_EXTRA -q cargo -q test -p test-onnx-core $CARGO_EXTRA -q --no-default-features --features $opset cargo -q test -p test-nnef-cycle $CARGO_EXTRA -q --no-default-features ================================================ FILE: .travis/regular-tests.sh ================================================ #!/bin/sh set -e set -x cd $(dirname $0) ./test-published-crates.sh if [ -n "$CI" ] then cargo clean fi ./test-rt.sh if [ -n "$CI" ] then cargo clean fi ================================================ FILE: .travis/run-bundle.sh ================================================ #!/bin/sh set -ex BUNDLE_NAME=$1 tar zxf $BUNDLE_NAME.tgz ( cd $BUNDLE_NAME . ./vars ./entrypoint.sh ) # rm -rf "$BUNDLE_NAME" "$BUNDLE_NAME.tgz" ================================================ FILE: .travis/run_all.sh ================================================ #!/bin/sh set -ex `dirname $0`/native.sh cd `dirname $0`/../examples for i in * do (cd $i; cargo test --release) done ================================================ FILE: .travis/setup-sccache.sh ================================================ #!/bin/sh set -ex export SCCACHE_DIR=$HOME/.cache/sccache export SCCACHE_CACHE_SIZE=2G if [ -n "$GITHUB_ENV" ] then echo "SCCACHE_DIR=$HOME/.cache/sccache" >> $GITHUB_ENV echo "SCCACHE_CACHE_SIZE=2G" >> $GITHUB_ENV echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV echo "$HOME/.local/bin" >> $GITHUB_PATH fi LINK=https://github.com/mozilla/sccache/releases/download SCCACHE_VERSION=v0.10.0 echo $HOME if [ `uname` = "Linux" ] then SCCACHE_FILE=sccache-$SCCACHE_VERSION-x86_64-unknown-linux-musl else SCCACHE_FILE=sccache-$SCCACHE_VERSION-x86_64-apple-darwin fi mkdir -p $SCCACHE_DIR mkdir -p $HOME/.local/bin for i in 1 2 3 4 5 do curl -L "$LINK/$SCCACHE_VERSION/$SCCACHE_FILE.tar.gz" | tar xz && break sleep 15 done mv -f $SCCACHE_FILE/sccache $HOME/.local/bin/sccache chmod +x $HOME/.local/bin/sccache 
================================================ FILE: .travis/test-harness.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cargo -q test $CARGO_EXTRA -q -p tract cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p core-proptest-pulse $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p nnef-inceptionv3 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tf-inceptionv3 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tf-mobilenet-v2 $ALL_FEATURES cargo -q test $CARGO_EXTRA -q --profile opt-no-lto -p tfl-mobilenet-v2-q $ALL_FEATURES ================================================ FILE: .travis/test-llm.sh ================================================ #!/bin/bash set -e set -o pipefail export LC_ALL=C ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh model=$1 q=$2 device=$3 if [ -z "$device" ] then device=cpu fi generation=541 if [ "$model" = "all" ] then for m in \ openelm-270M \ llama-3.2-1B-instruct \ llama-3.2-3B-instruct \ llama-3.1-8B-instruct \ qwen3-1.7B \ qwen3-8B do $0 $m $2 $device done exit 0 fi model=$(echo $model | tr 'A-Z' 'a-z' | tr -d "_.-") for m in \ apple--OpenELM-270M \ meta-llama--Llama-3.2-1B-Instruct \ meta-llama--Llama-3.2-3B-Instruct \ meta-llama--Llama-3.1-8B-Instruct \ Qwen--Qwen3-1.7B \ Qwen--Qwen3-8B do norm=$(echo $m | tr "A-Z" "a-z" | tr -d "_.-") if [[ "$norm" == *"$model"* ]]; then model_id=$m fi done if [ -z "$model_id" ] then echo "No model matched" fi if [ "$q" = "all" ] then for q in q40ef16 f16f16 f32f32 do $0 $1 $q $device done exit 0 fi id=$model_id-$q if which gstat > /dev/null then STAT=gstat else STAT=stat fi nnef=llm/$generation/$id/$id.nnef.tgz $CACHE_FILE $nnef if [ -e $MODELS/$nnef ] then size=$($STAT -c %s $MODELS/$nnef) else size=$(curl -s -I $MODELS/$nnef | grep Content-Length | cut -d " " -f 2 | tr -cd 0123456789) fi if which nvidia-smi > /dev/null then vram=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | awk '{print $1*1024*1024}') if [ $vram -lt $size ] then echo "::warning::Skipping this test, not enough VRAM." exit 0 fi fi $TRACT_RUN -v --nnef-tract-transformers $MODELS/$nnef -O --readings --assert-maximal-mm-quality-cost 0 $TRACT_EXTRA_ARGS dump -q alloc_max=$(cat readings.out | tail -n +2 | awk '{print $10-$11}' | sort -n | tail -1) ratio=$((alloc_max * 100 / size)) echo " ###########################################" echo " Alloc max to model size ratio: ${ratio}%." echo " ###########################################" limit=125 if [ $ratio -gt $limit ] then echo "RSZ max is ${ratio}% the size of the unzipped model!" 
exit 1 fi for t in p0s100 p50s50 p99s1 do npz=llm/$generation/$id/$id.$t.io.npz $CACHE_FILE $npz key=$id.$t.$(arch).$device expectations="$ROOT/.travis/llm-expectations-541" echo echo " Key: $key" echo case $device in cuda) DEVICE="--cuda" GPU_ASSERT="--assert-op-only Cuda*,Gpu*,DeviceSync*,Const,Source,Range,Gather" ;; metal) DEVICE="--metal" GPU_ASSERT="--assert-op-only Metal*,Gpu*,DeviceSync*,Const,Source,Range,Gather" ;; *) GPU_ASSERT="" ;; esac if [ -n "$RESET" ] then $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo 0.0 \ $approx --allow-float-casts $GPU_ASSERT 2>&1 | tee output.txt found=$(cat output.txt | perl -MPOSIX=floor -ne 'printf("%.2f\n", floor($1 * 100) / 100) if /LLM RBO:\s+([\d.]+)/') ( ( grep -v $key $expectations || true) ; echo $key $found) | sort > $expectations.tmp mv $expectations.tmp $expectations elif [ -n "$RELAX" ] then prior=$(grep $key $expectations | cut -f 2 -d ' ') $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo 0.0 \ $approx --allow-float-casts $GPU_ASSERT 2>&1 | tee output.txt found=$(cat output.txt | perl -MPOSIX=floor -ne 'printf("%.2f\n", floor($1 * 100) / 100) if /LLM RBO:\s+([\d.]+)/') if [ -n "$prior" ] && perl -e 'exit($ARGV[0] <= $ARGV[1] ? 1 : 0)' "$found" "$prior" then found=$prior fi ( ( grep -v $key $expectations || true) ; echo $key $found) | sort > $expectations.tmp mv $expectations.tmp $expectations else # test ! 
expectation=$(grep $key $expectations | cut -f 2 -d ' ') $TRACT_RUN -v $MODELS/$nnef $TRACT_EXTRA_ARGS \ --llm --transform unfold-kv-cache -O $DEVICE run --prompt-chunk-size 60 --allow-missing-outputs \ --input-from-npz $MODELS/$npz \ --assert-output-bundle $MODELS/$npz \ --assert-llm-rbo $expectation \ $approx --allow-float-casts $GPU_ASSERT fi done ================================================ FILE: .travis/test-published-crates.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cargo update echo echo "$WHITE ### tract ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract for c in data linalg core nnef hir onnx pulse onnx-opl pulse-opl do echo echo "$WHITE ### $c ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract-$c done if [ `uname` = "Darwin" -a -z "$CI" ] then echo echo "$WHITE ### metal ### $NC" echo cargo -q test $CARGO_EXTRA -q -p tract-metal fi if command -v nvcc >/dev/null 2>&1 && [ -z "$CI" ] then echo echo "$WHITE ### cuda ### $NC" echo cargo -q test -q -p tract-cuda fi $ROOT/api/proxy/ci.sh # doc test are not finding libtensorflow.so if ! cargo -q test $CARGO_EXTRA -q -p tract-tensorflow --lib $ALL_FEATURES then # this crate triggers an incremental bug on nightly. 
cargo clean -p tract-tensorflow cargo -q test $CARGO_EXTRA -q -p tract-tensorflow --lib $ALL_FEATURES fi ================================================ FILE: .travis/test-rt.sh ================================================ #!/bin/sh WHITE='\033[1;37m' NC='\033[0m' # No Color if [ -e /proc/cpuinfo ] then grep "^flags" /proc/cpuinfo | head -1 | \ grep --color=always '\(s\?sse[0-9_]*\|fma\|f16c\|avx[^ ]*\)' fi set -x ROOT=$(dirname $0)/.. . $ROOT/.travis/ci-system-setup.sh set -e if [ `arch` = "x86_64" -a "$RUST_VERSION" = "stable" ] then ALL_FEATURES=--all-features fi set +x cd $ROOT for c in test-rt/test*; do case "$c" in test-rt/test-tflite) echo "$WHITE ### $c ### IGNORED $NC" continue ;; test-rt/test-metal) if [ "$(uname)" != "Darwin" ] || [ -n "$CI" ]; then echo "$WHITE ### $c ### IGNORED $NC" continue fi ;; test-rt/test-cuda) if ! command -v nvcc >/dev/null; then echo "$WHITE ### $c ### IGNORED $NC" continue fi ;; esac echo echo "$WHITE ### $c ### $NC" echo (cd "$c" && cargo test -q $CARGO_EXTRA) if [ -n "$CI" ]; then df -h cargo clean fi done ================================================ FILE: .travis/tf.sh ================================================ #!/bin/sh set -ex if [ -z "$CACHEDIR" ] then CACHEDIR=`dirname $0`/../.cached fi (cd tensorflow; cargo test --release --features conform) ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-aarch64 ================================================ # vim: set syntax=Dockerfile: FROM tensorflow/tensorflow:devel RUN apt-get update ; apt-get upgrade -y RUN apt-get install -y crossbuild-essential-arm64 COPY linux_makefile.inc /tensorflow_src/tensorflow/lite/tools/make/targets/linux_makefile.inc COPY disable_nnapi.patch /tensorflow_src WORKDIR /tensorflow_src RUN patch -p1 < disable_nnapi.patch ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-official-rpi ================================================ # vim: set 
syntax=Dockerfile: FROM tensorflow/tensorflow:nightly-devel RUN apt-get update ; apt-get upgrade -y RUN apt-get -y install git crossbuild-essential-armhf WORKDIR /tensorflow RUN ./tensorflow/lite/tools/make/download_dependencies.sh ================================================ FILE: .travis/tflite/Dockerfile.tensorflow-rpitools ================================================ # vim: set syntax=Dockerfile: FROM tensorflow/tensorflow:nightly-devel RUN apt-get update ; apt-get upgrade -y RUN apt-get -yy install git WORKDIR /tensorflow RUN ./tensorflow/lite/tools/make/download_dependencies.sh RUN git clone https://github.com/raspberrypi/tools /raspitools ENV PATH=/raspitools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin:$PATH ================================================ FILE: .travis/tflite/build_tflite_aarch64.sh ================================================ #!/bin/sh set -ex mkdir -p result docker build -f Dockerfile.tensorflow-aarch64 --tag tensorflow-aarch64 . docker run --rm -it \ -v `pwd`/result:/result \ tensorflow-aarch64 \ sh -c " cd /tensorflow_src ; export EXTRA_CXXFLAGS=-flax-vector-conversions export DISABLE_NNAPI=true ./tensorflow/lite/tools/make/download_dependencies.sh make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=linux TARGET_ARCH=aarch64 ; cp /tensorflow_src/tensorflow/lite/tools/make/gen/linux_aarch64/bin/benchmark_model /result/tflite_benchmark_model_aarch64 " ================================================ FILE: .travis/tflite/build_tflite_raspbian.sh ================================================ #!/bin/sh set -ex mkdir -p result # build pseudo-rpi official tensorflow, https://www.tensorflow.org/lite/rpi, only works on pi3 docker build -f Dockerfile.tensorflow-official-rpi --tag tensorflow-official-rpi . 
docker run --rm \ -e CC_PREFIX=arm-linux-gnueabihf- \ -v `pwd`/result:/result \ tensorflow-official-rpi \ sh -c " make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=rpi TARGET_ARCH=armv7l; cp /tensorflow/tensorflow/lite/tools/make/gen/rpi_armv7l/bin/benchmark_model /result/tflite_benchmark_model_official_rpi " # build with rpi tools (works on rpi0, 1 and 2) docker build -f Dockerfile.tensorflow-rpitools --tag tensorflow-rpitools . docker run --rm \ -e CC_PREFIX=arm-linux-gnueabihf- \ -v `pwd`/result:/result \ tensorflow-rpitools \ sh -c " make -j 3 -f tensorflow/lite/tools/make/Makefile TARGET=rpi TARGET_ARCH=armv6; cp /tensorflow/tensorflow/lite/tools/make/gen/rpi_armv6/bin/benchmark_model /result/tflite_benchmark_model_rpitools " ================================================ FILE: .travis/tflite/convert_all.sh ================================================ run_in_tf_docker() { docker run --rm -v $HOME/.cache:/models -it tensorflow/tensorflow:nightly-devel sh -c "$@" } # # inception v3 # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file inception_v3_2016_08_28_frozen.pb \ # --input_arrays input \ # --input_shapes 1,299,299,3 \ # --output_arrays InceptionV3/Predictions/Reshape_1 \ # --output_format tflite \ # --output_file inception_v3_2016_08_28_frozen.tflite" # # # arm ml kws # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file ARM-ML-KWS-CNN-M.pb \ # --input_arrays Mfcc \ # --input_shapes 1,49,10 \ # --output_arrays labels_softmax \ # --output_format tflite \ # --output_file ARM-ML-KWS-CNN-M.tflite" # hey_snips v1 run_in_tf_docker "cd /models ; tflite_convert \ --graph_def_file hey_snips_v1.pb \ --input_arrays inputs \ --input_shapes 80,40 \ --output_arrays logits \ --output_format tflite \ --output_file hey_snips_v1.tflite" # hey_snips v3.1 # (tflite does not support 1D dil) # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file hey_snips_v3.1.pb \ # --input_arrays inputs \ # --input_shapes 40,40 \ # 
--output_arrays logits \ # --output_format tflite \ # --output_file hey_snips_v3.1.tflite" # # # hey_snips v4 model17, 2seconds # (tflite does not support AddN) # run_in_tf_docker "cd /models ; tflite_convert \ # --graph_def_file hey_snips_v4_model17.pb \ # --input_arrays input_node \ # --input_shapes 200,20 \ # --output_arrays output_node \ # --output_format tflite \ # --output_file hey_snips_v4_model17.tflite" ================================================ FILE: .travis/tflite/linux_makefile.inc ================================================ # Settings for Linux. ifeq ($(TARGET), linux) CXXFLAGS += \ -fPIC \ -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \ -pthread # TODO(petewarden): In the future we may want to add architecture-specific # flags like -msse4.2 LIBS := -lstdc++ -lpthread -lm -ldl TARGET_ARCH=aarch64 TARGET_TOOLCHAIN_PREFIX := aarch64-linux-gnu- endif ================================================ FILE: .travis/tflite/run_all.sh ================================================ #!/bin/sh ./benchmark_model --graph=inception_v3_2016_08_28_frozen.tflite ================================================ FILE: .travis/tflite.sh ================================================ #!/bin/sh set -ex ROOT=$(dirname $(dirname $(realpath $0))) . 
$ROOT/.travis/ci-system-setup.sh if [ `uname` = "Darwin" ] then brew install coreutils fi if [ -n "$GITHUB_ACTIONS" ] then pip install numpy fi cargo check -p tract-tflite cargo -q test -p test-tflite $CARGO_EXTRA -q ================================================ FILE: .travis/travis.sh ================================================ #!/bin/sh set -ex if [ -z "$PLATFORM" ] then .travis/native.sh else .travis/cross.sh fi ================================================ FILE: .travis.yml ================================================ language: rust dist: trusty cache: # - cargo #- directories: #- $HOME/cached addons: apt: packages: - awscli - jshon - moreutils env: global: - RUST_BACKTRACE=1 - AWS_DEFAULT_REGION=us-east-1 # - CACHEDIR=$HOME/cached - secure: DdQqS4fFDevY/JknS0M6CwW+ooj6ZhKmBk4lFliG3WQgjYietbPMCkiHPEC8Df8U07l54+8G4j+sZJJ4VYQY8WQcuKGWt9/ALjoYHYZ1Dlw0KW0rRJ1BZWLUh8MwpJ5pxHSWfl1a8QqTy/0mI3eJ8iVIDxiZR6b1fGwPYDkNXyqnfOvz31X1aMyoGslNkP7LitObCdBJyzobPlvWafGCQLf7oLbK4a5Ysyc9T607n1B0okco3Te2ztahEOwNoxmHlEFRojM6ZmAmo8LzwkCdFQNjHw+mQ3vScC8gpngi61G9U35luAfweMt30m1UmecVGADeEmwSnJLeAHo5HYKT5n6Q1begxlMGMxezinstTHUX6G8EhEumO/ii3PAscFJ6C+VfciGA7JDS2ICghygKSIqQvVeugNR0glW97lhszLnoXCNY45siknAZVTVqwhgn1ctTQiPWqGVuQp+m9NYIoQAYUpOFNo1mEtEjurHOk96Q0XjRJMfUSLOB5KfPakk/ghEY1ZYDDB9wi134f3Z5hLw1FGj/Uiw9LcnEIcORV2o8fbFrb2IgGsKQbRtdPEJ92q5bdeA00TbSrzoDDRFGbwBg+8ibFMF8O2J3Q54sUu6LmkP7qjtMIT2vB09M7LWQYtW3Vd6ovHwI6v+tNJK2D4cJA0KOSwzpOgIBhEubrZI= - secure: 
su5wLbN7lr59HSKGM8M9pW9VSFMtdTBbQ1oBZFTvw+RNoNew3BBMh8XymbjEv6R+yyGmDAa8Dw6E7HPiBtI5O4q89rHxl7MO5keZINjpXDzTydZ5MQ2juZPIRQpLfpl6AN1meG8I3SoOrTMDgGqfPh2rqEjIS6cBZbo8Re/0KSBuJB781qdT6x80Qfpy87sDu9GTRM8ueXsV+jRw0Yek709m29MSB8pkFAP9OitKHxzF5OFnxVPV2becj53racOe7q9ZE5QWmWnzPZUxflVyrtzDsN0J/C4g0SoEbxLFg1OLbffO1GVJ/Iv6ZeaggzYRvCYwSSANyfqorqSUDT5NPwQlUXjgBPHlOXbCfY2s5hFzQQ3z4R86fyzfdfBQ56uXTXkB4CWpn/JieUoviuDO0/YNaI0KU6hOrOn68BZrBSqwHcxwhtcP3cw/EXfR1aiok7OoDPAWnR4f3Lu/+fmkW+VUEg5Ufh/GgjZt7XwsNBfo+pmvO3mO/5okxa2/HbOwcoTpELAzPMKyh1xn5gjrk5bWcZofeGtFMoXN4+G8+1qlQ/sLp3144QHyRf00n4qlhA6xZwplpBWN12haXyKRx67lPTzE5QuT1BRoyCdRvbjQiOdo61xGvoOK9J8PL9C06xtnKQ+6iDnjFaWielASoENvcNL+DCKqiecpUb5hoR4= - secure: HZHubUhHLYh+v5yuyMy5TmfleHqAdcmVZd6hZf7c2sXQLsrcjoLGPxu4jzS9OJt27Sfp6xXVFeeA7SFDMobxe8AFm3+SRCbzvT6mu8/LlsuTsQO3jYQtt8j5OIhtLQ7yfDyOscXwy2I3SgluhVQ0HCIi6ShQ8YRD1vE119Qku2x/sWlKcZQckcl5T/yyig08sXfOM+IfFQPIW1gvMWM3dv6RigiCiy4qjfeQy8v8xbvbRayXeT4Vpfv9CqN79kAQ03r7MSmBBn6i88mGoQEzVDMEZPZq8rMNqn8qyIn8LxjXaCpUz0fTCYJrHSmzKyCE7+k7IEidlkyT6hJpvpXCfNYBSk2fB7SDxlm98ELVgqPBq6vjYoPaqsqs7Cz+pTTQYOCnCKvRDYhccqhAsgjNVKDRIJc0H7cT5sv4TuJxsMp/vYVh7RxoFem2r37ns4pu8XPP9RVsmoAVYzlHu8Fnd8TWY30MXACf3t43ceaPfor0IARrZcjQR1lt7eMJGQDkduJRxzq0cBB1djP8HfZGab/I0cVFEXGeJWDQfaHZ8Pq/M3+bBPLD9XLqKmpoNbW+gqQQl49/w01/EJrp9QhK/Og6ujfkeA1OCziPCUDLAHYvmwaYZYzV9z5VSPfUYwiiJzva92/ywWfhCmz3SpXPbq8cTPoDWzeBUeqcw8iIWVU= - secure: 
vH8bS7RVgaHLGZUeqtViCQYDJfhubMiCMETLPD959pv9sODmSfjOhYtFgZtbn0wZ2fhCQFgKhYKUJdti5Vo9OUlyBiUsfLPilAAaeZu0Y2SQIKpbuNU9kJibuzyj6KZoRhjvsifhO5/mB03W7CpzjSGvJntK62BM0b6CrDtUlHlOgjwd3U1c5brZS9VWfnkh8pohgneB/XYtefTJXHuGjJgf75uw2TO/ZKQmmaKJWPoMVN76cgarRmXS/SoGMLr0+7ArnvIMNW9QRMABrSzUgP0RBvNfndwjiIQDZpIefIz/UVTa5e/pS79CLoQKM9FSWZANf3ZJgz0SzYgMprSe9f3RZGu5i0BLQA0SzdxCjCra5/3/pz+p86/iWGHnBfV6pvH9c2W1OUCCTiohNk7bgUfXxVrxk2RHxhc375MFiCxu6KtPRW8kJoRTSZP+k2itaBPUSevV0cdWrVlRjnTwoCskxMIFQH+vStxQjUXV0/g9SZzwdIR/j1aKIjb6VdQS2WOh0+BKgHy0jy2w4GJHtuObIg0aTcQAtt44aK0T/VeHJ0f1FxfjzPxrcqcgSxvi2E4HgedSCvtOHPWs5YYYGt76yH0G5ZOMOF8xP2CRStlcNB0TtLdpcUvQT2ejK7t4sCOj8Kz81s2cbLCZnJdkFaaBsffV7JtbjexXRwohGxI= matrix: include: - rust: stable - rust: stable env: PLATFORM=raspbian - rust: 1.35.0 - rust: stable script: .travis/tf.sh - rust: stable script: .travis/debug-tests.sh - rust: stable os: osx osx_image: xcode9.3 env: PARTIAL_CI=true - rust: stable env: PLATFORM=aarch64-unknown-linux-gnu - rust: stable env: - PLATFORM=armv6vfp-unknown-linux-gnueabihf - TRACT_CPU_EXPECT_ARM32_NEON=false - rust: stable env: - PLATFORM=armv7-unknown-linux-gnueabihf - TRACT_CPU_ARM32_NEON=true - TRACT_CPU_EXPECT_ARM32_NEON=true - rust: stable env: PLATFORM=aarch64-linux-android - rust: stable env: PLATFORM=armv7-linux-androideabi - rust: stable env: PLATFORM=i686-linux-android - rust: stable env: PLATFORM=x86_64-linux-android - rust: stable os: osx osx_image: xcode9.3 env: PLATFORM=aarch64-apple-ios - rust: beta - rust: nightly - rust: stable os: windows script: cargo test -p tract-linalg allow_failures: - rust: nightly - os: windows script: ".travis/travis.sh" ================================================ FILE: .vim/coc-settings.json ================================================ { "rust-analyzer.imports.granularity.group": "module" } ================================================ FILE: CHANGELOG.md ================================================ # Unreleased * [Breaking][MSRV] MSRV bumped to 1.91.0 (for 
`const TypeId::of`). # 0.23.0-dev.3 — 2026-03-20 ### Breaking changes - **`Value` renamed to `Tensor`** across the entire public API surface (Rust, C, Python). The deprecated `Value` alias has been removed. - **Crate renamed: `tract-rs` → `tract`** — update your `Cargo.toml` dependency accordingly. The CLI binary is now `tract-cli` (previously `tract`). - **`into_tract()` renamed to `into_model()`** in all API layers. - **`DatumType` variant names shortened** — the `TRACT_DATUM_TYPE_` prefix is dropped (C API). - **Deprecated state methods removed**: `init_states()`, `state_initializers`, and the `n_states` parameter are gone from `State` trait and `RunTensors`. - **Python**: `concretize_symbols` and `pulse` methods replaced by typed transform classes; `TransformSpec` is now an abstract base class. ### Improvements - **`UnfoldKeyValueCacheTransform`** — explicit KV-cache I/O mode now available as a first-class transform (CLI: `--transform unfold-kv-cache`). - **Structured `NodeFilter`** for `FloatPrecisionTranslator` — replaces raw filter strings. - Python docs migrated from mkdocs to **Sphinx** (hosted on GitHub Pages with version switcher). - New GPU inference section (CUDA example, `Runtime` usage). # 0.23.0-dev.2 - 2026-02-18 * This is a pre-release. It will be a pretty big one, here are some highlights. * New public api: tract-rs should be the main point of entry for any new project. A caveat: it does support most of tract's simple uses as is, but some specialized sections like state management and model transforms are not satisfactory yet, so the real 0.23.0 will presumably break these again. The plan is that this facade will be tract's public API, and that it will be the only surface under semver rules. Up to now, it was essentially everything `pub`, which boils down to mostly "everything". * GPU: The new API puts forward the Runtime trait.
It allows running models on GPU (runtimes "cuda" and "metal" offer some support, while "default" is the CPU runtime). See https://github.com/sonos/tract/blob/main/examples/nemo-parakeet-asr/src/main.rs#L21 for an example. Instead of calling into_runnable(), use Runtime::prepare(). * Additionally, the internal API compatibility is broken in many places (e.g. RunnableModel always takes the model as an Arc while it was accepting AsRef before). As you're going to need to fix code to upgrade, it is recommended to try and use the new "tract-rs" facade (please tell us if the current API coverage is not enough, or awkward to use). * [Breaking][MSRV] MSRV bumped to 1.89.0 * [linalg] Avoid panic in Apple sysctl-based feature probing (AMX detection). # 0.22.1 - 2026-02-23 * [backport] Small bug fixes release (Slice decluttering bug) # 0.22.0 - 2025-08-25 * [Breaking][MSRV] MSRV bumped to 1.85.0 * port to edition 2024 * bump virtually each and every dependency * (wip, experimental) cuda support for llm # 0.21.14 - 2026-02-23 * [backport] Small bug fixes release (Slice decluttering bug) # 0.21.12 - 2025-04-10 * multithread matmul is feature gated now ("multithread-mm" on linalg) * full hand made arm64 f32-accumulator matmul kit * more auditing improvement around einsum and its matmul translations * bugfix in matmul translation and gather * more test-rt-level coverage of low-level matmuls (metal and cpu) * memory arena improvements (metal) * q40 for convolution weights # 0.21.11 - 2025-03-19 * [cli] augment audit capabilities for mm implementation choices * revisit matmul kernel selection * improve gather with compressed inputs * revisit slice bubbling up to unlock optimisations * fix a bug around flipping subtractions * support for left q40 input in arm64 f32 accumulating kernels (unlocks q40f32 compression on arm64) # 0.21.10 - 2025-02-21 * WIP llm testability (--approx-custom) * [metal] ggml-ported kernels * WIP einsum-to-matmul testability * optimisation around reduce
impacting some modern/exotic normalisation layers * WIP towards better handling of shared weights (e.g. embeddings duplication) # 0.21.9 - 2025-01-08 * [metal] experimental profile * [cpu] new versatile (mmm/mmmv) kernels combinations for various architectures * [metal] scaled-masked-softmax detector and impl # 0.21.8 - 2024-12-05 * [linalg, compression] introduce mmm kits * [linalg] (wip) rework f16 on non-f16 machines * [linalg] element-wise binary operators optimisation * [core, compression] gather with compressed weights * [metal] new kernels * [metal] new memory management * [nnef] opt-in deterministic tar encoding # 0.21.7 - 2024-09-23 * [metal] (experimental) introduce partial support for Apple Metal * [core] Potential internal API breaking changes (operator names, comparison ops refactored) * [data] (experimental) Smarter TDim simplification, handling of Min and Max. TDim assertions for simplifications. * [data] (experimental) WIP around multiple scenarios (modes) for LLM inference * Extra examples * [linalg] (experimental) kernels targetting LLM block-quantized tasks (inc. 
intel 32x1 q40f32) # 0.21.6 - 2024-07-24 * [data] Rework tdim and symbols, introduce inequalities assertions, min and max operators * [data] Generalize Blob usage in Tensor * [linalg] Rework reduce implementation, introduce more generic binary ops support (wip) * [linalg] Introduce multithreaded matrix multiplication runner * [linalg] Introduce Q4_0 block quantization for weights (wip) * [linalg] Introduce AMX f16 kernels, Neon Q40F16 kernel (experimental) * [linalg] wasm f32 4x4 kernel * [core] Introduce Opaque and OpaqueFact to escape Tensor and TValue formalism * [core] generalize/improve float precision translator, with translation filter * [core] Introduce garbage collecting in patch application, new compact algo, and rework constant propagation to spare memory * [core] Rework packed format and packing metadata * [linalg/core] Introduce multiple packing format for matmul kernels * [core] Work In Progress refactoring binary, towards more optimized execution strategies * [nnef] inequalities assertions extension, q4_0 extension * [tflite] plug in tanh and sigmoid # 0.21.5 - 2024-05-11 * [TFLite] fixes for fully connected and max pool layers * Allow opting out of new memory friendly execution order optimisation # 0.21.4 - 2024-04-23 * More memory/cache friendly execution order * Several fixes around symbolic dimensions computation (some should help with attention models) # 0.21.3 - 2024-04-03 * [AMX] Put AMX for iOS behind a feature gate ("tract-linalg/apple-amx-ios"). 
# 0.21.2 - 2024-03-29 (yanked) * [ONNX] Support for external storage of tensors with offset and length * [ONNX] Lots of fixes around binary quantized operators (add, mul, etc) * [PY] Fix python source distribution * [AMX] Activate AMX on iOS * [API] Introduce transforms in external api * [BLAS] Introduce a simple BLAS transform for Matrix multiplication * [F16] Introduce a Reduce that solves many L2 normalization errors in f16 This version has been yanked to revert systematic activation of AMX on iOS. AMX is a private API and Apple may reject an App that performs AMX instructions. # 0.21.1 - 2024-02-08 * [ONNX] Support for external storage of tensors with offset and length # 0.21.0 - 2024-01-16 * MSRV is now 1.75.0 * [internal] ConvUnary and MatmulUnary are replaced by binary, potentially dynamic equivalent # 0.20.22 - 2023-11-28 * [ONNX] LayerNormalization support # 0.20.21 - 2023-10-31 * [ONNX] ignoring output shapes is now the default * # 0.20.18 - 2023-08-30 * [intel] fix in AVX512F matrix vector product * [tflite] alpha, embryonic support. some convolutional models working. * [kaldi] remove abandoned kaldi experimental support * [refactoring] Runtime abstraction and runtime-targeting tests * [refactoring] Refactoring Python and C API around a possible tract-api. Introducing dylib support.
* [pytorch compat] fixes around node names starting with / (bug triggered by recent pytorch versions) 0.20.7 to 0.20.17 are misfires # 0.20.6 - 2023-06-09 * Bug fixes, fix display of If operator # 0.20.5 - 2023-05-26 * Various bugfixes around Einsum * Einsum now has functions to translate to MatMul and other axes manipulations # 0.20.0, 0.20.1, 0.20.2, 0.20.3 - 2023-04-25 * [optim] 32x32 f32 AMX kernel (for Apple Silicon M family) * [optim] bunch of AVX512F kernels (square, skinny, vector) * [ONNX] introduce Trilu, TopK * [NNEF/OPL] submodel loader * [ONNX] support alternative layout for LSTM (layout=1, batch becomes first axis) * [ONNX] If operators with dynamic condition (very basic optimisations, no nnef support yet). # 0.19.9 & 0.19.10 - 2023-04-17 * HardSwish ONNX, tract_core_hard_swish in NNEF/OPL * introducing tract_core_submodel in NNEF/OPL * JSON resource loader in NNEF/OPL * Profiling API tweaks * `--folded` view for model command line dump (hide Scan loops) # 0.19.8 - 2023-03-27 * Various bug fixes # 0.19.7 & 0.19.6 - 2023-02-24 * more bug fixes * wip on python doc auto-deploy # 0.19.5 - 2023-02-22 * 0.19.3 and 0.19.4 are release misfires * lots of bugfixes following 0.19 big changes * introducing the JSON NNEF resource # 0.19.2 - 2023-01-30 * [NNEF/OPL] introduce json resource loader * extend Complex number support (under a feature flag) # 0.19.1 - 2023-01-23 * [nnef] new identifier syntax is now opt-in for serialization (both accepted at loading) * alpha-level C interface. how and where to deploy it (where to put the .h, whether or not to build and ship dylibs) * alpha-level python interface. deployed on pypi as "tract". At this stage, API is undocumented and may still change significantly. # 0.19.0 - 2023-01-11 * [BREAKING] TValue are now used in run() instead of the previous mix of Tensor and Arc * internal API breaking changes: no more op_families, libcli split away. State is no longer Send (but can be "frozen" to a Send counterpart).
* Symbols can now be String instead of char. They are not shared globally anymore, but scoped in the Model instead. * [pulse] S symbol is no longer magic. The time dimension symbol must be provided at pulsification time. * [pulse] In most cases, we can now pulsify without an explicit pulse len (pulse len can be expression). * [cli] deprecated "x" syntax for shape is removed * [nnef/opl] new i"..." syntax for escaping identifiers: i"some arbitrary string". Allow serialization of any ONNX model with any kind of string as node names. * [ONNX] Signal processing operators (DFT, STFT, MelWeightMatrix, BlackmanWindow, HammingWindow, HannWindow) * [ONNX] bitwise operations * [ONNX] Compatibility target raised to operator set 18 # 0.18.3 - 2022-10-27 * [NNEF] Introduce a "resource" extension for loading values from a separate source (as a config file) * Workaround for cpu detection failure on FreeBSD / arm64 * Various bug fixes # 0.18.2 - 2022-10-18 * [pulse] improve convolution (and others) pulsification to avoid some unnecessary buffering delay * [cli] support multiple streaming inputs and outputs * [ONNX] more relaxed Clip operator rules # 0.18.1 - 2022-10-06 * prepare NNEF for further tract-opl extension (resource support) * more generic matmul * optimise some EinSum cases as matmul # 0.18.0 - 2022-09-21 * [ONNX Breaking] Several changes to move towards supporting ONNX symbolic dimensions (actual fixes, but they may break stuff that was working more or less by accident). It may be required to erase output shapes explicitly when input shape is overridden on models that were working before. * [CLI breaking] ONNX symbolic dimensions have some impact here too. --input-bundle is deprecated, it was overridden and ambiguous. Instead, there is a --input-facts-from-bundle global option, and a --input-from-bundle option in the subcommands run, profile, dump. --allow-random-input is also moved to subcommands. We think all previously supported behaviours are still there.
Please open issues if not. # 0.17.7 - 2022-09-05 * clippy up all tract code * various fixes * 0.17.5 and 0.17.6 are misfires # 0.17.4 - 2022-08-11 * [cli] global --set (as a somewhat cleaner --concretize successor) allows setting a symbol value after decluttering * [cli] run --save-outputs output.npz to save execution outputs * dozens of fixes and code cleanup (clippy-fication in progress) # 0.17.3 - 2022-07-25 * [License] Allowing https://spdx.org/licenses/Unicode-DFS-2016.html (no tldr yet, but pretty similar to BSD-2) * [Breaking] CLI --json option reports costs as strings instead of numbers (in order to allow symbolic values). * Sigmoid/Tanh f32 reimpl, plus new f16 impl. # 0.17.1 - 2022-07-11 * Sanitiser=address in the CI. Fixed a couple of overflow/memleaks. (Nothing looked too awful.) * ONNX NonMaxSuppression # 0.17.0 - 2022-06-13 * [Breaking] [ONNX-ML] TreeEnsembleClassifier with binary output (single class) now mimics scikit-learn output layout. # 0.16.9 - 2022-06-10 * bump ONNX protobuf file and support external tensors format * new "skinny" kernels for avx2/fma f32 multiplication (positive impact on low, non 1 batch size for DNN-heavy loads) # 0.16.7 - 2022-05-16 * Softmax is now an operator in core, coming with a direct quantized implementation * new TypedFact constructor API ( f32::fact(&[1, 4, 12]), f32::fact(shape!(Symbol::from('N'), 12))) * fixes and optimisation of re-quantization pipeline * fixes around symbols in NNEF/OPL # 0.16.6 - 2022-05-03 * Various changes around quantization support (qi32 appearance) # 0.16.5 - 2022-04-27 * Intel optimisations are back * Range is now more flexible, should unlock some BERT models with symbolic dimensions. # 0.16.4 - 2022-04-14 * some optimisations in depthwise convolutions * various bugfixes * [Breaking] Fixed nnef "tile" operator definition ("repeats" is plural). As a consequence models using "tile" serialized with tract with prior versions can not be loaded anymore (and vice-versa).
# 0.16.3 - 2022-03-30 * [Breaking] tract-opl models Scan syntax changed a bit. Models exported by <0.16.2 are loadable in >=0.16.2, but not the other way around. * Optimisation in deconv # 0.16.1 - 2022-03-02 * [Breaking] Minimum Rust Supported Version is now 1.59.0 * [Breaking] Small API changes in model api: .compact(), .optimize(), .declutter() now take &mut self and work in place. * [LICENSE] Only the licensing for dependencies of the top-level library crates (tensorflow, onnx, kaldi, pulse) will now be monitored. The command line tool (tract crate in cli folder) is for developpers (tract developpers or tract integrators), is not meant to be shipped to end-user, and it concentrates most of the license and dependency complexity. * [LICENSE] BSD-3-Clause is now accepted in tract. * Optimisations around convolutions and deconvolution * Optimisation on Cortex-A53, first round of Cortex-A55 optimisation too. # 0.15.8 - 2021-11-18 * Fix brand new ArrayFeatureExtractor inference # 0.15.7 - 2021-11-16 * ONNX ArrayFeatureExtractor * ConvTranspose/deconv optimisation # 0.15.6 - yanked * just a release script failure # 0.15.5 - 2021-10-26 * hold half at 1.7.x for compat with rust 1.50 # 0.15.4 - 2021-10-21 * ConvTranspose/deconv pulse support * ONNX SpaceToDepth/DepthToSpace # 0.15.3 - 2021-07-29 * optimise i8*u8, u8*i8 and u8*u8 matrix products (and convo) # 0.15.2 - 2021-07-09 * bump prost dep # 0.15.1 - 2021-07-08 * some optimisations for arm32 (cortex-a7 and a9) # 0.15.0 - 2021-06-24 * Switched the order of item_type and item_type_vendor in the NNEF tendor format to be consistent with NNEF-tools, and changed the item_type of integers due to an error in the specification. Breaking for tensor files containing integers or strings. 
* Scan output batching optimisation * Concat pulsification over a secondary axis * new aarch64 16x4 f32 kernel ## 0.14.2 - 2021-05-27 * better handling of errors in ONNX parser * fix/workaround some performance regressions bubling from recent ndarray changes ## 0.14.1 - 2021-05-18 * ONNX ConvTranspose, Gather, GatherND, GatherElements, Scatter, ScatterND, ScatterElements support (and NNEF deconv) * Fixes around integer serialisation in NNEF * workaround subtle breaking changes in ndarray (between 0.15.1 and 0.15.2) ## 0.14.0 - 2021-04-19 * low-level functions in linalg are now version tagged: two versions of tract can now co-exist in the same binary * rustc minimal version is now 1.50 * dependencies version bumps (ndarray, itertools, and others) ## 0.13.2 * fix sigmoid and tanh variability on intel ## 0.13.1 * temporary disable binary unicast add fusing (too many bugs) ## 0.13.0 * Release are now "in sync": all tract crate versions on a build *must* be aligned * optimisations, with a focus on aarch64 ## 0.12.5 - 2021-01-12 * Dependency bumps ## 0.12.4 - 2021-01-06 * 0.12.3 is a misfire * hotfixes on 0.12.2 new tree classifier * fix X compilation from macos/aarch64 to macos/intel ## 0.12.2 - 2021-01-05 * ONNX-ML: CategoryMapper and TreeEnsembleClassifier (partial, SoftmaxZero and Probits are missing). With NNEF support. * cargo-deny enforces licences choices ## 0.12.1 - 2020-12-11 * 0.12.0 is a misfire. * API BREAKING: TypedFact::dt_shape & friends can not fail anymore, no longer return a result (remove `?`) * Breaking: Rust minimal version bumped to 1.42 * Early, basic, correct but slow support for i8 by u8 matrix mult. 
* Support for Apple Silicon, aka M1, aka aarch64 darwin (but not in CI yet) * dynamic quantization convolution support * release now ships cli musl builds for linux * optimizations targetting small Cortex-A (like 7, 8, and 9) * command line dump --profile --cost now computes flops * ONNX: OneHot op support ## 0.11.2 - 2020-10-26 * ONNX: new op: DynamicQuantizeLinear * tract-data crate split from core, containing tensor, dim, and datum types. ## 0.11.1 - 2020-10-20 * switch from error_chain to anyhow * simplify trivial gathers to a slice * generalize symbolic dimension a bit: support "2S" and the like * deprecate "x" syntax in CLI, please use `,` instead ## 0.11.0 ### Breaking * NNEF: tract-nnef no longer performs gunziping, but expect an uncompressed tar stream. We found out is it counter-productive (weights matrices are more or less random, they do not compress easily, and decompression is expensive). NNEF networks in the wild are .tgz file. Using flate2, decompression is a one-liner, but it must be done by the client code now. 
* bumped extended nnef compat version (unchecked at this stage) to "alpha1" * move pulse operators and translation to their own crate and nnef registry * generalize TDim to support an arbitrary number of symbols * concretize_stream_dim is superseded by concretize_dims ### Notable * new crates, building on tract-opl introduction: * *tract-pulse-opl*: pulse runtime (handful of ops, including Delay) is now separated from core * *tract-onnx-opl*: onnx runtime (4 ops not belonging in core) * *tract-pulse*: pulsification of models (model-translation time) * tract-onnx is now limited to onnx model loading and conversion ## 0.10.10 - 2020-08-30 * load a NNEF as a TypedModel using tract_nnef, and from the CLI * dump a tract TypedModel to NNEF (with extensions for op not nnef compatible) * not a full coverage of nnef, but enough for most CNN (image categorizers zoo working) * 80% of onnx tests are surviving a NNEF dump and reload at this stage ## 0.10.0 - 2020-07-28 ### ONNX * covered operators compatible with Operator Sets 9, 10, 11 (new) and 12 (new) ### API Breaking * Tensor::l1 method is gone ### Windows * Support for -gnu targets (non-msvc). ### Notable * --cost now gives the number of parameters in the model * SimpleState is clonable again (actually useful !) ## 0.9.2 - 2020-06-16 * introduce `TypedModel::method.concretize_stream_dim` * various pulsification bugfixes ## 0.9.1 - 2020-06-16 * fix Reshape with TDim ## 0.9.0 - 2020-06-15 Still no shortage of version numbers... ### API Breakage * NormalizedModel (and friends) are gone. They were only useful as a pre-pulse transformation pre-requisite that the current TypedModel (& co) meets. * TypedModel::into_optimized() is gone. InferenceModel::into_optimized() stays as an end-to-end shortcut for simple cases. It does .into_typed()?.declutter()?.optimize()). * TypedModel::codegen() is now ::optimize() ## 0.8.0 - 2020-06-13 I wish I had seen these issues yesterday. Anyway, version numbers are cheap.
* Bumping minimum rust to 1.41 ## 0.7.0 - 2020-06-12 * CLI refactoring (hopefully stabilizing a bit?) * `profile --bench` is now bench * profile is now `dump --profile` * cost is now `dump --cost` * profiling is now done during a full net instead of per op * new "compact" graph dumper, profile visual hints * `dump --cost --profile --json` output profiling and cost information * show logical names for ops instead of the Op struct names (not 100% sure it's right) * criterion integration * WASM support for tract-onnx and tract-tensorflow targets (CI) * Convenience methods added to Models to allow model building in fluent style, up to Plan instantiation (SimplePlan now nicknamed RunnableModel). Non breaking. * Support for ONNX bidi LSTM (CI), GRU and RNN (untested, consider alpha) * Fixes around nets with a non trivial batch size (axis simplification code, matmul op fusion) ## 0.6.3 - 2020-04-25 * Lock ndarray version to dodge rustc/llvm issue (https://github.com/rust-lang/rust/issues/71506) ## 0.6.2 - 2020-04-15 * Use http://gihub.com/kali/readings for instrumentation. ## 0.6.0 - 2020-02-19 ### Notable * New jupyter/keras/tf example * ARMv8 tanh / sigmoid optimisation ### API Breaking * refactor exports and dependencies * preferred way to use tract is now to `use tract_tensorflow::prelude::*;` * singleton framework is built by `let tensorflow = tensorflow()`. The Framework trait is in the prelude too. 
* the prelude contains a reexport of `tract_core`, and of ndarray as `tract_ndarray` * no more need to declare dependency on `tract-core` and/or `tract-linalg` in Cargo.toml * same goes for `tract_onnx` ## 0.5.9 - 2020-02-07 ### Breaking * Rustc minimum version is now 1.39 ### Onnx * Support for MatMulInteger, ConvInteger * Support for QuantizeLinear DequantizeLinear * Basic support for QLinearMatMul, QLinearConv ## 0.5.6 - 2019-10-30 ### Tensorflow * Initial support for GatherV2 ### Onnx * Fix PReLu normalization ## 0.5.5 - 2019-10-25 ### Tensorflow * Initial support for AddV2, Mean, Min, Prod, Sum ## 0.5.4 - 2019-09-30 ### Notable * Make Onnx loader operator set aware, and Slice-10 support. * Cost now reports Delay ops buffer size * Bump dependencies (protobuf) and fix codegen * Windows CI now performs a top-level "cargo check" ## 0.5.1 - 2019-09-24 ### Bugfix * remove the no_panic checks, as too fragile (breaking non-lto builds) ## 0.5.0 - 2019-09-20 ### Breaking * Change tensor facts names for consistency: TensorFact is now InferenceFact. ### Notable * Introduce Windows support, including CI coverage for linalg * Switch from Travis to GitHub Actions * Internal refactoring around tract-core canonic opset * Tract CLI can now compute a FLOP number for networks ("cost" subcommand). Furthermore the CI asserts its value for a few networks to prevent optimisation regressions. * Fix: handling of -1 in ONNX Reshape op ## 0.4.2 - 2019-09-10 * Fix release script after 0.4.1 release disaster. ## 0.4.1 - 2019-09-09 [YANKED] * Fix for OS where CARGO_CFG_TARGET_FAMILY is undefined * Linear Algebra package refactor * tract-core canonic operator set introduction * significant performance boost (up to 20% on some real-life networks) ## 0.4.0 - 2019-07-30 * Start Kaldi networks support (LSTM, Renorm, Affine, downsample) ## Before... This Changelog started way too late. But better late than never. 
================================================ FILE: Cargo.toml ================================================ [workspace] resolver = "2" members = [ "data", "linalg", "core", "pulse", "pulse-opl", "hir", "nnef", "nnef/cli", "nnef/nnef-resources", "tensorflow", "tflite", "onnx-opl", "onnx", "libcli", "cli", "gpu", "metal", "extra", "transformers", "cuda", "api", "api/rs", "api/ffi", "api/proxy", "api/proxy/sys", "examples/face_detection_yolov8onnx_example", "examples/face_similarity_arcface_onnx", "examples/tensorflow-mobilenet-v2", "examples/tflite-mobilenet-v3", "examples/keras-tract-tf2", "examples/nemo-parakeet-asr", "examples/nemo-nemotron-asr", "examples/nnef-dump-mobilenet-v2", "examples/nnef-mobilenet-v2", "examples/nnef-mobilenet-v2-api", "examples/onnx-mobilenet-v2", "examples/pytorch-albert-v2", "examples/pytorch-resnet", "examples/causal_llm", "examples/stable-diffusion", "examples/stable-diffusion-3", "examples/stable-diffusion-xl", "harness/core-proptest-pulse", "harness/nnef-inceptionv3", "harness/tf-inceptionv3", "harness/tf-mobilenet-v2", "harness/tfl-mobilenet-v2-q", "test-rt/infra", "test-rt/suite-unit", "test-rt/suite-onnx", "test-rt/test-f16", "test-rt/test-blas", "test-rt/test-metal", "test-rt/test-cuda", "test-rt/test-unit-core", "test-rt/test-onnx-core", "test-rt/test-nnef-cycle", "test-rt/test-tflite" ] # same, without metal, test-metal, cuda, test-cuda and test-tflite which are probelematic on specific targets default-members = [ "data", "linalg", "core", "pulse", "pulse-opl", "hir", "nnef", "nnef/cli", "nnef/nnef-resources", "tensorflow", "tflite", "onnx-opl", "onnx", "libcli", "cli", "extra", "transformers", "api", "api/rs", "api/ffi", "examples/face_detection_yolov8onnx_example", "examples/face_similarity_arcface_onnx", "examples/tensorflow-mobilenet-v2", "examples/tflite-mobilenet-v3", "examples/keras-tract-tf2", "examples/nnef-dump-mobilenet-v2", "examples/nnef-mobilenet-v2", "examples/onnx-mobilenet-v2", 
"examples/pytorch-albert-v2", "examples/pytorch-resnet", "harness/core-proptest-pulse", "harness/nnef-inceptionv3", "harness/tf-inceptionv3", "harness/tf-mobilenet-v2", "harness/tfl-mobilenet-v2-q", "test-rt/infra", "test-rt/suite-unit", "test-rt/suite-onnx", "test-rt/test-f16", "test-rt/test-blas", "test-rt/test-unit-core", "test-rt/test-onnx-core", "test-rt/test-nnef-cycle", ] [workspace.package] rust-version = "1.91" [workspace.dependencies] accelerate-src = "0.3" anstyle = "1.0.2" anstyle-parse = "1.0.0" anstyle-query = "1.0.0" anyhow = "1.0.43" anymap3 = "1.0" approx = "0.5" atty = "0.2.14" bit-set = "0.10.0" boow = "0.1.3" box_drawing = "0.1.2" byteorder = "1.4.3" bytes = "1.0.1" cc = "1.0.69" clap = { version = "4", features = [ "cargo", "derive" ] } colorous = "1.0.5" core_affinity = "0.8.0" criterion = "0.8" cudarc = { version = "0.19", features = ["dynamic-loading", "f16", "cudnn"] } derive-new = "0.7" dinghy-test = "0.8" dirs = "6.0.0" downcast-rs = "2.0" dyn-clone = "1.0.4" dyn-eq = "0.1" dyn-hash = "1.0" env_logger = "0.11" erased-serde = "0.4" flatbuffers = "25.12.19" flate2 = "1.0.20" float-ord = "0.3.2" fs-err = "3" fs2 = "0.4.3" getrandom = "0.4" half = { version=">=2.4,<3.0", features = [ "std", "num-traits" ] } home = "=0.5.12" icu_normalizer = "2.1" icu_normalizer_data = "2.1" icu_properties = "2.1" icu_properties_data = "2.1" idna_adapter = "1.2.0" image = "0.25" inventory = "0.3.21" itertools = "0.14" lazy_static = "1.5.0" libc = "0.2.164" libloading = "0.9" libm = "0.2.11" liquid = "0.26.8" liquid-core = "0.26.8" liquid-derive = "0.26.8" litemap = "0.8" log = "0.4.14" maplit = "1.0.2" memmap2 = "0.9" metal = { version = "0.33.0" } ndarray = "0.17" ndarray-npy = { version = "0.10", features = [ "compressed_npz" ] } nom = "8.0.0" nom-language = "0.1" nu-ansi-term = "0.50" num-complex = "0.4.0" num-integer = "0.1.44" num-traits = "0.2.14" num_cpus = "1" openblas-src = { version = "0.10", features = ["static"] } pastey = "0.2" proptest = "1.0.0" 
prost = "0.14" prost-types = "0.14" py_literal = "0.4.0" rand = "0.10" rand_distr = "0.6" rayon = "1.10" readings-probe = "0.1.8" regex = "1.5.4" ron = "0.12" reqwest = { version = "0.13", features = [ "blocking", "rustls-no-provider" ], default-features = false } rustfft = { version = "6.1", features = [ "neon" ] } rustls = { version = "0.23", default-features = false, features = [ "ring", "std", "tls12" ] } webpki-roots = "1" safetensors = "0.7" scan_fmt = "0.2.6" serde = { version = "1.0.127", features = [ "derive" ] } serde_json = "1.0" simd-adler32 = { version = "0.3.7", features = ["std"] } smallvec = "1.6.1" string-interner = "0.19" tar = "0.4.37" tempfile = "3.8" tensorflow = "0.21.0" tflitec = { git = "https://github.com/kali/tflitec-rs.git", rev="9ceb838" } time = "0.3.23" tokenizers = "0.22" unicode-normalization = "0.1.19" walkdir = "2.3.2" zerofrom = "0.1.5" tract-api = { version = "0.23.0-pre", path = 'api' } tract-core = { version = "0.23.0-pre", path = 'core' } tract-cuda = { version = "0.23.0-pre", path = 'cuda' } tract-data = { version = "0.23.0-pre", path = 'data' } tract-extra = { version = "0.23.0-pre", path = 'extra' } tract-gpu = { version = "0.23.0-pre", path = 'gpu' } tract-hir = { version = "0.23.0-pre", path = 'hir' } tract-libcli = { version = "0.23.0-pre", path = 'libcli' } tract-linalg = { version = "0.23.0-pre", path = 'linalg' } tract-metal = { version = "0.23.0-pre", path = 'metal' } tract-nnef-resources = { version = "0.23.0-pre", path = 'nnef/nnef-resources' } tract-nnef = { version = "0.23.0-pre", path = 'nnef' } tract-onnx-opl = { version = "0.23.0-pre", path = 'onnx-opl' } tract-onnx = { version = "0.23.0-pre", path = 'onnx' } tract-pulse-opl = { version = "0.23.0-pre", path = 'pulse-opl' } tract-pulse = { version = "0.23.0-pre", path = 'pulse' } tract-tensorflow = { version = "0.23.0-pre", path = 'tensorflow' } tract-tflite = { version = "0.23.0-pre", path = 'tflite' } tract-transformers = { version = "0.23.0-pre", path = 
'transformers' } tract = { version = "0.23.0-pre", path = 'api/rs' } tract-proxy-sys = { version = "0.23.0-pre", path = 'api/proxy/sys' } tract-cli = { version = "0.23.0-pre", path = 'cli' } tract-ffi = { version = "0.23.0-pre" } tract-proxy = { version = "0.23.0-pre" } [profile.opt-no-lto] inherits="release" lto=false [profile.release] # debug = true lto = true [profile.bench] debug = true [profile.dev.package."*"] opt-level = 2 [profile.dev.build-override] debug = false # strip = "debuginfo" does not work on android and ios incremental = false ================================================ FILE: LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. ================================================ FILE: LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ ![tract-logo](assets/tract-logo/PNG/tract-horizontal-blue.png) ![Rust](https://img.shields.io/badge/rust-%23000000.svg?style=for-the-badge&logo=rust&logoColor=white) ![rustc >= 1.91.0](https://img.shields.io/badge/rustc-%3E%3D1.91.0-brightgreen) ![MIT/Apache 2](https://img.shields.io/crates/l/tract) [![Native Linux test status](https://github.com/snipsco/tract/workflows/Native%20Linux/badge.svg)](https://github.com/snipsco/tract/actions) [![Embedded targets status](https://github.com/snipsco/tract/workflows/Embedded%20targets/badge.svg)](https://github.com/snipsco/tract/actions) [![Doc](https://docs.rs/tract-core/badge.svg)](https://docs.rs/tract-core) [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](https://pypi.org/project/tract/) Sonos' Neural Network inference engine. _This project used to be called tfdeploy, or Tensorflow-deploy-rust._ ## What ? `tract` is a Neural Network inference toolkit. It can read ONNX or NNEF, optimize them and run them. ## Quick start, examples * [MobileNet v2 with ONNX](examples/onnx-mobilenet-v2) * [BERT example with ONNX](examples/pytorch-albert-v2) * [MobileNet v2 with TensorFlow](examples/tensorflow-mobilenet-v2) * [From Keras and TensorFlow 2 to tract](examples/keras-tract-tf2) * [ResNet with PyTorch](examples/pytorch-resnet) There is also [some technical documentation](doc/) and [blog](https://tech-blog.sonos.com/posts/optimising-a-neural-network-for-inference/) posts. ## Tract in the landscape ### ONNX As of today, `tract` passes successfully about 85% of ONNX backends tests. All "real life" integration tests in ONNX test suite are passing: bvlc_alexnet, densenet121, inception_v1, inception_v2, resnet50, shufflenet, squeezenet, vgg19, zfnet512. 
Notable missing parts are operators dealing with Tensor Sequences and Optional Tensors : tract /really/ wants to flow Tensors and nothing else. This is structural. Changing it would be pretty difficult, and it's unclear whether it can be done without impairing performance or maintainability. We are not convinced these features have shown their interest in the wild yet, so we prefer to leave them aside. Other dark corners are specific operators like "Resize" which fit perfectly in the framework but need a complex internal logic that is far from our core business. In these cases, we are happy to accept contributions and to help. The following operators are implemented and tested. Abs, Acos, Acosh, Add, And, ArgMax, ArgMin, ArrayFeatureExtractor, Asin, Asinh, Atan, Atanh, AveragePool, BatchNormalization, BitShift, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, BlackmanWindow, Cast, CastLike, CategoryMapper, Ceil, Clip, Compress, Concat, Constant, ConstantLike, ConstantOfShape, Conv, ConvInteger, ConvTranspose, Cos, Cosh, CumSum, DFT, DepthToSpace, DequantizeLinear, Div, Dropout, DynamicQuantizeLinear, Einsum, Elu, Equal, Erf, Exp, Expand, EyeLike, Flatten, Floor, GRU, Gather, GatherElements, GatherND, Gemm, GlobalAveragePool, GlobalLpPool, GlobalMaxPool, Greater, GreaterOrEqual, HammingWindow, HannWindow, HardSigmoid, Hardmax, Identity, If, InstanceNormalization, IsInf, IsNaN, LRN, LSTM, LeakyRelu, Less, LessOrEqual, Log, LogSoftmax, MatMul, MatMulInteger, Max, MaxPool, Mean, MelWeightMatrix, Min, Mod, Mul, Multinomial, Neg, NonMaxSuppression, NonZero, Not, OneHot, Or, PRelu, Pad, ParametricSoftplus, Pow, QLinearConv, QLinearMatMul, QuantizeLinear, RNN, RandomNormal, RandomNormalLike, RandomUniform, RandomUniformLike, Range, Reciprocal, ReduceL1, ReduceL2, ReduceLogSum, ReduceLogSumExp, ReduceMax, ReduceMean, ReduceMin, ReduceProd, ReduceSum, ReduceSumSquare, Relu, Reshape, Resize, Round, Rsqrt, STFT, ScaledTanh, Scan, Scatter, ScatterElements, ScatterND, Selu, Shape, 
Shrink, Sigmoid, Sign, Sin, Sinh, Size, Slice, Softmax, Softplus, Softsign, SpaceToDepth, Split, Sqrt, Squeeze, Sub, Sum, Tan, Tanh, ThresholdedRelu, Tile, Transpose, TreeEnsembleClassifier, Unsqueeze, Where, Xor We test these operators against ONNX from 1.4.1 (operator set 9), up to ONNX 1.13.0 (operator set 18). We are using the ONNX test suite, but it does not cover everything. We also deliberately ignore some tests, or restrict their scope, depending on what we feel is realistic. Sometimes these decisions are just wrong, and sometimes they become wrong as time goes by and the field moves in unexpected directions. So if you are puzzled by an ONNX model that does not work in tract, we are happy to take a look. ### NNEF Long story short, TensorFlow and ONNX formats are good for designing and training networks. They need to move fast to follow the research field, and tend to integrate new features and operators greedily. They also exhibit a high level of expressivity to facilitate network design. On the other hand, only a subset of operators and network features actually reach production, so systems running production networks do not have to deal with so many operators. Furthermore, some information required for training can be stripped from the network before going to production for prediction. NNEF tries to bridge the gap between training frameworks and inference by proposing a format dedicated to production and prediction. Tract supports NNEF: * tract_nnef can load and execute NNEF networks * tract supports most of the NNEF specification, the most notable exception being the ROI operators * tract introduces tract-OPL, a series of NNEF extensions to support other operators (or extend some operators' semantics) in order to represent the full range of tract-core neural network support: any network understood by tract should be serializable to tract-OPL. This is a work in progress. * tract command line can translate networks from TensorFlow or ONNX to NNEF/OPL. 
### tract-opl version compatibility A reminder: NNEF is not expressive enough to represent all of ONNX. tract-OPL extends NNEF with proprietary extensions to support what is missing. Notable extensions are pulse operators, recurrent operators (such as Scan) and symbolic extensions. There is no strict check in place here, so... the implementation is not bulletproof. * The NNEF part aims at being very stable. It is strongly constrained by compatibility with the NNEF specification. * tract-opl is a bit more in flux. Nevertheless we try to maintain the following golden rule: `models serialized with tract 0.x.y should work with tract 0.x.z where z >= y` * in practice, breaking changes have been relatively rare so far. Most models are forward and retro compatible from when tract acquired NNEF support. Notable breakage occurred: * 0.16.3 (forward compatible) on the Scan operator * 0.17.0 for the binary decision tree classifier Starting with `0.17.0`, a model property is injected in tract-opl files (`tract_nnef_ser_version`) to tag which version of tract generated the file. As most models will remain compatible, tract will not do any version check. It is up to the application developer to do so. A softer version tag exists as `tract_nnef_format_version`. pre-0.17.0 versions set it to `alpha1`, post-0.17.0 versions set it to `beta1`. Don't put too much emphasis on the "alpha-ness" naming of versions here. ### Note: support for TensorFlow 1.x Even if `tract` is very far from supporting any arbitrary model, it can run Google Inception v3 and Snips wake word models. Missing operators are relatively easy to add. The lack of an easy-to-reuse test suite, and the wide diversity of operators in Tensorflow, make it difficult to target full support. 
The following operators are implemented and tested: Abs, Add, AddN, AddV2, Assign, AvgPool, BatchToSpaceND, BiasAdd, BlockLSTM, Cast, Ceil, ConcatV2, Const, Conv2D, DepthwiseConv2dNative, Div, Enter, Equal, Exit, ExpandDims, FakeQuantWithMinMaxVars, Fill, FloorMod, FusedBatchNorm, GatherNd, GatherV2, Greater, GreaterEqual, Identity, Less, LessEqual, Log, LogicalAnd, LogicalOr, LoopCond, MatMul, Max, MaxPool, Maximum, Mean, Merge, Min, Minimum, Mul, Neg, NoOp, Pack, Pad, Placeholder, Pow, Prod, RandomUniform, RandomUniformInt, Range, RealDiv, Relu, Relu6, Reshape, Rsqrt, Shape, Sigmoid, Slice, Softmax, SpaceToBatchND, Squeeze, StridedSlice, Sub, Sum, Switch, Tanh, Tile, Transpose, VariableV2 Additionally, the complexity of TensorFlow 2 makes it very unlikely that direct support will ever exist in tract. But many TensorFlow 2 models can be converted to ONNX and then loaded in tract. ## Example of supported networks These models, among others, are used to track tract performance evolution as part of the Continuous Integration jobs. See [.travis/README.md](.travis/README.md) and [.travis/bundle-entrypoint.sh](.travis/bundle-entrypoint.sh) for more information. ### Keyword spotting on Arm Cortex-M Microcontrollers https://github.com/ARM-software/ML-KWS-for-MCU ARM demonstrated the capabilities of the Cortex-M family by providing tutorials and pre-trained models for keyword spotting. While the exercise is ultimately meant for micro-controllers, `tract` can run the intermediate TensorFlow models. For instance, on a Raspberry Pi Zero, the "CNN M" model runs in about 70 micro-seconds, and 11 micro-seconds on a Raspberry Pi 3. ### Snips wake word models https://arxiv.org/abs/1811.07684 Snips uses `tract` to run the wake word detectors. While earlier models were class-based and did not require any special treatment, `tract`'s pulsing capabilities made it possible to run WaveNet models efficiently enough for a Raspberry Pi Zero. 
### Inception v3 | Device | Family | TensorFlow-lite | tract | |---------------------|----------------|-------------------|---------| | Raspberry Pi Zero | Armv6 VFP | 113s | 39s | | Raspberry Pi 2 | Armv7 NEON | 25s | 7s | | Raspberry Pi 3 | aarch32 NEON | 5s | 5s | Notes: * while the Raspberry Pi 3 is an Armv8 device, this bench is running on Raspbian, an armv6 operating system, crippling the performance of both benches * there exist other benches on the internet that show better performance results for TensorFlow (not -Lite) on the Pi 3. They use all four cores of the device. Both TensorFlow-Lite and tract here have been made to run on a single core. # License Note: files in the `tensorflow/protos` directory are copied from the [TensorFlow](https://github.com/tensorflow/tensorflow) project and are not covered by the following license statement. Note: files in the `onnx/protos` directory are copied from the [ONNX](https://github.com/onnx/onnx) project and are not covered by the following license statement. ## Apache 2.0/MIT All original work licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ## Contribution Unless you explicitly state otherwise, any Contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
================================================ FILE: api/.gitignore ================================================ *.nnef.tgz *.onnx ================================================ FILE: api/Cargo.toml ================================================ [package] name = "tract-api" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*", "tract.h" ] [dependencies] anyhow.workspace = true boow.workspace = true flate2.workspace = true half.workspace = true ndarray.workspace = true serde.workspace = true serde_json.workspace = true [features] complex = [] [dev-dependencies] lazy_static = "1.4.0" reqwest.workspace = true tempfile.workspace = true ================================================ FILE: api/LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. ================================================ FILE: api/LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: api/LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: api/c/.gitignore ================================================ *.h *.so mobilenet mobilenet_v2_1.0.onnx.nnef.tgz ================================================ FILE: api/c/Makefile ================================================ run: mobilenet mobilenet_v2_1.0.onnx.nnef.tgz LD_LIBRARY_PATH=. ./mobilenet clean: rm -f mobilenet libtract.so tract.h mobilenet: tract.h libtract.so mobilenet.c cc mobilenet.c -o mobilenet -L. -ltract libtract.so: cargo build -p tract-ffi --profile opt-no-lto cp ../../target/opt-no-lto/libtract.so . tract.h: cd ../ffi ; cbindgen -l c > tract.h cp ../ffi/tract.h . mobilenet_v2_1.0.onnx.nnef.tgz: wget -q https://s3.amazonaws.com/tract-ci-builds/tests/mobilenet_v2_1.0.onnx.nnef.tgz ================================================ FILE: api/c/mobilenet.c ================================================ #include "tract.h" #include #include #include #define check(call) \ { \ TRACT_RESULT result = call; \ if (result == TRACT_RESULT_KO) { \ fprintf(stderr, "Error calling tract: %s", \ tract_get_last_error()); \ exit(1); \ } \ } int main() { // Initialize nnef parser TractNnef *nnef = NULL; check(tract_nnef_create(&nnef)); assert(nnef); // Load the model TractModel *model = NULL; check(tract_nnef_load(nnef, "mobilenet_v2_1.0.onnx.nnef.tgz", &model)); assert(model); assert(nnef); // once the model is build, the framework is not necessary anymore check(tract_nnef_destroy(&nnef)); assert(!nnef); // Pick a runtime TractRuntime *runtime = NULL; check(tract_runtime_for_name("default", &runtime)); assert(runtime); // Make the model runnable TractRunnable *runnable = NULL; check(tract_runtime_prepare(runtime, &model, &runnable)); 
assert(runnable); assert(!model); float *image = malloc(3 * 224 * 224 * sizeof(float)); FILE *fd = fopen("grace_hopper_3_224_224.f32.raw", "rb"); assert(fread(image, sizeof(float), 3 * 224 * 224, fd) == 3 * 224 * 224); fclose(fd); TractTensor *input = NULL; size_t shape[] = {1, 3, 224, 224}; check( tract_tensor_from_bytes(TRACT_DATUM_TYPE_F32, 4, shape, image, &input)); free(image); TractTensor *output = NULL; // simple stateless run... check(tract_runnable_run(runnable, &input, &output)); const float *data = NULL; check(tract_tensor_as_bytes(output, NULL, NULL, NULL, (const void **)&data)); float max = data[0]; int argmax = 0; for (int i = 0; i < 1000; i++) { float val = data[i]; if (val > max) { max = val; argmax = i; } } printf("Max is %f for category %d\n", max, argmax); check(tract_tensor_destroy(&output)); // or spawn a state to run the model TractState *state = NULL; check(tract_runnable_spawn_state(runnable, &state)); assert(state); // runnable is refcounted by the spawned states, so we can release it now. 
check(tract_runnable_release(&runnable)); assert(!runnable); check(tract_state_run(state, &input, &output)); check(tract_tensor_as_bytes(output, NULL, NULL, NULL, (const void **)&data)); assert(data[argmax] == max); check(tract_tensor_destroy(&output)); // done with out state and input check(tract_state_destroy(&state)); check(tract_tensor_destroy(&input)); } ================================================ FILE: api/ffi/Cargo.toml ================================================ [package] name = "tract-ffi" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, neural network inference" repository = "https://github.com/snipsco/tract" keywords = [ "TensorFlow", "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*" ] [lib] name = "tract" crate-type = ["cdylib"] [badges] maintenance = { status = "actively-developed" } [dependencies] anyhow.workspace = true flate2.workspace = true serde.workspace = true serde_json.workspace = true tract-api.workspace = true tract-rs = { version = "0.23.0-pre", path = "../rs", package = "tract" } ================================================ FILE: api/ffi/cbindgen.toml ================================================ language = "C" after_includes = """ typedef enum DatumType { TRACT_DATUM_TYPE_BOOL = 1, TRACT_DATUM_TYPE_U8 = 17, TRACT_DATUM_TYPE_U16 = 18, TRACT_DATUM_TYPE_U32 = 20, TRACT_DATUM_TYPE_U64 = 24, TRACT_DATUM_TYPE_I8 = 33, TRACT_DATUM_TYPE_I16 = 34, TRACT_DATUM_TYPE_I32 = 36, TRACT_DATUM_TYPE_I64 = 40, TRACT_DATUM_TYPE_F16 = 50, TRACT_DATUM_TYPE_F32 = 52, TRACT_DATUM_TYPE_F64 = 56, TRACT_DATUM_TYPE_COMPLEX_I16 = 66, TRACT_DATUM_TYPE_COMPLEX_I32 = 68, TRACT_DATUM_TYPE_COMPLEX_I64 = 72, TRACT_DATUM_TYPE_COMPLEX_F16 = 82, TRACT_DATUM_TYPE_COMPLEX_F32 = 84, TRACT_DATUM_TYPE_COMPLEX_F64 = 88, } DatumType; """ ================================================ FILE: 
api/ffi/src/lib.rs
================================================
#![allow(clippy::missing_safety_doc)]
use anyhow::{Context, Result};
use std::cell::RefCell;
use std::ffi::{CStr, CString, c_char, c_void};
use tract_api::{
    AsFact, DatumType, DimInterface, FactInterface, InferenceModelInterface, ModelInterface,
    NnefInterface, OnnxInterface, RunnableInterface, RuntimeInterface, StateInterface,
    TensorInterface,
};
use tract_rs::{State, Tensor};

/// Used as a return type of functions that can encounter errors.
/// If the function encountered an error, you can retrieve it using the `tract_get_last_error`
/// function
#[repr(C)]
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Eq)]
pub enum TRACT_RESULT {
    /// The function returned successfully
    TRACT_RESULT_OK = 0,
    /// The function returned an error
    TRACT_RESULT_KO = 1,
}

thread_local! {
    // Per-thread slot holding the last error message, exposed to C through
    // `tract_get_last_error`.
    // NOTE(review): the generic parameters were stripped by extraction; restored as
    // `RefCell<Option<CString>>`, the only type consistent with the `Some(CString::new(..))`
    // stored by `wrap` below.
    pub(crate) static LAST_ERROR: RefCell<Option<CString>> = const { RefCell::new(None) };
}

/// Run `func`, mapping its `Result` onto the C-friendly `TRACT_RESULT` codes and
/// stashing any error message into the thread-local `LAST_ERROR` slot.
// NOTE(review): the generic bound was stripped by extraction; restored as
// `F: FnOnce() -> anyhow::Result<()>`, the only bound compatible with the `func()`
// call and the `anyhow::bail!` uses throughout this file.
fn wrap<F: FnOnce() -> anyhow::Result<()>>(func: F) -> TRACT_RESULT {
    match func() {
        Ok(_) => TRACT_RESULT::TRACT_RESULT_OK,
        Err(e) => {
            let msg = format!("{e:?}");
            // Optional debugging aid: mirror the error on stderr when requested.
            if std::env::var("TRACT_ERROR_STDERR").is_ok() {
                eprintln!("{msg}");
            }
            LAST_ERROR.with(|p| {
                *p.borrow_mut() = Some(CString::new(msg).unwrap_or_else(|_| {
                    CString::new("tract error message contains 0, can't convert to CString")
                        .unwrap()
                }))
            });
            TRACT_RESULT::TRACT_RESULT_KO
        }
    }
}

/// Retrieve the last error that happened in this thread. A function encountered an error if
/// its return type is of type `TRACT_RESULT` and it returned `TRACT_RESULT_KO`.
///
/// # Return value
/// It returns a pointer to a null-terminated UTF-8 string that will contain the error description.
/// Rust side keeps ownership of the buffer. It will be valid as long as no other tract calls is
/// performed by the thread.
/// If no error occurred, null is returned.
#[unsafe(no_mangle)]
pub extern "C" fn tract_get_last_error() -> *const std::ffi::c_char {
    // Hand out a pointer into the thread-local buffer, or null when no error is recorded.
    LAST_ERROR.with(|slot| match slot.borrow().as_ref() {
        Some(message) => message.as_ptr(),
        None => std::ptr::null(),
    })
}

/// Returns a pointer to a static buffer containing a null-terminated version string.
///
/// The returned pointer must not be freed.
#[unsafe(no_mangle)]
pub extern "C" fn tract_version() -> *const std::ffi::c_char {
    // The version string is baked in at compile time, with an explicit trailing NUL byte.
    let bytes = concat!(env!("CARGO_PKG_VERSION"), "\0").as_bytes();
    unsafe { CStr::from_bytes_with_nul_unchecked(bytes).as_ptr() }
}

/// Frees a string allocated by libtract.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_free_cstring(ptr: *mut std::ffi::c_char) {
    unsafe {
        if ptr.is_null() {
            return;
        }
        // Re-own the CString so that it is dropped (and its buffer freed) here.
        drop(CString::from_raw(ptr));
    }
}

macro_rules! check_not_null {
    ($($ptr:expr),*) => {
        $(
            if $ptr.is_null() {
                anyhow::bail!(concat!("Unexpected null pointer ", stringify!($ptr)));
            }
        )*
    }
}

macro_rules! release {
    ($ptr:expr) => {
        wrap(|| unsafe {
            check_not_null!($ptr, *$ptr);
            let _ = Box::from_raw(*$ptr);
            *$ptr = std::ptr::null_mut();
            Ok(())
        })
    };
}

// NNEF

pub struct TractNnef(tract_rs::Nnef);

/// Creates an instance of an NNEF framework and parser that can be used to load and dump NNEF models.
///
/// The returned object should be destroyed with `tract_nnef_destroy` once the model
/// has been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_create(nnef: *mut *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        *nnef = Box::into_raw(Box::new(TractNnef(tract_rs::nnef()?)));
        Ok(())
    })
}

/// Enable the `tract_core` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_core(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_core()
    })
}

/// Enable the `tract_extra` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_extra(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_extra()
    })
}

/// Enable the `tract_transformers` operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_tract_transformers(
    nnef: *mut TractNnef,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_tract_transformers()
    })
}

/// Enable the ONNX operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_onnx(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_onnx()
    })
}

/// Enable the pulse operator set on this NNEF parser.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_pulse(nnef: *mut TractNnef) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_pulse()
    })
}

/// Allow this parser to accept tract's extended identifier syntax in NNEF documents.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_enable_extended_identifier_syntax(
    nnef: *mut TractNnef,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef);
        (*nnef).0.enable_extended_identifier_syntax()
    })
}

/// Destroy the NNEF parser. It is safe to destroy the NNEF parser once the model had been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_destroy(nnef: *mut *mut TractNnef) -> TRACT_RESULT {
    release!(nnef)
}

/// Parse and load an NNEF model as a tract TypedModel.
///
/// `path` is a null-terminated utf-8 string pointer. It can be an archive (tar or tar.gz file) or a
/// directory.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_load( nnef: *const TractNnef, path: *const c_char, model: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, path); *model = std::ptr::null_mut(); let path = CStr::from_ptr(path).to_str()?; let m = Box::new(TractModel( (*nnef).0.load(path).with_context(|| format!("opening file {path:?}"))?, )); *model = Box::into_raw(m); Ok(()) }) } /// Parse and load an NNEF buffer as a tract TypedModel. /// /// `data` is a buffer pointer /// `len` ise the buffer len #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_load_buffer( nnef: *const TractNnef, data: *const c_void, len: usize, model: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, data); *model = std::ptr::null_mut(); let slice = std::slice::from_raw_parts(data as *const u8, len); let m = Box::new(TractModel((*nnef).0.load_buffer(slice)?)); *model = Box::into_raw(m); Ok(()) }) } /// Dump a TypedModel as a NNEF tar file. /// /// `path` is a null-terminated utf-8 string pointer to the `.tar` file to be created. /// /// This function creates a plain, non-compressed, archive. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_nnef_write_model_to_tar( nnef: *const TractNnef, path: *const c_char, model: *const TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(nnef, model, path); let path = CStr::from_ptr(path).to_str()?; (*nnef).0.write_model_to_tar(path, &(*model).0)?; Ok(()) }) } /// Dump a TypedModel as a NNEF .tar.gz file. /// /// `path` is a null-terminated utf-8 string pointer to the `.tar.gz` file to be created. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_write_model_to_tar_gz(
    nnef: *const TractNnef,
    path: *const c_char,
    model: *const TractModel,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef, model, path);
        let path = CStr::from_ptr(path).to_str()?;
        (*nnef).0.write_model_to_tar_gz(path, &(*model).0)?;
        Ok(())
    })
}

/// Dump a TypedModel as a NNEF directory.
///
/// `path` is a null-terminated utf-8 string pointer to the directory to be created.
// FIX: the previous doc claimed this "creates a plain, non-compressed, archive",
// a copy-paste leftover from the tar variant; this entry point writes a directory tree.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_nnef_write_model_to_dir(
    nnef: *const TractNnef,
    path: *const c_char,
    model: *const TractModel,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(nnef, model, path);
        let path = CStr::from_ptr(path).to_str()?;
        (*nnef).0.write_model_to_dir(path, &(*model).0)?;
        Ok(())
    })
}

// ONNX

pub struct TractOnnx(tract_rs::Onnx);

/// Creates an instance of an ONNX framework and parser that can be used to load models.
///
/// The returned object should be destroyed with `tract_onnx_destroy` once the model
/// has been loaded.
// FIX: the previous doc told callers to use `tract_nnef_destroy` on a TractOnnx pointer.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_onnx_create(onnx: *mut *mut TractOnnx) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(onnx);
        *onnx = Box::into_raw(Box::new(TractOnnx(tract_rs::onnx()?)));
        Ok(())
    })
}

/// Destroy the ONNX parser. It is safe to destroy the ONNX parser once the model had been loaded.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_onnx_destroy(onnx: *mut *mut TractOnnx) -> TRACT_RESULT {
    release!(onnx)
}

/// Parse and load an ONNX model as a tract InferenceModel.
///
/// `path` is a null-terminated utf-8 string pointer. It must point to a `.onnx` model file.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_onnx_load( onnx: *const TractOnnx, path: *const c_char, model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(onnx, path, model); *model = std::ptr::null_mut(); let path = CStr::from_ptr(path).to_str()?; let m = Box::new(TractInferenceModel((*onnx).0.load(path)?)); *model = Box::into_raw(m); Ok(()) }) } /// Parse and load an ONNX buffer as a tract InferenceModel. /// /// `data` is a buffer pointer /// `len` ise the buffer len #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_onnx_load_buffer( onnx: *const TractOnnx, data: *const c_void, len: usize, model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(onnx, model, data); *model = std::ptr::null_mut(); let slice = std::slice::from_raw_parts(data as *const u8, len); let m = Box::new(TractInferenceModel((*onnx).0.load_buffer(slice)?)); *model = Box::into_raw(m); Ok(()) }) } // INFERENCE MODEL pub struct TractInferenceModel(tract_rs::InferenceModel); /// Query an InferenceModel input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_count( model: *const TractInferenceModel, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an InferenceModel output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_count( model: *const TractInferenceModel, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the name of a model input. /// /// The returned name must be freed by the caller using tract_free_cstring. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_name( model: *const TractInferenceModel, input: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(&*m.input_name(input)?)?.into_raw(); Ok(()) }) } /// Query the name of a model output. /// /// The returned name must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_name( model: *const TractInferenceModel, output: usize, name: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(&*m.output_name(output)?)?.into_raw() as _; Ok(()) }) } /// Query a model input fact. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_input_fact( model: *const TractInferenceModel, input_id: usize, fact: *mut *mut TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractInferenceFact(f))); Ok(()) }) } /// Set an input fact of an InferenceModel. /// /// The `fact` argument is only borrowed by this function, it still must be destroyed. /// `fact` can be set to NULL to erase the current output fact of the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_set_input_fact( model: *mut TractInferenceModel, input_id: usize, fact: *const TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); let f = fact.as_ref().map(|f| &f.0).cloned().unwrap_or_default(); (*model).0.set_input_fact(input_id, f)?; Ok(()) }) } /// Query an output fact for an InferenceModel. /// /// The return model must be freed using `tract_inference_fact_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_output_fact( model: *const TractInferenceModel, output_id: usize, fact: *mut *mut TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.output_fact(output_id)?; *fact = Box::into_raw(Box::new(TractInferenceFact(f))); Ok(()) }) } /// Set an output fact of an InferenceModel. /// /// The `fact` argument is only borrowed by this function, it still must be destroyed. /// `fact` can be set to NULL to erase the current output fact of the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_set_output_fact( model: *mut TractInferenceModel, output_id: usize, fact: *const TractInferenceFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); let f = fact.as_ref().map(|f| &f.0).cloned().unwrap_or_default(); (*model).0.set_output_fact(output_id, f)?; Ok(()) }) } /// Analyse an InferencedModel in-place. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_analyse( model: *mut TractInferenceModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model); (*model).0.analyse()?; Ok(()) }) } /// Transform a fully analysed InferenceModel to a TypedModel. /// /// This function takes ownership of the InferenceModel `model` whether it succeeds /// or not. `tract_inference_model_destroy` must not be used on `model`. /// /// On the other hand, caller will be owning the newly created typed model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_into_model( model: *mut *mut TractInferenceModel, typed: *mut *mut TractModel, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, *model, typed); *typed = std::ptr::null_mut(); let m = Box::from_raw(*model); *model = std::ptr::null_mut(); let result = m.0.into_model()?; *typed = Box::into_raw(Box::new(TractModel(result))) as _; Ok(()) }) } /// Destroy an InferenceModel. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_inference_model_destroy( model: *mut *mut TractInferenceModel, ) -> TRACT_RESULT { release!(model) } // TYPED MODEL pub struct TractModel(tract_rs::Model); /// Query an InferenceModel input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_count( model: *const TractModel, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an InferenceModel output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_count( model: *const TractModel, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the name of a model input. /// /// The returned name must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_name( model: *const TractModel, input: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(m.input_name(input)?)?.into_raw(); Ok(()) }) } /// Query the input fact of a model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_input_fact( model: *const TractModel, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the name of a model output. /// /// The returned name must be freed by the caller using tract_free_cstring. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_name( model: *const TractModel, output: usize, name: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name); *name = std::ptr::null_mut(); let m = &(*model).0; *name = CString::new(m.output_name(output)?)?.into_raw(); Ok(()) }) } /// Query the output fact of a model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_output_fact( model: *const TractModel, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, fact); *fact = std::ptr::null_mut(); let f = (*model).0.output_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Apply a transform to the model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_transform( model: *mut TractModel, transform: *const i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, transform); let t = CStr::from_ptr(transform as _) .to_str() .context("failed to parse transform name (not utf8)")?; (*model).0.transform(t) }) } /// Perform a profile of the model using the provided inputs. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_profile_json( model: *mut TractRunnable, inputs: *mut *mut TractTensor, json: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, json); let input: Option> = if !inputs.is_null() { let input_len = (*model).0.input_count()?; Some( std::slice::from_raw_parts(inputs, input_len) .iter() .map(|tv| (**tv).0.clone()) .collect(), ) } else { None }; let profile = (*model).0.profile_json(input)?; *json = CString::new(profile)?.into_raw() as _; Ok(()) }) } /// Convert a TypedModel into a TypedRunnableModel. /// /// This function transfers ownership of the `model` argument to the newly-created `runnable` model. /// /// Runnable are reference counted. When done, it should be released with `tract_runnable_release`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_into_runnable( model: *mut *mut TractModel, runnable: *mut *mut TractRunnable, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, runnable); let m = Box::from_raw(*model).0; *model = std::ptr::null_mut(); *runnable = Box::into_raw(Box::new(TractRunnable(m.into_runnable()?))) as _; Ok(()) }) } /// Query the number of properties in a model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property_count( model: *const TractModel, count: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, count); *count = (*model).0.property_keys()?.len(); Ok(()) }) } /// Query the properties names of a model. /// /// The "names" array should be big enough to fit `tract_model_property_count` string pointers. /// /// Each name will have to be freed using `tract_free_cstring`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property_names( model: *const TractModel, names: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, names); for (ix, name) in (*model).0.property_keys()?.iter().enumerate() { *names.add(ix) = CString::new(&**name)?.into_raw() as _; } Ok(()) }) } /// Query a property tensor in a model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_model_property( model: *const TractModel, name: *const i8, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name, tensor); let name = CStr::from_ptr(name as _) .to_str() .context("failed to parse property name (not utf8)")? .to_owned(); let v = (*model).0.property(name).context("Property not found")?; *tensor = Box::into_raw(Box::new(TractTensor(v))); Ok(()) }) } /// Parse a fact specification string into an Fact. /// /// The returned fact must be free with `tract_fact_destroy`. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_model_parse_fact(
    model: *mut TractModel,
    spec: *const c_char,
    fact: *mut *mut TractFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(model, spec, fact);
        let spec = CStr::from_ptr(spec).to_str()?;
        let f: tract_rs::Fact = spec.as_fact(&(*model).0)?.as_ref().clone();
        *fact = Box::into_raw(Box::new(TractFact(f)));
        Ok(())
    })
}

/// Destroy a TypedModel.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_model_destroy(model: *mut *mut TractModel) -> TRACT_RESULT {
    release!(model)
}

// RUNTIME MODEL

pub struct TractRuntime(tract_rs::Runtime);

/// Creates an instance of a tract Runtime that can be used to run model on a specific
/// hardware / software stack (like a GPU).
///
/// The returned object should be released with `tract_runtime_release`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_runtime_for_name(
    name: *const c_char,
    nnef: *mut *mut TractRuntime,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        // FIX: `name` was dereferenced by CStr::from_ptr without a null check,
        // unlike every other entry point in this file. Check it so a null `name`
        // yields a clean TRACT_RESULT_KO instead of undefined behavior.
        check_not_null!(name, nnef);
        let name = CStr::from_ptr(name).to_str()?;
        *nnef = Box::into_raw(Box::new(TractRuntime(tract_rs::runtime_for_name(name)?)));
        Ok(())
    })
}

/// Query the name of a Runtime.
///
/// The returned name must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_runtime_name(
    runtime: *const TractRuntime,
    name: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(runtime, name);
        *name = std::ptr::null_mut();
        let n = (*runtime).0.name()?;
        *name = CString::new(n)?.into_raw();
        Ok(())
    })
}

/// Convert a Model into a Runnable for this Runtime.
///
/// This function transfers ownership of the `model` argument to the newly-created `runnable` model.
///
/// Runnable are reference counted. When done, it should be released with `tract_runnable_release`.
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runtime_prepare( runtime: *const TractRuntime, model: *mut *mut TractModel, runnable: *mut *mut TractRunnable, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runtime, model, runnable); let m = Box::from_raw(*model).0; *model = std::ptr::null_mut(); *runnable = Box::into_raw(Box::new(TractRunnable((*runtime).0.prepare(m)?))) as _; Ok(()) }) } #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runtime_release(runtime: *mut *mut TractRuntime) -> TRACT_RESULT { release!(runtime) } // RUNNABLE MODEL pub struct TractRunnable(tract_rs::Runnable); /// Spawn a session state from a runnable model. /// /// This function does not take ownership of the `runnable` object, it can be used again to spawn /// other state instances. The runnable object is internally reference counted, it will be /// kept alive as long as any associated `State` exists (or as long as the `runnable` is not /// explicitely release with `tract_runnable_release`). /// /// `state` is a newly-created object. It should ultimately be detroyed with `tract_state_destroy`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_spawn_state( runnable: *mut TractRunnable, state: *mut *mut TractState, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, state); *state = std::ptr::null_mut(); let s = (*runnable).0.spawn_state()?; *state = Box::into_raw(Box::new(TractState(s))); Ok(()) }) } /// Convenience function to run a stateless model. /// /// `inputs` is a pointer to an pre-existing array of input TractTensor. Its length *must* be equal /// to the number of inputs of the models. The function does not take ownership of the input /// tensors. /// `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten /// with pointers to output tensors. These tensors are under the responsiblity of the caller, it /// will have to release them with `tract_tensor_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_run( runnable: *mut TractRunnable, inputs: *mut *mut TractTensor, outputs: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable); let mut s = (*runnable).0.spawn_state()?; state_run(&mut s, inputs, outputs) }) } /// Query a Runnable input counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_input_count( model: *const TractRunnable, inputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, inputs); let model = &(*model).0; *inputs = model.input_count()?; Ok(()) }) } /// Query an Runnable output counts. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_output_count( model: *const TractRunnable, outputs: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, outputs); let model = &(*model).0; *outputs = model.output_count()?; Ok(()) }) } /// Query the input fact of a runnable model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_input_fact( runnable: *const TractRunnable, input_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, fact); *fact = std::ptr::null_mut(); let f = (*runnable).0.input_fact(input_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the output fact of a runnable model. /// /// Thre returned fact must be freed with tract_fact_destroy. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_output_fact( runnable: *const TractRunnable, output_id: usize, fact: *mut *mut TractFact, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(runnable, fact); *fact = std::ptr::null_mut(); let f = (*runnable).0.output_fact(output_id)?; *fact = Box::into_raw(Box::new(TractFact(f))); Ok(()) }) } /// Query the number of properties in a runnable model. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property_count( model: *const TractRunnable, count: *mut usize, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, count); *count = (*model).0.property_keys()?.len(); Ok(()) }) } /// Query the properties names of a runnable model. /// /// The "names" array should be big enough to fit `tract_model_property_count` string pointers. /// /// Each name will have to be freed using `tract_free_cstring`. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property_names( model: *const TractRunnable, names: *mut *mut i8, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, names); for (ix, name) in (*model).0.property_keys()?.iter().enumerate() { *names.add(ix) = CString::new(&**name)?.into_raw() as _; } Ok(()) }) } /// Query a property tensor in a runnable model. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_property( model: *const TractRunnable, name: *const i8, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(model, name, tensor); let name = CStr::from_ptr(name as _) .to_str() .context("failed to parse property name (not utf8)")? .to_owned(); let v = (*model).0.property(name).context("Property not found")?; *tensor = Box::into_raw(Box::new(TractTensor(v))); Ok(()) }) } #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_runnable_release(runnable: *mut *mut TractRunnable) -> TRACT_RESULT { release!(runnable) } // TENSOR pub struct TractTensor(tract_rs::Tensor); /// Create a TractTensor from caller data and metadata. /// /// This call copies the data into tract space. All the pointers only need to be alive for the /// duration of the call. /// /// rank is the number of dimensions of the tensor (i.e. the length of the shape vector). /// /// The returned tensor must be destroyed by `tract_tensor_destroy`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_from_bytes( datum_type: DatumType, rank: usize, shape: *const usize, data: *mut c_void, tensor: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(tensor); *tensor = std::ptr::null_mut(); let shape = std::slice::from_raw_parts(shape, rank); let len = shape.iter().product::(); let data = std::slice::from_raw_parts(data as *const u8, len * datum_type.size_of()); let it = Tensor::from_bytes(datum_type, shape, data)?; *tensor = Box::into_raw(Box::new(TractTensor(it))); Ok(()) }) } /// Write a tensor as a debug string /// /// The returned string must be freed by the caller using tract_free_cstring. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_dump( tensor: *const TractTensor, spec: *mut *mut c_char, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(tensor, spec); *spec = CString::new(format!("{:?}", (*tensor).0))?.into_raw(); Ok(()) }) } /// Convert a tensor to a new datum type. /// /// This function will perform a cheap shallow clone if the destination type is /// the same as the current type, otherwise it returns a newly allocated Tensor instead. /// /// In both cases, the returned tensor must be destroyed by `tract_tensor_destroy`. /// The input tensor is not consumed, it still need to be destroyed. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_convert_to( input: *const TractTensor, datum_type: DatumType, output: *mut *mut TractTensor, ) -> TRACT_RESULT { wrap(|| unsafe { check_not_null!(input, output); *output = std::ptr::null_mut(); let new = (*input).0.convert_to(datum_type)?; *output = Box::into_raw(Box::new(TractTensor(new))); Ok(()) }) } /// Destroy a tensor. #[unsafe(no_mangle)] pub unsafe extern "C" fn tract_tensor_destroy(tensor: *mut *mut TractTensor) -> TRACT_RESULT { release!(tensor) } /// Inspect part of a tensor. Except `tensor`, all argument pointers can be null if only some specific bits /// are required. 
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_tensor_as_bytes(
    tensor: *mut TractTensor,
    datum_type: *mut DatumType,
    rank: *mut usize,
    shape: *mut *const usize,
    data: *mut *const c_void,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(tensor);
        let tensor = &(*tensor).0;
        let bits = tensor.as_bytes()?;
        // Each out-parameter is optional: only fill the ones the caller asked for.
        if !datum_type.is_null() {
            *datum_type = bits.0;
        }
        if !rank.is_null() {
            *rank = bits.1.len();
        }
        if !shape.is_null() {
            // Points into the tensor's own storage; valid only while the tensor lives.
            *shape = bits.1.as_ptr();
        }
        if !data.is_null() {
            *data = bits.2.as_ptr() as _;
        }
        Ok(())
    })
}

// STATE

pub struct TractState(tract_rs::State);

/// Run a turn on a model state
///
/// `inputs` is a pointer to an pre-existing array of input TractTensor. Its length *must* be equal
/// to the number of inputs of the models. The function does not take ownership of the input
/// tensors.
/// `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten
/// with pointers to output tensors. These tensors are under the responsibility of the caller, it
/// will have to release them with `tract_tensor_destroy`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_run(
    state: *mut TractState,
    inputs: *mut *mut TractTensor,
    outputs: *mut *mut TractTensor,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, inputs, outputs);
        state_run(&mut (*state).0, inputs, outputs)
    })
}

/// Query a State input count.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_input_count(
    state: *const TractState,
    inputs: *mut usize,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, inputs);
        let state = &(*state).0;
        *inputs = state.input_count()?;
        Ok(())
    })
}

/// Query a State output count.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_output_count(
    state: *const TractState,
    outputs: *mut usize,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(state, outputs);
        let state = &(*state).0;
        *outputs = state.output_count()?;
        Ok(())
    })
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_state_destroy(state: *mut *mut TractState) -> TRACT_RESULT {
    release!(state)
}

// FACT

pub struct TractFact(tract_rs::Fact);

/// Gets the rank (aka number of axes/dimensions) of a fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_rank(fact: *const TractFact, rank: *mut usize) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, rank);
        *rank = (*fact).0.rank()?;
        Ok(())
    })
}

/// Extract the datum type of the fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_datum_type(
    fact: *const TractFact,
    datum_type: *mut DatumType,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, datum_type);
        *datum_type = (*fact).0.datum_type()?;
        Ok(())
    })
}

/// Extract the dimension from one dimension of the fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_dim(
    fact: *const TractFact,
    axis: usize,
    dim: *mut *mut TractDim,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, dim);
        let d = (*fact).0.dim(axis)?;
        // Caller owns the returned dim; release it with tract_dim_destroy.
        *dim = Box::into_raw(Box::new(TractDim(d)));
        Ok(())
    })
}

/// Write a fact as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_dump(
    fact: *const TractFact,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, spec);
        *spec = CString::new(format!("{}", (*fact).0))?.into_raw();
        Ok(())
    })
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_fact_destroy(fact: *mut *mut TractFact) -> TRACT_RESULT {
    release!(fact)
}

// INFERENCE FACT

pub struct TractInferenceFact(tract_rs::InferenceFact);

/// Parse a fact specification string into an InferenceFact.
///
/// The returned fact must be freed with `tract_inference_fact_destroy`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_parse(
    model: *mut TractInferenceModel,
    spec: *const c_char,
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(model, spec, fact);
        let spec = CStr::from_ptr(spec).to_str()?;
        // The model is needed to resolve symbols appearing in the spec string.
        let f: tract_rs::InferenceFact = spec.as_fact(&(*model).0)?.as_ref().clone();
        *fact = Box::into_raw(Box::new(TractInferenceFact(f)));
        Ok(())
    })
}

/// Creates an empty inference fact.
///
/// The returned fact must be freed by the caller using tract_inference_fact_destroy
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_empty(
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact);
        *fact = Box::into_raw(Box::new(TractInferenceFact(Default::default())));
        Ok(())
    })
}

/// Write an inference fact as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_dump(
    fact: *const TractInferenceFact,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, spec);
        *spec = CString::new(format!("{}", (*fact).0))?.into_raw();
        Ok(())
    })
}

/// Destroy a fact.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_inference_fact_destroy(
    fact: *mut *mut TractInferenceFact,
) -> TRACT_RESULT {
    release!(fact)
}

/// Dim
pub struct TractDim(tract_rs::Dim);

/// Substitute symbols by the provided values in the Dim, generating a new one.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_eval(
    dim: *const TractDim,
    nb_symbols: usize,
    symbols: *const *const i8,
    values: *const i64,
    result: *mut *mut TractDim,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(dim, symbols, values, result);
        // Build the (symbol name, value) substitution table from the parallel
        // `symbols` / `values` arrays, both of length `nb_symbols`.
        let mut table = vec![];
        for i in 0..nb_symbols {
            let name = CStr::from_ptr(*symbols.add(i) as _)
                .to_str()
                .with_context(|| {
                    format!("failed to parse symbol name for {i}th symbol (not utf8)")
                })?
                .to_owned();
            table.push((name, *values.add(i)));
        }
        let r = (*dim).0.eval(table)?;
        // Caller owns the returned dim; release it with tract_dim_destroy.
        *result = Box::into_raw(Box::new(TractDim(r)));
        Ok(())
    })
}

/// Try converting a Dim into an actual integer
///
/// Will fail if the Dim contains symbols.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_to_int64(fact: *const TractDim, i: *mut i64) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(fact, i);
        *i = (*fact).0.to_int64()?;
        Ok(())
    })
}

/// Write a dim as its specification string.
///
/// The returned string must be freed by the caller using tract_free_cstring.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_dump(
    dim: *const TractDim,
    spec: *mut *mut c_char,
) -> TRACT_RESULT {
    wrap(|| unsafe {
        check_not_null!(dim, spec);
        *spec = CString::new((*dim).0.to_string())?.into_raw();
        Ok(())
    })
}

/// Destroy a dim.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn tract_dim_destroy(dim: *mut *mut TractDim) -> TRACT_RESULT {
    release!(dim)
}

// MISC

// HELPERS

// Shared implementation behind tract_runnable_run and tract_state_run: reads
// `input_count` tensors from `inputs` (cloning them, ownership stays with the
// caller) and writes freshly boxed output tensors into `outputs`.
unsafe fn state_run(
    state: &mut State,
    inputs: *mut *mut TractTensor,
    outputs: *mut *mut TractTensor,
) -> Result<()> {
    unsafe {
        let values: Vec<_> = std::slice::from_raw_parts(inputs, state.input_count()?)
.iter() .map(|tv| (**tv).0.clone()) .collect(); let values = state.run(values)?; for (i, value) in values.into_iter().enumerate() { *(outputs.add(i)) = Box::into_raw(Box::new(TractTensor(value))) } Ok(()) } } ================================================ FILE: api/generate-tract-h.sh ================================================ #!/bin/sh set -ex cargo install cbindgen cbindgen ffi > tract.h cp tract.h c mv tract.h proxy/sys ================================================ FILE: api/proxy/Cargo.toml ================================================ [package] name = "tract-proxy" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*" ] [dependencies] anyhow.workspace = true boow.workspace = true home.workspace = true ndarray.workspace = true tract-api.workspace = true tract-proxy-sys.workspace = true [dev-dependencies] reqwest.workspace = true rustls.workspace = true tempfile.workspace = true serde_json.workspace = true ================================================ FILE: api/proxy/LICENSE ================================================ ## License Licensed under either of * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
================================================ FILE: api/proxy/LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: api/proxy/LICENSE-MIT ================================================ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: api/proxy/ci.sh ================================================ #!/bin/sh ROOT=$(dirname $(realpath $0))/../.. set -ex cargo build --release -p tract-ffi $CARGO_EXTRA SO=$(cargo build --message-format=json --release -p tract-ffi $CARGO_EXTRA | grep cdylib | jshon -e filenames -e 0 -u) SO_PATH=$(dirname $SO) export TRACT_DYLIB_SEARCH_PATH=$SO_PATH export LD_LIBRARY_PATH=$SO_PATH cd $(dirname $(realpath $0)) cargo test $CARGO_EXTRA ================================================ FILE: api/proxy/src/lib.rs ================================================ use std::ffi::{CStr, CString}; use std::path::Path; use std::ptr::{null, null_mut}; use tract_api::*; use tract_proxy_sys as sys; use anyhow::{Context, Result}; use ndarray::*; macro_rules! check { ($expr:expr) => { unsafe { if $expr == sys::TRACT_RESULT_TRACT_RESULT_KO { let buf = CStr::from_ptr(sys::tract_get_last_error()); Err(anyhow::anyhow!(buf.to_string_lossy().to_string())) } else { Ok(()) } } }; } macro_rules! 
wrapper { ($new_type:ident, $c_type:ident, $dest:ident $(, $typ:ty )*) => { #[derive(Debug, Clone)] pub struct $new_type(*mut sys::$c_type $(, $typ)*); impl Drop for $new_type { fn drop(&mut self) { unsafe { sys::$dest(&mut self.0); } } } }; } pub fn nnef() -> Result { let mut nnef = null_mut(); check!(sys::tract_nnef_create(&mut nnef))?; Ok(Nnef(nnef)) } pub fn onnx() -> Result { let mut onnx = null_mut(); check!(sys::tract_onnx_create(&mut onnx))?; Ok(Onnx(onnx)) } pub fn version() -> &'static str { unsafe { CStr::from_ptr(sys::tract_version()).to_str().unwrap() } } wrapper!(Nnef, TractNnef, tract_nnef_destroy); impl NnefInterface for Nnef { type Model = Model; fn load(&self, path: impl AsRef) -> Result { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; let mut model = null_mut(); check!(sys::tract_nnef_load(self.0, path.as_ptr(), &mut model))?; Ok(Model(model)) } fn load_buffer(&self, data: &[u8]) -> Result { let mut model = null_mut(); check!(sys::tract_nnef_load_buffer(self.0, data.as_ptr() as _, data.len(), &mut model))?; Ok(Model(model)) } fn enable_tract_core(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_core(self.0)) } fn enable_tract_extra(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_extra(self.0)) } fn enable_tract_transformers(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_tract_transformers(self.0)) } fn enable_onnx(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_onnx(self.0)) } fn enable_pulse(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_pulse(self.0)) } fn enable_extended_identifier_syntax(&mut self) -> Result<()> { check!(sys::tract_nnef_enable_extended_identifier_syntax(self.0)) } fn write_model_to_dir(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; 
check!(sys::tract_nnef_write_model_to_dir(self.0, path.as_ptr(), model.0))?; Ok(()) } fn write_model_to_tar(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; check!(sys::tract_nnef_write_model_to_tar(self.0, path.as_ptr(), model.0))?; Ok(()) } fn write_model_to_tar_gz(&self, path: impl AsRef, model: &Model) -> Result<()> { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; check!(sys::tract_nnef_write_model_to_tar_gz(self.0, path.as_ptr(), model.0))?; Ok(()) } } // ONNX wrapper!(Onnx, TractOnnx, tract_onnx_destroy); impl OnnxInterface for Onnx { type InferenceModel = InferenceModel; fn load(&self, path: impl AsRef) -> Result { let path = path.as_ref(); let path = CString::new( path.to_str().with_context(|| format!("Failed to re-encode {path:?} to uff-8"))?, )?; let mut model = null_mut(); check!(sys::tract_onnx_load(self.0, path.as_ptr(), &mut model))?; Ok(InferenceModel(model)) } fn load_buffer(&self, data: &[u8]) -> Result { let mut model = null_mut(); check!(sys::tract_onnx_load_buffer(self.0, data.as_ptr() as _, data.len(), &mut model))?; Ok(InferenceModel(model)) } } // INFERENCE MODEL wrapper!(InferenceModel, TractInferenceModel, tract_inference_model_destroy); impl InferenceModelInterface for InferenceModel { type Model = Model; type InferenceFact = InferenceFact; fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_inference_model_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_inference_model_output_count(self.0, &mut count))?; Ok(count) } fn input_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_input_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); 
sys::tract_free_cstring(ptr); Ok(ret) } } fn output_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_output_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_input_fact(self.0, id, &mut ptr))?; Ok(InferenceFact(ptr)) } fn set_input_fact( &mut self, id: usize, fact: impl AsFact, ) -> Result<()> { let fact = fact.as_fact(self)?; check!(sys::tract_inference_model_set_input_fact(self.0, id, fact.0))?; Ok(()) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_output_fact(self.0, id, &mut ptr))?; Ok(InferenceFact(ptr)) } fn set_output_fact( &mut self, id: usize, fact: impl AsFact, ) -> Result<()> { let fact = fact.as_fact(self)?; check!(sys::tract_inference_model_set_output_fact(self.0, id, fact.0))?; Ok(()) } fn analyse(&mut self) -> Result<()> { check!(sys::tract_inference_model_analyse(self.0))?; Ok(()) } fn into_model(mut self) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_model_into_model(&mut self.0, &mut ptr))?; Ok(Model(ptr)) } } // MODEL wrapper!(Model, TractModel, tract_model_destroy); impl ModelInterface for Model { type Fact = Fact; type Tensor = Tensor; type Runnable = Runnable; fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_model_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_model_output_count(self.0, &mut count))?; Ok(count) } fn input_name(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_input_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn output_name(&self, id: usize) -> Result { let mut ptr = null_mut(); 
check!(sys::tract_model_output_name(self.0, id, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_input_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_model_output_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn into_runnable(self) -> Result { let mut model = self; let mut runnable = null_mut(); check!(sys::tract_model_into_runnable(&mut model.0, &mut runnable))?; Ok(Runnable(runnable)) } fn transform(&mut self, spec: impl Into) -> Result<()> { let transform = spec.into().to_transform_string(); let t = CString::new(transform)?; check!(sys::tract_model_transform(self.0, t.as_ptr()))?; Ok(()) } fn property_keys(&self) -> Result> { let mut len = 0; check!(sys::tract_model_property_count(self.0, &mut len))?; let mut keys = vec![null_mut(); len]; check!(sys::tract_model_property_names(self.0, keys.as_mut_ptr()))?; unsafe { keys.into_iter() .map(|pc| { let s = CStr::from_ptr(pc).to_str()?.to_owned(); sys::tract_free_cstring(pc); Ok(s) }) .collect() } } fn property(&self, name: impl AsRef) -> Result { let mut v = null_mut(); let name = CString::new(name.as_ref())?; check!(sys::tract_model_property(self.0, name.as_ptr(), &mut v))?; Ok(Tensor(v)) } fn parse_fact(&self, spec: &str) -> Result { let spec = CString::new(spec)?; let mut ptr = null_mut(); check!(sys::tract_model_parse_fact(self.0, spec.as_ptr(), &mut ptr))?; Ok(Fact(ptr)) } } // RUNTIME wrapper!(Runtime, TractRuntime, tract_runtime_release); pub fn runtime_for_name(name: &str) -> Result { let mut rt = null_mut(); let name = CString::new(name)?; check!(sys::tract_runtime_for_name(name.as_ptr(), &mut rt))?; Ok(Runtime(rt)) } impl RuntimeInterface for Runtime { type Runnable = Runnable; type Model = Model; fn name(&self) -> Result { let mut ptr = null_mut(); 
check!(sys::tract_runtime_name(self.0, &mut ptr))?; unsafe { let ret = CStr::from_ptr(ptr).to_str()?.to_owned(); sys::tract_free_cstring(ptr); Ok(ret) } } fn prepare(&self, model: Self::Model) -> Result { let mut model = model; let mut runnable = null_mut(); check!(sys::tract_runtime_prepare(self.0, &mut model.0, &mut runnable))?; Ok(Runnable(runnable)) } } // RUNNABLE wrapper!(Runnable, TractRunnable, tract_runnable_release); unsafe impl Send for Runnable {} unsafe impl Sync for Runnable {} impl RunnableInterface for Runnable { type Tensor = Tensor; type State = State; type Fact = Fact; fn run(&self, inputs: impl IntoInputs) -> Result> { StateInterface::run(&mut self.spawn_state()?, inputs.into_inputs()?) } fn spawn_state(&self) -> Result { let mut state = null_mut(); check!(sys::tract_runnable_spawn_state(self.0, &mut state))?; Ok(State(state)) } fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_runnable_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_runnable_output_count(self.0, &mut count))?; Ok(count) } fn input_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_runnable_input_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn output_fact(&self, id: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_runnable_output_fact(self.0, id, &mut ptr))?; Ok(Fact(ptr)) } fn property_keys(&self) -> Result> { let mut len = 0; check!(sys::tract_runnable_property_count(self.0, &mut len))?; let mut keys = vec![null_mut(); len]; check!(sys::tract_runnable_property_names(self.0, keys.as_mut_ptr()))?; unsafe { keys.into_iter() .map(|pc| { let s = CStr::from_ptr(pc).to_str()?.to_owned(); sys::tract_free_cstring(pc); Ok(s) }) .collect() } } fn property(&self, name: impl AsRef) -> Result { let mut v = null_mut(); let name = CString::new(name.as_ref())?; check!(sys::tract_runnable_property(self.0, name.as_ptr(), &mut v))?; Ok(Tensor(v)) } fn 
cost_json(&self) -> Result { let input: Option> = None; self.profile_json(input) } fn profile_json(&self, inputs: Option) -> Result where I: IntoIterator, IV: TryInto, IE: Into, { let inputs = if let Some(inputs) = inputs { let inputs = inputs .into_iter() .map(|i| i.try_into().map_err(|e| e.into())) .collect::>>()?; anyhow::ensure!(self.input_count()? == inputs.len()); Some(inputs) } else { None }; let mut iptrs: Option> = inputs.as_ref().map(|is| is.iter().map(|v| v.0).collect()); let mut json: *mut i8 = null_mut(); let values = iptrs.as_mut().map(|it| it.as_mut_ptr()).unwrap_or(null_mut()); check!(sys::tract_runnable_profile_json(self.0, values, &mut json))?; anyhow::ensure!(!json.is_null()); unsafe { let s = CStr::from_ptr(json).to_owned(); sys::tract_free_cstring(json); Ok(s.to_str()?.to_owned()) } } } // STATE wrapper!(State, TractState, tract_state_destroy); impl StateInterface for State { type Tensor = Tensor; type Fact = Fact; fn run(&mut self, inputs: impl IntoInputs) -> Result> { let inputs = inputs.into_inputs()?; let mut outputs = vec![null_mut(); self.output_count()?]; let mut inputs: Vec<_> = inputs.iter().map(|v| v.0).collect(); check!(sys::tract_state_run(self.0, inputs.as_mut_ptr(), outputs.as_mut_ptr()))?; let outputs = outputs.into_iter().map(Tensor).collect(); Ok(outputs) } fn input_count(&self) -> Result { let mut count = 0; check!(sys::tract_state_input_count(self.0, &mut count))?; Ok(count) } fn output_count(&self) -> Result { let mut count = 0; check!(sys::tract_state_output_count(self.0, &mut count))?; Ok(count) } } // TENSOR wrapper!(Tensor, TractTensor, tract_tensor_destroy); unsafe impl Send for Tensor {} unsafe impl Sync for Tensor {} impl TensorInterface for Tensor { fn from_bytes(dt: DatumType, shape: &[usize], data: &[u8]) -> Result { anyhow::ensure!(data.len() == shape.iter().product::() * dt.size_of()); let mut value = null_mut(); check!(sys::tract_tensor_from_bytes( dt as _, shape.len(), shape.as_ptr(), data.as_ptr() as _, &mut 
value ))?; Ok(Tensor(value)) } fn as_bytes(&self) -> Result<(DatumType, &[usize], &[u8])> { let mut rank = 0; let mut dt = sys::DatumType_TRACT_DATUM_TYPE_BOOL as _; let mut shape = null(); let mut data = null(); check!(sys::tract_tensor_as_bytes(self.0, &mut dt, &mut rank, &mut shape, &mut data))?; unsafe { let dt: DatumType = std::mem::transmute(dt); let shape = std::slice::from_raw_parts(shape, rank); let len: usize = shape.iter().product(); let data = std::slice::from_raw_parts(data as *const u8, len * dt.size_of()); Ok((dt, shape, data)) } } fn datum_type(&self) -> Result { let mut dt = sys::DatumType_TRACT_DATUM_TYPE_BOOL as _; check!(sys::tract_tensor_as_bytes( self.0, &mut dt, std::ptr::null_mut(), std::ptr::null_mut(), std::ptr::null_mut() ))?; unsafe { let dt: DatumType = std::mem::transmute(dt); Ok(dt) } } fn convert_to(&self, to: DatumType) -> Result { let mut new = null_mut(); check!(sys::tract_tensor_convert_to(self.0, to as _, &mut new))?; Ok(Tensor(new)) } } impl PartialEq for Tensor { fn eq(&self, other: &Self) -> bool { let Ok((me_dt, me_shape, me_data)) = self.as_bytes() else { return false }; let Ok((other_dt, other_shape, other_data)) = other.as_bytes() else { return false }; me_dt == other_dt && me_shape == other_shape && me_data == other_data } } tensor_from_to_ndarray!(); // FACT wrapper!(Fact, TractFact, tract_fact_destroy); impl Fact { fn new(model: &Model, spec: impl ToString) -> Result { let cstr = CString::new(spec.to_string())?; let mut fact = null_mut(); check!(sys::tract_model_parse_fact(model.0, cstr.as_ptr(), &mut fact))?; Ok(Fact(fact)) } fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_fact_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl FactInterface for Fact { type Dim = Dim; fn datum_type(&self) -> Result { let mut dt = 0u32; check!(sys::tract_fact_datum_type(self.0, &mut dt as *const u32 as _))?; Ok(unsafe { 
std::mem::transmute::(dt) }) } fn rank(&self) -> Result { let mut rank = 0; check!(sys::tract_fact_rank(self.0, &mut rank))?; Ok(rank) } fn dim(&self, axis: usize) -> Result { let mut ptr = null_mut(); check!(sys::tract_fact_dim(self.0, axis, &mut ptr))?; Ok(Dim(ptr)) } } impl std::fmt::Display for Fact { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } // INFERENCE FACT wrapper!(InferenceFact, TractInferenceFact, tract_inference_fact_destroy); impl InferenceFact { fn new(model: &InferenceModel, spec: impl ToString) -> Result { let cstr = CString::new(spec.to_string())?; let mut fact = null_mut(); check!(sys::tract_inference_fact_parse(model.0, cstr.as_ptr(), &mut fact))?; Ok(InferenceFact(fact)) } fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_inference_fact_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl InferenceFactInterface for InferenceFact { fn empty() -> Result { let mut fact = null_mut(); check!(sys::tract_inference_fact_empty(&mut fact))?; Ok(InferenceFact(fact)) } } impl Default for InferenceFact { fn default() -> Self { Self::empty().unwrap() } } impl std::fmt::Display for InferenceFact { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } as_inference_fact_impl!(InferenceModel, InferenceFact); as_fact_impl!(Model, Fact); // Dim wrapper!(Dim, TractDim, tract_dim_destroy); impl Dim { fn dump(&self) -> Result { let mut ptr = null_mut(); check!(sys::tract_dim_dump(self.0, &mut ptr))?; unsafe { let s = CStr::from_ptr(ptr).to_owned(); sys::tract_free_cstring(ptr); Ok(s.to_str()?.to_owned()) } } } impl DimInterface for Dim { fn eval(&self, values: impl IntoIterator, i64)>) -> Result { let (names, values): (Vec<_>, Vec<_>) = 
values.into_iter().unzip(); let c_strings: Vec = names.into_iter().map(|a| Ok(CString::new(a.as_ref())?)).collect::>()?; let ptrs: Vec<_> = c_strings.iter().map(|cs| cs.as_ptr()).collect(); let mut ptr = null_mut(); check!(sys::tract_dim_eval(self.0, ptrs.len(), ptrs.as_ptr(), values.as_ptr(), &mut ptr))?; Ok(Dim(ptr)) } fn to_int64(&self) -> Result { let mut i = 0; check!(sys::tract_dim_to_int64(self.0, &mut i))?; Ok(i) } } impl std::fmt::Display for Dim { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.dump() { Ok(s) => f.write_str(&s), Err(_) => Err(std::fmt::Error), } } } ================================================ FILE: api/proxy/sys/Cargo.toml ================================================ [package] name = "tract-proxy-sys" version = "0.23.0-pre" license = "MIT OR Apache-2.0" authors = ["Mathieu Poumeyrol "] description = "Tiny, no-nonsense, self contained, TensorFlow and ONNX inference" repository = "https://github.com/sonos/tract" keywords = [ "NeuralNetworks" ] categories = [ "science" ] autobenches = false edition = "2024" rust-version.workspace = true include = [ "Cargo.toml", "src/**/*.rs", "LICENSE*", "build.rs", "tract.h" ] [build-dependencies] bindgen = "0.72.1" ================================================ FILE: api/proxy/sys/build.rs ================================================ use std::env; use std::path::PathBuf; fn main() { println!("cargo:rerun-if-env-changed=TRACT_DYLIB_SEARCH_PATH"); println!("cargo:rerun-if-changed=tract.h"); if let Ok(path) = std::env::var("TRACT_DYLIB_SEARCH_PATH") { println!("cargo:rustc-link-search={path}"); } println!("cargo:rustc-link-lib=tract"); let bindings = bindgen::Builder::default() .header("tract.h") .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .generate() .expect("Unable to generate bindings"); let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings.write_to_file(out_path.join("bindings.rs")).expect("Couldn't write bindings!"); } 
================================================ FILE: api/proxy/sys/src/lib.rs ================================================ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(improper_ctypes)] #![allow(deref_nullptr)] #![allow(unsafe_op_in_unsafe_fn)] #![allow(clippy::redundant_static_lifetimes)] #![allow(clippy::useless_transmute)] include!(concat!(env!("OUT_DIR"), "/bindings.rs")); ================================================ FILE: api/proxy/sys/tract.h ================================================ #include #include #include #include typedef enum DatumType { TRACT_DATUM_TYPE_BOOL = 1, TRACT_DATUM_TYPE_U8 = 17, TRACT_DATUM_TYPE_U16 = 18, TRACT_DATUM_TYPE_U32 = 20, TRACT_DATUM_TYPE_U64 = 24, TRACT_DATUM_TYPE_I8 = 33, TRACT_DATUM_TYPE_I16 = 34, TRACT_DATUM_TYPE_I32 = 36, TRACT_DATUM_TYPE_I64 = 40, TRACT_DATUM_TYPE_F16 = 50, TRACT_DATUM_TYPE_F32 = 52, TRACT_DATUM_TYPE_F64 = 56, TRACT_DATUM_TYPE_COMPLEX_I16 = 66, TRACT_DATUM_TYPE_COMPLEX_I32 = 68, TRACT_DATUM_TYPE_COMPLEX_I64 = 72, TRACT_DATUM_TYPE_COMPLEX_F16 = 82, TRACT_DATUM_TYPE_COMPLEX_F32 = 84, TRACT_DATUM_TYPE_COMPLEX_F64 = 88, } DatumType; /** * Used as a return type of functions that can encounter errors. 
* If the function encountered an error, you can retrieve it using the `tract_get_last_error` * function */ typedef enum TRACT_RESULT { /** * The function returned successfully */ TRACT_RESULT_OK = 0, /** * The function returned an error */ TRACT_RESULT_KO = 1, } TRACT_RESULT; /** * Dim */ typedef struct TractDim TractDim; typedef struct TractFact TractFact; typedef struct TractInferenceFact TractInferenceFact; typedef struct TractInferenceModel TractInferenceModel; typedef struct TractModel TractModel; typedef struct TractNnef TractNnef; typedef struct TractOnnx TractOnnx; typedef struct TractRunnable TractRunnable; typedef struct TractRuntime TractRuntime; typedef struct TractState TractState; typedef struct TractTensor TractTensor; /** * Retrieve the last error that happened in this thread. A function encountered an error if * its return type is of type `TRACT_RESULT` and it returned `TRACT_RESULT_KO`. * * # Return value * It returns a pointer to a null-terminated UTF-8 string that will contain the error description. * Rust side keeps ownership of the buffer. It will be valid as long as no other tract call is * performed by the thread. * If no error occurred, null is returned. */ const char *tract_get_last_error(void); /** * Returns a pointer to a static buffer containing a null-terminated version string. * * The returned pointer must not be freed. */ const char *tract_version(void); /** * Frees a string allocated by libtract. */ void tract_free_cstring(char *ptr); /** * Creates an instance of an NNEF framework and parser that can be used to load and dump NNEF models. * * The returned object should be destroyed with `tract_nnef_destroy` once the model * has been loaded.
*/ enum TRACT_RESULT tract_nnef_create(struct TractNnef **nnef); enum TRACT_RESULT tract_nnef_enable_tract_core(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_tract_extra(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_tract_transformers(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_onnx(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_pulse(struct TractNnef *nnef); enum TRACT_RESULT tract_nnef_enable_extended_identifier_syntax(struct TractNnef *nnef); /** * Destroy the NNEF parser. It is safe to destroy the NNEF parser once the model has been loaded. */ enum TRACT_RESULT tract_nnef_destroy(struct TractNnef **nnef); /** * Parse and load an NNEF model as a tract TypedModel. * * `path` is a null-terminated utf-8 string pointer. It can be an archive (tar or tar.gz file) or a * directory. */ enum TRACT_RESULT tract_nnef_load(const struct TractNnef *nnef, const char *path, struct TractModel **model); /** * Parse and load an NNEF buffer as a tract TypedModel. * * `data` is a buffer pointer * `len` is the buffer len */ enum TRACT_RESULT tract_nnef_load_buffer(const struct TractNnef *nnef, const void *data, uintptr_t len, struct TractModel **model); /** * Dump a TypedModel as a NNEF tar file. * * `path` is a null-terminated utf-8 string pointer to the `.tar` file to be created. * * This function creates a plain, non-compressed, archive. */ enum TRACT_RESULT tract_nnef_write_model_to_tar(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Dump a TypedModel as a NNEF .tar.gz file. * * `path` is a null-terminated utf-8 string pointer to the `.tar.gz` file to be created. */ enum TRACT_RESULT tract_nnef_write_model_to_tar_gz(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Dump a TypedModel as a NNEF directory. * * `path` is a null-terminated utf-8 string pointer to the directory to be created. * * This function creates a plain, non-compressed, archive.
*/ enum TRACT_RESULT tract_nnef_write_model_to_dir(const struct TractNnef *nnef, const char *path, const struct TractModel *model); /** * Creates an instance of an ONNX framework and parser that can be used to load models. * * The returned object should be destroyed with `tract_onnx_destroy` once the model * has been loaded. */ enum TRACT_RESULT tract_onnx_create(struct TractOnnx **onnx); /** * Destroy the ONNX parser. It is safe to destroy the ONNX parser once the model has been loaded. */ enum TRACT_RESULT tract_onnx_destroy(struct TractOnnx **onnx); /** * Parse and load an ONNX model as a tract InferenceModel. * * `path` is a null-terminated utf-8 string pointer. It must point to a `.onnx` model file. */ enum TRACT_RESULT tract_onnx_load(const struct TractOnnx *onnx, const char *path, struct TractInferenceModel **model); /** * Parse and load an ONNX buffer as a tract InferenceModel. * * `data` is a buffer pointer * `len` is the buffer len */ enum TRACT_RESULT tract_onnx_load_buffer(const struct TractOnnx *onnx, const void *data, uintptr_t len, struct TractInferenceModel **model); /** * Query an InferenceModel input count. */ enum TRACT_RESULT tract_inference_model_input_count(const struct TractInferenceModel *model, uintptr_t *inputs); /** * Query an InferenceModel output count. */ enum TRACT_RESULT tract_inference_model_output_count(const struct TractInferenceModel *model, uintptr_t *outputs); /** * Query the name of a model input. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_model_input_name(const struct TractInferenceModel *model, uintptr_t input, char **name); /** * Query the name of a model output. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_model_output_name(const struct TractInferenceModel *model, uintptr_t output, int8_t **name); /** * Query a model input fact.
*/ enum TRACT_RESULT tract_inference_model_input_fact(const struct TractInferenceModel *model, uintptr_t input_id, struct TractInferenceFact **fact); /** * Set an input fact of an InferenceModel. * * The `fact` argument is only borrowed by this function, it still must be destroyed. * `fact` can be set to NULL to erase the current input fact of the model. */ enum TRACT_RESULT tract_inference_model_set_input_fact(struct TractInferenceModel *model, uintptr_t input_id, const struct TractInferenceFact *fact); /** * Query an output fact for an InferenceModel. * * The returned fact must be freed using `tract_inference_fact_destroy`. */ enum TRACT_RESULT tract_inference_model_output_fact(const struct TractInferenceModel *model, uintptr_t output_id, struct TractInferenceFact **fact); /** * Set an output fact of an InferenceModel. * * The `fact` argument is only borrowed by this function, it still must be destroyed. * `fact` can be set to NULL to erase the current output fact of the model. */ enum TRACT_RESULT tract_inference_model_set_output_fact(struct TractInferenceModel *model, uintptr_t output_id, const struct TractInferenceFact *fact); /** * Analyse an InferenceModel in-place. */ enum TRACT_RESULT tract_inference_model_analyse(struct TractInferenceModel *model); /** * Transform a fully analysed InferenceModel to a TypedModel. * * This function takes ownership of the InferenceModel `model` whether it succeeds * or not. `tract_inference_model_destroy` must not be used on `model`. * * On the other hand, caller will be owning the newly created typed model. */ enum TRACT_RESULT tract_inference_model_into_model(struct TractInferenceModel **model, struct TractModel **typed); /** * Destroy an InferenceModel. */ enum TRACT_RESULT tract_inference_model_destroy(struct TractInferenceModel **model); /** * Query a TypedModel input count.
*/ enum TRACT_RESULT tract_model_input_count(const struct TractModel *model, uintptr_t *inputs); /** * Query a TypedModel output count. */ enum TRACT_RESULT tract_model_output_count(const struct TractModel *model, uintptr_t *outputs); /** * Query the name of a model input. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_model_input_name(const struct TractModel *model, uintptr_t input, char **name); /** * Query the input fact of a model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_model_input_fact(const struct TractModel *model, uintptr_t input_id, struct TractFact **fact); /** * Query the name of a model output. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_model_output_name(const struct TractModel *model, uintptr_t output, char **name); /** * Query the output fact of a model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_model_output_fact(const struct TractModel *model, uintptr_t input_id, struct TractFact **fact); /** * Apply a transform to the model. */ enum TRACT_RESULT tract_model_transform(struct TractModel *model, const int8_t *transform); /** * Perform a profile of the model using the provided inputs. */ enum TRACT_RESULT tract_runnable_profile_json(struct TractRunnable *model, struct TractTensor **inputs, int8_t **json); /** * Convert a TypedModel into a TypedRunnableModel. * * This function transfers ownership of the `model` argument to the newly-created `runnable` model. * * Runnables are reference counted. When done, it should be released with `tract_runnable_release`. */ enum TRACT_RESULT tract_model_into_runnable(struct TractModel **model, struct TractRunnable **runnable); /** * Query the number of properties in a model.
*/ enum TRACT_RESULT tract_model_property_count(const struct TractModel *model, uintptr_t *count); /** * Query the properties names of a model. * * The "names" array should be big enough to fit `tract_model_property_count` string pointers. * * Each name will have to be freed using `tract_free_cstring`. */ enum TRACT_RESULT tract_model_property_names(const struct TractModel *model, int8_t **names); /** * Query a property tensor in a model. */ enum TRACT_RESULT tract_model_property(const struct TractModel *model, const int8_t *name, struct TractTensor **tensor); /** * Parse a fact specification string into a Fact. * * The returned fact must be freed with `tract_fact_destroy`. */ enum TRACT_RESULT tract_model_parse_fact(struct TractModel *model, const char *spec, struct TractFact **fact); /** * Destroy a TypedModel. */ enum TRACT_RESULT tract_model_destroy(struct TractModel **model); /** * Creates an instance of a tract Runtime that can be used to run a model on a specific * hardware / software stack (like a GPU). * * The returned object should be released with `tract_runtime_release`. */ enum TRACT_RESULT tract_runtime_for_name(const char *name, struct TractRuntime **nnef); /** * Query the name of a Runtime. * * The returned name must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_runtime_name(const struct TractRuntime *runtime, char **name); /** * Convert a Model into a Runnable for this Runtime. * * This function transfers ownership of the `model` argument to the newly-created `runnable` model. * * Runnables are reference counted. When done, it should be released with `tract_runnable_release`. */ enum TRACT_RESULT tract_runtime_prepare(const struct TractRuntime *runtime, struct TractModel **model, struct TractRunnable **runnable); enum TRACT_RESULT tract_runtime_release(struct TractRuntime **runtime); /** * Spawn a session state from a runnable model.
* * This function does not take ownership of the `runnable` object, it can be used again to spawn * other state instances. The runnable object is internally reference counted, it will be * kept alive as long as any associated `State` exists (or as long as the `runnable` is not * explicitly released with `tract_runnable_release`). * * `state` is a newly-created object. It should ultimately be destroyed with `tract_state_destroy`. */ enum TRACT_RESULT tract_runnable_spawn_state(struct TractRunnable *runnable, struct TractState **state); /** * Convenience function to run a stateless model. * * `inputs` is a pointer to a pre-existing array of input TractTensor. Its length *must* be equal * to the number of inputs of the models. The function does not take ownership of the input * tensors. * `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten * with pointers to output tensors. These tensors are under the responsibility of the caller, it * will have to release them with `tract_tensor_destroy`. */ enum TRACT_RESULT tract_runnable_run(struct TractRunnable *runnable, struct TractTensor **inputs, struct TractTensor **outputs); /** * Query a Runnable input count. */ enum TRACT_RESULT tract_runnable_input_count(const struct TractRunnable *model, uintptr_t *inputs); /** * Query a Runnable output count. */ enum TRACT_RESULT tract_runnable_output_count(const struct TractRunnable *model, uintptr_t *outputs); /** * Query the input fact of a runnable model. * * The returned fact must be freed with tract_fact_destroy. */ enum TRACT_RESULT tract_runnable_input_fact(const struct TractRunnable *runnable, uintptr_t input_id, struct TractFact **fact); /** * Query the output fact of a runnable model. * * The returned fact must be freed with tract_fact_destroy.
*/ enum TRACT_RESULT tract_runnable_output_fact(const struct TractRunnable *runnable, uintptr_t output_id, struct TractFact **fact); /** * Query the number of properties in a runnable model. */ enum TRACT_RESULT tract_runnable_property_count(const struct TractRunnable *model, uintptr_t *count); /** * Query the properties names of a runnable model. * * The "names" array should be big enough to fit `tract_runnable_property_count` string pointers. * * Each name will have to be freed using `tract_free_cstring`. */ enum TRACT_RESULT tract_runnable_property_names(const struct TractRunnable *model, int8_t **names); /** * Query a property tensor in a runnable model. */ enum TRACT_RESULT tract_runnable_property(const struct TractRunnable *model, const int8_t *name, struct TractTensor **tensor); enum TRACT_RESULT tract_runnable_release(struct TractRunnable **runnable); /** * Create a TractTensor from caller data and metadata. * * This call copies the data into tract space. All the pointers only need to be alive for the * duration of the call. * * rank is the number of dimensions of the tensor (i.e. the length of the shape vector). * * The returned tensor must be destroyed by `tract_tensor_destroy`. */ enum TRACT_RESULT tract_tensor_from_bytes(DatumType datum_type, uintptr_t rank, const uintptr_t *shape, void *data, struct TractTensor **tensor); /** * Write a tensor as a debug string * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_tensor_dump(const struct TractTensor *tensor, char **spec); /** * Convert a tensor to a new datum type. * * This function will perform a cheap shallow clone if the destination type is * the same as the current type, otherwise it returns a newly allocated tensor instead. * * In both cases, the returned tensor must be destroyed by `tract_tensor_destroy`. * The input tensor is not consumed, it still needs to be destroyed.
*/ enum TRACT_RESULT tract_tensor_convert_to(const struct TractTensor *input, DatumType datum_type, struct TractTensor **output); /** * Destroy a tensor. */ enum TRACT_RESULT tract_tensor_destroy(struct TractTensor **tensor); /** * Inspect part of a tensor. Except `tensor`, all argument pointers can be null if only some specific bits * are required. */ enum TRACT_RESULT tract_tensor_as_bytes(struct TractTensor *tensor, DatumType *datum_type, uintptr_t *rank, const uintptr_t **shape, const void **data); /** * Run a turn on a model state * * `inputs` is a pointer to a pre-existing array of input TractTensor. Its length *must* be equal * to the number of inputs of the models. The function does not take ownership of the input * tensors. * `outputs` is a pointer to a pre-existing array of TractTensor pointers that will be overwritten * with pointers to output tensors. These tensors are under the responsibility of the caller, it * will have to release them with `tract_tensor_destroy`. */ enum TRACT_RESULT tract_state_run(struct TractState *state, struct TractTensor **inputs, struct TractTensor **outputs); /** * Query a State input count. */ enum TRACT_RESULT tract_state_input_count(const struct TractState *state, uintptr_t *inputs); /** * Query a State output count. */ enum TRACT_RESULT tract_state_output_count(const struct TractState *state, uintptr_t *outputs); enum TRACT_RESULT tract_state_destroy(struct TractState **state); /** * Gets the rank (aka number of axes/dimensions) of a fact. */ enum TRACT_RESULT tract_fact_rank(const struct TractFact *fact, uintptr_t *rank); /** * Extract the datum type of the fact. */ enum TRACT_RESULT tract_fact_datum_type(const struct TractFact *fact, DatumType *datum_type); /** * Extract the dimension from one dimension of the fact. */ enum TRACT_RESULT tract_fact_dim(const struct TractFact *fact, uintptr_t axis, struct TractDim **dim); /** * Write a fact as its specification string.
* * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_fact_dump(const struct TractFact *fact, char **spec); enum TRACT_RESULT tract_fact_destroy(struct TractFact **fact); /** * Parse a fact specification string into an InferenceFact. * * The returned fact must be freed with `tract_inference_fact_destroy`. */ enum TRACT_RESULT tract_inference_fact_parse(struct TractInferenceModel *model, const char *spec, struct TractInferenceFact **fact); /** * Creates an empty inference fact. * * The returned fact must be freed by the caller using tract_inference_fact_destroy */ enum TRACT_RESULT tract_inference_fact_empty(struct TractInferenceFact **fact); /** * Write an inference fact as its specification string. * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_inference_fact_dump(const struct TractInferenceFact *fact, char **spec); /** * Destroy a fact. */ enum TRACT_RESULT tract_inference_fact_destroy(struct TractInferenceFact **fact); /** * Substitute symbols by the provided values in the Dim, generating a new one. */ enum TRACT_RESULT tract_dim_eval(const struct TractDim *dim, uintptr_t nb_symbols, const int8_t *const *symbols, const int64_t *values, struct TractDim **result); /** * Try converting a Dim into an actual integer * * Will fail if the Dim contains symbols. */ enum TRACT_RESULT tract_dim_to_int64(const struct TractDim *fact, int64_t *i); /** * Write a dim as its specification string. * * The returned string must be freed by the caller using tract_free_cstring. */ enum TRACT_RESULT tract_dim_dump(const struct TractDim *dim, char **spec); /** * Destroy a dim.
*/ enum TRACT_RESULT tract_dim_destroy(struct TractDim **dim); ================================================ FILE: api/proxy/tests/mobilenet.rs ================================================ use tract_api::*; use tract_proxy::*; include!("../../tests/mobilenet/mod.rs"); ================================================ FILE: api/py/.gitignore ================================================ __pycache__ *.so *.egg-info *.onnx build mobilenet_v2_1.0.onnx.nnef.tgz rust-workspace dist ================================================ FILE: api/py/MANIFEST.in ================================================ graft rust-workspace graft docs ================================================ FILE: api/py/_static/redirect-index.html ================================================

Redirecting to documentation...

================================================ FILE: api/py/_static/version-switcher.js ================================================ // Version switcher for multi-version gh-pages docs. // Reads versions.json (mike-compatible format) from the site root and injects // a