Full Code of Tencent/ncnn for AI

master 939f24fc2b44 cached
3805 files
33.0 MB
4.2M tokens
2590 symbols
1 request
Copy disabled (too large) Download .txt
Showing preview only (16,537K chars total). Download the full file to get everything.
Repository: Tencent/ncnn
Branch: master
Commit: 939f24fc2b44
Files: 3805
Total size: 33.0 MB

Directory structure:
gitextract_nmtq5ath/

├── .astylerc
├── .clang-format
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug.md
│   │   ├── model-convert.md
│   │   ├── others.md
│   │   └── quantization.md
│   ├── dependabot.yml
│   ├── labeler.yml
│   └── workflows/
│       ├── android.yml
│       ├── code-format-msg.yml
│       ├── code-format.yml
│       ├── codeql-analysis.yml
│       ├── compare-binary-size-pr-comment.yml
│       ├── compare-binary-size.yml
│       ├── elf-riscv32.yml
│       ├── elf-riscv64.yml
│       ├── esp32.yml
│       ├── harmonyos.yml
│       ├── ios.yml
│       ├── labeler.yml
│       ├── linux-aarch64.yml
│       ├── linux-arm.yml
│       ├── linux-loongarch64.yml
│       ├── linux-mips.yml
│       ├── linux-mips64.yml
│       ├── linux-ppc64.yml
│       ├── linux-riscv32.yml
│       ├── linux-riscv64.yml
│       ├── linux-x64-cpu-clang.yml
│       ├── linux-x64-cpu-gcc-musl.yml
│       ├── linux-x64-cpu-gcc.yml
│       ├── linux-x64-gpu-clang.yml
│       ├── linux-x64-gpu-gcc.yml
│       ├── linux-x64-sde.yml
│       ├── linux-x86-cpu-clang.yml
│       ├── linux-x86-cpu-gcc.yml
│       ├── mac-catalyst.yml
│       ├── macos.yml
│       ├── pnnx.yml
│       ├── python.yml
│       ├── release-python.yml
│       ├── release.yml
│       ├── sync-wiki.yml
│       ├── test-coverage.yml
│       ├── tvos.yml
│       ├── visionos.yml
│       ├── watchos.yml
│       ├── web-assembly.yml
│       ├── windows-arm.yml
│       ├── windows-clang.yml
│       ├── windows-mingw.yml
│       ├── windows-xp.yml
│       └── windows.yml
├── .gitignore
├── .gitmodules
├── CITATION.cff
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Info.plist
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── benchmark/
│   ├── CMakeLists.txt
│   ├── FastestDet.param
│   ├── README.md
│   ├── RankCards/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── Rcards.h
│   │   └── main.cpp
│   ├── alexnet.param
│   ├── benchncnn.cpp
│   ├── benchncnn_param_data.h.in
│   ├── blazeface.param
│   ├── efficientnet_b0.param
│   ├── efficientnetv2_b0.param
│   ├── googlenet.param
│   ├── googlenet_int8.param
│   ├── mnasnet.param
│   ├── mobilenet.param
│   ├── mobilenet_int8.param
│   ├── mobilenet_ssd.param
│   ├── mobilenet_ssd_int8.param
│   ├── mobilenet_v2.param
│   ├── mobilenet_v3.param
│   ├── mobilenet_yolo.param
│   ├── mobilenetv2_yolov3.param
│   ├── nanodet_m.param
│   ├── proxylessnasnet.param
│   ├── regnety_400m.param
│   ├── resnet18.param
│   ├── resnet18_int8.param
│   ├── resnet50.param
│   ├── resnet50_int8.param
│   ├── shufflenet.param
│   ├── shufflenet_v2.param
│   ├── squeezenet.param
│   ├── squeezenet_int8.param
│   ├── squeezenet_ssd.param
│   ├── squeezenet_ssd_int8.param
│   ├── vgg16.param
│   ├── vgg16_int8.param
│   ├── vision_transformer.param
│   ├── yolo-fastest-1.1.param
│   ├── yolo-fastestv2.param
│   └── yolov4-tiny.param
├── build-android.cmd
├── build.sh
├── cmake/
│   ├── ncnnConfig.cmake.in
│   ├── ncnn_add_layer.cmake
│   ├── ncnn_add_param.cmake
│   ├── ncnn_add_shader.cmake
│   ├── ncnn_generate_avx512_source.cmake
│   ├── ncnn_generate_avx_source.cmake
│   ├── ncnn_generate_fma_source.cmake
│   ├── ncnn_generate_lasx_source.cmake
│   ├── ncnn_generate_lsx_source.cmake
│   ├── ncnn_generate_msa_source.cmake
│   ├── ncnn_generate_param_header.cmake
│   ├── ncnn_generate_rvv_source.cmake
│   ├── ncnn_generate_shader_comp_header.cmake
│   ├── ncnn_generate_xtheadvector_source.cmake
│   └── run_test.cmake
├── codeformat.sh
├── docs/
│   ├── Home.md
│   ├── application-with-ncnn-inside.md
│   ├── benchmark/
│   │   ├── the-benchmark-of-caffe-android-lib,-mini-caffe,-and-ncnn.md
│   │   └── vulkan-conformance-test.md
│   ├── developer-guide/
│   │   ├── aarch64-mix-assembly-and-intrinsic.md
│   │   ├── add-custom-layer.zh.md
│   │   ├── arm-a53-a55-dual-issue.md
│   │   ├── armv7-mix-assembly-and-intrinsic.md
│   │   ├── binaryop-broadcasting.md
│   │   ├── build-ncnn-on-windows-xp.zh.md
│   │   ├── custom-allocator.md
│   │   ├── element-packing.md
│   │   ├── expression.md
│   │   ├── glsl-extension.md
│   │   ├── glsl-extension.zh.md
│   │   ├── how-to-be-a-contributor.zh.md
│   │   ├── how-to-implement-custom-layer-step-by-step.md
│   │   ├── how-to-write-a-neon-optimized-op-kernel.md
│   │   ├── how-to-write-a-sse-optimized-op-kernel.zh.md
│   │   ├── kvcache.md
│   │   ├── layer-feat-mask.md
│   │   ├── layer-support-behavior.md
│   │   ├── low-level-operation-api.md
│   │   ├── ncnn-tips-and-tricks.zh.md
│   │   ├── new-model-load-api.md
│   │   ├── new-param-load-api.md
│   │   ├── operation-param-weight-table.md
│   │   ├── operators.md
│   │   ├── param-and-model-file-structure.md
│   │   ├── preload-practice.zh.md
│   │   ├── tensorflow-op-combination.md
│   │   └── vulkan-driver-loader.md
│   ├── faq.en.md
│   ├── faq.md
│   ├── how-to-build/
│   │   ├── build-mlir2ncnn.md
│   │   └── how-to-build.md
│   └── how-to-use-and-FAQ/
│       ├── FAQ-ncnn-produce-wrong-result.md
│       ├── FAQ-ncnn-protobuf-problem.zh.md
│       ├── FAQ-ncnn-throw-error.md
│       ├── FAQ-ncnn-vulkan.md
│       ├── build-minimal-library.md
│       ├── efficient-roi-resize-rotate.md
│       ├── ncnn-load-model.md
│       ├── openmp-best-practice.md
│       ├── openmp-best-practice.zh.md
│       ├── quantized-int8-inference.md
│       ├── use-ncnn-with-alexnet.md
│       ├── use-ncnn-with-alexnet.zh.md
│       ├── use-ncnn-with-opencv.md
│       ├── use-ncnn-with-own-project.md
│       ├── use-ncnn-with-pytorch-or-onnx.md
│       ├── use-ncnnoptimize-to-optimize-model.md
│       └── vulkan-notes.md
├── examples/
│   ├── CMakeLists.txt
│   ├── arcface.cpp
│   ├── fasterrcnn.cpp
│   ├── mobilenetssd.cpp
│   ├── mobilenetv2ssdlite.cpp
│   ├── mobilenetv3ssdlite.cpp
│   ├── nanodet.cpp
│   ├── nanodetplus_pnnx.cpp
│   ├── p2pnet.cpp
│   ├── peleenetssd_seg.cpp
│   ├── piper.cpp
│   ├── ppocrv5.cpp
│   ├── ppocrv5_dict.h
│   ├── retinaface.cpp
│   ├── rfcn.cpp
│   ├── rvm.cpp
│   ├── scrfd.cpp
│   ├── scrfd_crowdhuman.cpp
│   ├── shufflenetv2.cpp
│   ├── simplepose.cpp
│   ├── squeezencnn/
│   │   └── README.md
│   ├── squeezenet.cpp
│   ├── squeezenet_c_api.cpp
│   ├── squeezenet_v1.1.caffemodel
│   ├── squeezenet_v1.1.param
│   ├── squeezenet_v1.1.prototxt
│   ├── squeezenetssd.cpp
│   ├── synset_words.txt
│   ├── whisper.cpp
│   ├── yolact.cpp
│   ├── yolo11.cpp
│   ├── yolo11_cls.cpp
│   ├── yolo11_obb.cpp
│   ├── yolo11_pose.cpp
│   ├── yolo11_seg.cpp
│   ├── yolov2.cpp
│   ├── yolov3.cpp
│   ├── yolov4.cpp
│   ├── yolov5.cpp
│   ├── yolov5_pnnx.cpp
│   ├── yolov7.cpp
│   ├── yolov7_pnnx.cpp
│   ├── yolov8.cpp
│   ├── yolov8_cls.cpp
│   ├── yolov8_obb.cpp
│   ├── yolov8_pose.cpp
│   ├── yolov8_seg.cpp
│   ├── yoloworld.cpp
│   └── yolox.cpp
├── package.sh
├── pyproject.toml
├── python/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── examples/
│   │   ├── fasterrcnn.py
│   │   ├── mobilenetssd.py
│   │   ├── mobilenetv2ssdlite.py
│   │   ├── mobilenetv3ssdlite.py
│   │   ├── model_zoo.py
│   │   ├── nanodet.py
│   │   ├── peleenetssd.py
│   │   ├── retinaface.py
│   │   ├── rfcn.py
│   │   ├── shufflenetv2.py
│   │   ├── simplepose.py
│   │   ├── squeezenet.py
│   │   ├── squeezenetssd.py
│   │   ├── yolact.py
│   │   ├── yolov2.py
│   │   ├── yolov3.py
│   │   ├── yolov4.py
│   │   ├── yolov5.py
│   │   └── yolov8.py
│   ├── ncnn/
│   │   ├── __init__.py
│   │   ├── model_zoo/
│   │   │   ├── __init__.py
│   │   │   ├── fasterrcnn.py
│   │   │   ├── mobilenetssd.py
│   │   │   ├── mobilenetv2ssdlite.py
│   │   │   ├── mobilenetv3ssdlite.py
│   │   │   ├── model_store.py
│   │   │   ├── model_zoo.py
│   │   │   ├── nanodet.py
│   │   │   ├── peleenetssd.py
│   │   │   ├── retinaface.py
│   │   │   ├── rfcn.py
│   │   │   ├── shufflenetv2.py
│   │   │   ├── simplepose.py
│   │   │   ├── squeezenet.py
│   │   │   ├── squeezenetssd.py
│   │   │   ├── yolact.py
│   │   │   ├── yolov2.py
│   │   │   ├── yolov3.py
│   │   │   ├── yolov4.py
│   │   │   ├── yolov5.py
│   │   │   ├── yolov7.py
│   │   │   └── yolov8.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── download.py
│   │       ├── functional.py
│   │       ├── objects.py
│   │       └── visual.py
│   ├── requirements.txt
│   ├── setup.py.i
│   ├── src/
│   │   ├── main.cpp
│   │   ├── pybind11_allocator.h
│   │   ├── pybind11_bind.h
│   │   ├── pybind11_datareader.h
│   │   ├── pybind11_layer.h
│   │   ├── pybind11_mat.h
│   │   └── pybind11_modelbin.h
│   └── tests/
│       ├── benchmark.py
│       ├── custom_layer.param
│       ├── test.param
│       ├── test_allocator.py
│       ├── test_blob.py
│       ├── test_extractor.py
│       ├── test_mat.py
│       ├── test_net.py
│       ├── test_option.py
│       ├── test_paramdict.py
│       ├── test_vulkan_allocator.py
│       └── test_vulkan_device.py
├── setup.py
├── src/
│   ├── CMakeLists.txt
│   ├── allocator.cpp
│   ├── allocator.h
│   ├── benchmark.cpp
│   ├── benchmark.h
│   ├── blob.cpp
│   ├── blob.h
│   ├── c_api.cpp
│   ├── c_api.h
│   ├── command.cpp
│   ├── command.h
│   ├── convert_ycbcr.comp
│   ├── cpu.cpp
│   ├── cpu.h
│   ├── datareader.cpp
│   ├── datareader.h
│   ├── expression.cpp
│   ├── expression.h
│   ├── gpu.cpp
│   ├── gpu.h
│   ├── layer/
│   │   ├── absval.cpp
│   │   ├── absval.h
│   │   ├── argmax.cpp
│   │   ├── argmax.h
│   │   ├── arm/
│   │   │   ├── absval_arm.cpp
│   │   │   ├── absval_arm.h
│   │   │   ├── arm_activation.h
│   │   │   ├── arm_usability.h
│   │   │   ├── batchnorm_arm.cpp
│   │   │   ├── batchnorm_arm.h
│   │   │   ├── batchnorm_arm_asimdhp.cpp
│   │   │   ├── bias_arm.cpp
│   │   │   ├── bias_arm.h
│   │   │   ├── binaryop_arm.cpp
│   │   │   ├── binaryop_arm.h
│   │   │   ├── binaryop_arm_asimdhp.cpp
│   │   │   ├── cast_arm.cpp
│   │   │   ├── cast_arm.h
│   │   │   ├── cast_arm_bf16.cpp
│   │   │   ├── cast_arm_vfpv4.cpp
│   │   │   ├── cast_bf16.h
│   │   │   ├── cast_fp16.h
│   │   │   ├── clip_arm.cpp
│   │   │   ├── clip_arm.h
│   │   │   ├── clip_arm_asimdhp.cpp
│   │   │   ├── concat_arm.cpp
│   │   │   ├── concat_arm.h
│   │   │   ├── convolution1d_arm.cpp
│   │   │   ├── convolution1d_arm.h
│   │   │   ├── convolution1d_arm_asimdhp.cpp
│   │   │   ├── convolution1d_packed.h
│   │   │   ├── convolution1d_packed_bf16s.h
│   │   │   ├── convolution1d_packed_fp16s.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_2x2.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack1to4_bf16s.h
│   │   │   ├── convolution_3x3_pack1to4_fp16s.h
│   │   │   ├── convolution_3x3_pack1to8_fp16s.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack4_bf16s.h
│   │   │   ├── convolution_3x3_pack4_fp16s.h
│   │   │   ├── convolution_3x3_pack4to1.h
│   │   │   ├── convolution_3x3_pack8_fp16s.h
│   │   │   ├── convolution_3x3_winograd.h
│   │   │   ├── convolution_3x3_winograd_bf16s.h
│   │   │   ├── convolution_3x3_winograd_fp16s.h
│   │   │   ├── convolution_3x3_winograd_int8.h
│   │   │   ├── convolution_4x4.h
│   │   │   ├── convolution_5x5.h
│   │   │   ├── convolution_5x5_pack4.h
│   │   │   ├── convolution_5x5_pack4_bf16s.h
│   │   │   ├── convolution_5x5_pack8_fp16s.h
│   │   │   ├── convolution_7x7.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_7x7_pack1to4_bf16s.h
│   │   │   ├── convolution_7x7_pack1to8_fp16s.h
│   │   │   ├── convolution_arm.cpp
│   │   │   ├── convolution_arm.h
│   │   │   ├── convolution_arm_asimddp.cpp
│   │   │   ├── convolution_arm_asimdhp.cpp
│   │   │   ├── convolution_arm_i8mm.cpp
│   │   │   ├── convolution_im2col_gemm.h
│   │   │   ├── convolution_im2col_gemm_bf16s.h
│   │   │   ├── convolution_im2col_gemm_bf16s_fp16s.h
│   │   │   ├── convolution_im2col_gemm_fp16s.h
│   │   │   ├── convolution_im2col_gemm_int8.h
│   │   │   ├── convolution_packed.h
│   │   │   ├── convolution_packed_bf16s.h
│   │   │   ├── convolution_packed_fp16s.h
│   │   │   ├── convolution_packed_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3_int8.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_3x3_pack4_bf16s.h
│   │   │   ├── convolutiondepthwise_3x3_pack8_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3_pack8_int8.h
│   │   │   ├── convolutiondepthwise_5x5.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4_bf16s.h
│   │   │   ├── convolutiondepthwise_5x5_pack8_fp16s.h
│   │   │   ├── convolutiondepthwise_arm.cpp
│   │   │   ├── convolutiondepthwise_arm.h
│   │   │   ├── convolutiondepthwise_arm_asimdhp.cpp
│   │   │   ├── crop_arm.cpp
│   │   │   ├── crop_arm.h
│   │   │   ├── deconvolution_3x3.h
│   │   │   ├── deconvolution_4x4.h
│   │   │   ├── deconvolution_4x4_fp16s.h
│   │   │   ├── deconvolution_arm.cpp
│   │   │   ├── deconvolution_arm.h
│   │   │   ├── deconvolution_arm_asimdhp.cpp
│   │   │   ├── deconvolutiondepthwise_arm.cpp
│   │   │   ├── deconvolutiondepthwise_arm.h
│   │   │   ├── deconvolutiondepthwise_arm_asimdhp.cpp
│   │   │   ├── dequantize_arm.cpp
│   │   │   ├── dequantize_arm.h
│   │   │   ├── dequantize_arm_asimdhp.cpp
│   │   │   ├── dropout_arm.cpp
│   │   │   ├── dropout_arm.h
│   │   │   ├── eltwise_arm.cpp
│   │   │   ├── eltwise_arm.h
│   │   │   ├── eltwise_arm_asimdhp.cpp
│   │   │   ├── flatten_arm.cpp
│   │   │   ├── flatten_arm.h
│   │   │   ├── gelu_arm.cpp
│   │   │   ├── gelu_arm.h
│   │   │   ├── gelu_arm_asimdhp.cpp
│   │   │   ├── gemm_arm.cpp
│   │   │   ├── gemm_arm.h
│   │   │   ├── gemm_arm_asimddp.cpp
│   │   │   ├── gemm_arm_asimdfhm.cpp
│   │   │   ├── gemm_arm_asimdhp.cpp
│   │   │   ├── gemm_arm_i8mm.cpp
│   │   │   ├── gemm_arm_vfpv4.cpp
│   │   │   ├── gemm_bf16s.h
│   │   │   ├── gemm_bf16s_fp16s.h
│   │   │   ├── gemm_fp16s.h
│   │   │   ├── gemm_int8.h
│   │   │   ├── gemm_int8_bf16s.h
│   │   │   ├── gemm_int8_fp16s.h
│   │   │   ├── groupnorm_arm.cpp
│   │   │   ├── groupnorm_arm.h
│   │   │   ├── groupnorm_arm_asimdhp.cpp
│   │   │   ├── gru_arm.cpp
│   │   │   ├── gru_arm.h
│   │   │   ├── gru_arm_asimddp.cpp
│   │   │   ├── gru_arm_asimdhp.cpp
│   │   │   ├── gru_arm_vfpv4.cpp
│   │   │   ├── gru_int8.h
│   │   │   ├── hardsigmoid_arm.cpp
│   │   │   ├── hardsigmoid_arm.h
│   │   │   ├── hardsigmoid_arm_asimdhp.cpp
│   │   │   ├── hardswish_arm.cpp
│   │   │   ├── hardswish_arm.h
│   │   │   ├── hardswish_arm_asimdhp.cpp
│   │   │   ├── innerproduct_arm.cpp
│   │   │   ├── innerproduct_arm.h
│   │   │   ├── innerproduct_arm_asimdfhm.cpp
│   │   │   ├── innerproduct_arm_asimdhp.cpp
│   │   │   ├── innerproduct_arm_vfpv4.cpp
│   │   │   ├── innerproduct_fp16s.h
│   │   │   ├── innerproduct_gemm_fp16s.h
│   │   │   ├── instancenorm_arm.cpp
│   │   │   ├── instancenorm_arm.h
│   │   │   ├── instancenorm_arm_asimdhp.cpp
│   │   │   ├── interp_arm.cpp
│   │   │   ├── interp_arm.h
│   │   │   ├── interp_arm_asimdhp.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_bf16s.h
│   │   │   ├── interp_bicubic_fp16s.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bicubic_pack4_bf16s.h
│   │   │   ├── interp_bicubic_pack4_fp16s.h
│   │   │   ├── interp_bicubic_pack8_fp16s.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_bf16s.h
│   │   │   ├── interp_bilinear_fp16s.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_bilinear_pack4_bf16s.h
│   │   │   ├── interp_bilinear_pack4_fp16s.h
│   │   │   ├── interp_bilinear_pack8_fp16s.h
│   │   │   ├── layernorm_arm.cpp
│   │   │   ├── layernorm_arm.h
│   │   │   ├── layernorm_arm_asimdhp.cpp
│   │   │   ├── lrn_arm.cpp
│   │   │   ├── lrn_arm.h
│   │   │   ├── lstm_arm.cpp
│   │   │   ├── lstm_arm.h
│   │   │   ├── lstm_arm_asimddp.cpp
│   │   │   ├── lstm_arm_asimdhp.cpp
│   │   │   ├── lstm_arm_vfpv4.cpp
│   │   │   ├── lstm_int8.h
│   │   │   ├── matmul_arm.cpp
│   │   │   ├── matmul_arm.h
│   │   │   ├── mish_arm.cpp
│   │   │   ├── mish_arm.h
│   │   │   ├── mish_arm_asimdhp.cpp
│   │   │   ├── multiheadattention_arm.cpp
│   │   │   ├── multiheadattention_arm.h
│   │   │   ├── neon_mathfun.h
│   │   │   ├── neon_mathfun_fp16s.h
│   │   │   ├── neon_mathfun_tanh.h
│   │   │   ├── packing_arm.cpp
│   │   │   ├── packing_arm.h
│   │   │   ├── padding_arm.cpp
│   │   │   ├── padding_arm.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack4_bf16s_fp16s.h
│   │   │   ├── padding_pack8_fp16s.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pixelshuffle_arm.cpp
│   │   │   ├── pixelshuffle_arm.h
│   │   │   ├── pooling_2x2.h
│   │   │   ├── pooling_2x2_pack4.h
│   │   │   ├── pooling_2x2_pack4_bf16s.h
│   │   │   ├── pooling_3x3.h
│   │   │   ├── pooling_3x3_pack4.h
│   │   │   ├── pooling_3x3_pack4_bf16s.h
│   │   │   ├── pooling_arm.cpp
│   │   │   ├── pooling_arm.h
│   │   │   ├── pooling_arm_asimdhp.cpp
│   │   │   ├── prelu_arm.cpp
│   │   │   ├── prelu_arm.h
│   │   │   ├── prelu_arm_asimdhp.cpp
│   │   │   ├── quantize_arm.cpp
│   │   │   ├── quantize_arm.h
│   │   │   ├── quantize_arm_asimdhp.cpp
│   │   │   ├── relu_arm.cpp
│   │   │   ├── relu_arm.h
│   │   │   ├── relu_arm_asimdhp.cpp
│   │   │   ├── requantize_arm.cpp
│   │   │   ├── requantize_arm.h
│   │   │   ├── reshape_arm.cpp
│   │   │   ├── reshape_arm.h
│   │   │   ├── rmsnorm_arm.cpp
│   │   │   ├── rmsnorm_arm.h
│   │   │   ├── rmsnorm_arm_asimdhp.cpp
│   │   │   ├── rnn_arm.cpp
│   │   │   ├── rnn_arm.h
│   │   │   ├── rnn_arm_asimddp.cpp
│   │   │   ├── rnn_arm_asimdhp.cpp
│   │   │   ├── rnn_arm_vfpv4.cpp
│   │   │   ├── rnn_int8.h
│   │   │   ├── scale_arm.cpp
│   │   │   ├── scale_arm.h
│   │   │   ├── selu_arm.cpp
│   │   │   ├── selu_arm.h
│   │   │   ├── shufflechannel_arm.cpp
│   │   │   ├── shufflechannel_arm.h
│   │   │   ├── sigmoid_arm.cpp
│   │   │   ├── sigmoid_arm.h
│   │   │   ├── sigmoid_arm_asimdhp.cpp
│   │   │   ├── slice_arm.cpp
│   │   │   ├── slice_arm.h
│   │   │   ├── softmax_arm.cpp
│   │   │   ├── softmax_arm.h
│   │   │   ├── softmax_arm_asimdhp.cpp
│   │   │   ├── swish_arm.cpp
│   │   │   ├── swish_arm.h
│   │   │   ├── swish_arm_asimdhp.cpp
│   │   │   ├── tanh_arm.cpp
│   │   │   ├── tanh_arm.h
│   │   │   ├── tanh_arm_asimdhp.cpp
│   │   │   ├── unaryop_arm.cpp
│   │   │   ├── unaryop_arm.h
│   │   │   └── unaryop_arm_asimdhp.cpp
│   │   ├── batchnorm.cpp
│   │   ├── batchnorm.h
│   │   ├── bias.cpp
│   │   ├── bias.h
│   │   ├── binaryop.cpp
│   │   ├── binaryop.h
│   │   ├── bnll.cpp
│   │   ├── bnll.h
│   │   ├── cast.cpp
│   │   ├── cast.h
│   │   ├── celu.cpp
│   │   ├── celu.h
│   │   ├── clip.cpp
│   │   ├── clip.h
│   │   ├── concat.cpp
│   │   ├── concat.h
│   │   ├── convolution.cpp
│   │   ├── convolution.h
│   │   ├── convolution1d.cpp
│   │   ├── convolution1d.h
│   │   ├── convolution3d.cpp
│   │   ├── convolution3d.h
│   │   ├── convolutiondepthwise.cpp
│   │   ├── convolutiondepthwise.h
│   │   ├── convolutiondepthwise1d.cpp
│   │   ├── convolutiondepthwise1d.h
│   │   ├── convolutiondepthwise3d.cpp
│   │   ├── convolutiondepthwise3d.h
│   │   ├── copyto.cpp
│   │   ├── copyto.h
│   │   ├── crop.cpp
│   │   ├── crop.h
│   │   ├── cumulativesum.cpp
│   │   ├── cumulativesum.h
│   │   ├── deconvolution.cpp
│   │   ├── deconvolution.h
│   │   ├── deconvolution1d.cpp
│   │   ├── deconvolution1d.h
│   │   ├── deconvolution3d.cpp
│   │   ├── deconvolution3d.h
│   │   ├── deconvolutiondepthwise.cpp
│   │   ├── deconvolutiondepthwise.h
│   │   ├── deconvolutiondepthwise1d.cpp
│   │   ├── deconvolutiondepthwise1d.h
│   │   ├── deconvolutiondepthwise3d.cpp
│   │   ├── deconvolutiondepthwise3d.h
│   │   ├── deepcopy.cpp
│   │   ├── deepcopy.h
│   │   ├── deformableconv2d.cpp
│   │   ├── deformableconv2d.h
│   │   ├── dequantize.cpp
│   │   ├── dequantize.h
│   │   ├── detectionoutput.cpp
│   │   ├── detectionoutput.h
│   │   ├── diag.cpp
│   │   ├── diag.h
│   │   ├── dropout.cpp
│   │   ├── dropout.h
│   │   ├── einsum.cpp
│   │   ├── einsum.h
│   │   ├── eltwise.cpp
│   │   ├── eltwise.h
│   │   ├── elu.cpp
│   │   ├── elu.h
│   │   ├── embed.cpp
│   │   ├── embed.h
│   │   ├── erf.cpp
│   │   ├── erf.h
│   │   ├── exp.cpp
│   │   ├── exp.h
│   │   ├── expanddims.cpp
│   │   ├── expanddims.h
│   │   ├── flatten.cpp
│   │   ├── flatten.h
│   │   ├── flip.cpp
│   │   ├── flip.h
│   │   ├── fold.cpp
│   │   ├── fold.h
│   │   ├── fused_activation.h
│   │   ├── gelu.cpp
│   │   ├── gelu.h
│   │   ├── gemm.cpp
│   │   ├── gemm.h
│   │   ├── glu.cpp
│   │   ├── glu.h
│   │   ├── gridsample.cpp
│   │   ├── gridsample.h
│   │   ├── groupnorm.cpp
│   │   ├── groupnorm.h
│   │   ├── gru.cpp
│   │   ├── gru.h
│   │   ├── hardsigmoid.cpp
│   │   ├── hardsigmoid.h
│   │   ├── hardswish.cpp
│   │   ├── hardswish.h
│   │   ├── innerproduct.cpp
│   │   ├── innerproduct.h
│   │   ├── input.cpp
│   │   ├── input.h
│   │   ├── instancenorm.cpp
│   │   ├── instancenorm.h
│   │   ├── interp.cpp
│   │   ├── interp.h
│   │   ├── inversespectrogram.cpp
│   │   ├── inversespectrogram.h
│   │   ├── layernorm.cpp
│   │   ├── layernorm.h
│   │   ├── log.cpp
│   │   ├── log.h
│   │   ├── loongarch/
│   │   │   ├── absval_loongarch.cpp
│   │   │   ├── absval_loongarch.h
│   │   │   ├── batchnorm_loongarch.cpp
│   │   │   ├── batchnorm_loongarch.h
│   │   │   ├── bias_loongarch.cpp
│   │   │   ├── bias_loongarch.h
│   │   │   ├── binaryop_loongarch.cpp
│   │   │   ├── binaryop_loongarch.h
│   │   │   ├── cast_loongarch.cpp
│   │   │   ├── cast_loongarch.h
│   │   │   ├── clip_loongarch.cpp
│   │   │   ├── clip_loongarch.h
│   │   │   ├── concat_loongarch.cpp
│   │   │   ├── concat_loongarch.h
│   │   │   ├── convolution1d_loongarch.cpp
│   │   │   ├── convolution1d_loongarch.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_int8.h
│   │   │   ├── convolution_1x1_pack1to4_int8.h
│   │   │   ├── convolution_1x1_pack4.h
│   │   │   ├── convolution_1x1_pack4to1.h
│   │   │   ├── convolution_1x1_pack8to1_int8.h
│   │   │   ├── convolution_1x1_pack8to4_int8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack8to1_int8.h
│   │   │   ├── convolution_3x3_pack8to4_int8.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_int8.h
│   │   │   ├── convolution_loongarch.cpp
│   │   │   ├── convolution_loongarch.h
│   │   │   ├── convolution_pack1to4.h
│   │   │   ├── convolution_pack1to4_int8.h
│   │   │   ├── convolution_pack4.h
│   │   │   ├── convolution_pack4to1.h
│   │   │   ├── convolution_pack8to1_int8.h
│   │   │   ├── convolution_pack8to4_int8.h
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_int8.h
│   │   │   ├── convolution_sgemm_pack1to4_int8.h
│   │   │   ├── convolution_sgemm_pack4.h
│   │   │   ├── convolution_sgemm_pack4to1.h
│   │   │   ├── convolution_sgemm_pack8to1_int8.h
│   │   │   ├── convolution_sgemm_pack8to4_int8.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_int8.h
│   │   │   ├── convolution_winograd_dot_pack4.h
│   │   │   ├── convolution_winograd_dot_pack8to1_int8.h
│   │   │   ├── convolution_winograd_dot_pack8to4_int8.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_int8.h
│   │   │   ├── convolution_winograd_transform_pack4.h
│   │   │   ├── convolution_winograd_transform_pack4_int8.h
│   │   │   ├── convolution_winograd_transform_pack8_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_loongarch.cpp
│   │   │   ├── convolutiondepthwise_loongarch.h
│   │   │   ├── crop_loongarch.cpp
│   │   │   ├── crop_loongarch.h
│   │   │   ├── deconvolution_loongarch.cpp
│   │   │   ├── deconvolution_loongarch.h
│   │   │   ├── deconvolution_pack1to4.h
│   │   │   ├── deconvolution_pack4.h
│   │   │   ├── deconvolution_pack4to1.h
│   │   │   ├── deconvolutiondepthwise_loongarch.cpp
│   │   │   ├── deconvolutiondepthwise_loongarch.h
│   │   │   ├── dequantize_loongarch.cpp
│   │   │   ├── dequantize_loongarch.h
│   │   │   ├── dropout_loongarch.cpp
│   │   │   ├── dropout_loongarch.h
│   │   │   ├── eltwise_loongarch.cpp
│   │   │   ├── eltwise_loongarch.h
│   │   │   ├── flatten_loongarch.cpp
│   │   │   ├── flatten_loongarch.h
│   │   │   ├── hardsigmoid_loongarch.cpp
│   │   │   ├── hardsigmoid_loongarch.h
│   │   │   ├── hardswish_loongarch.cpp
│   │   │   ├── hardswish_loongarch.h
│   │   │   ├── innerproduct_loongarch.cpp
│   │   │   ├── innerproduct_loongarch.h
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_loongarch.cpp
│   │   │   ├── interp_loongarch.h
│   │   │   ├── lasx_mathfun.h
│   │   │   ├── loongarch_activation.h
│   │   │   ├── loongarch_usability.h
│   │   │   ├── lsx_mathfun.h
│   │   │   ├── mish_loongarch.cpp
│   │   │   ├── mish_loongarch.h
│   │   │   ├── packing_loongarch.cpp
│   │   │   ├── packing_loongarch.h
│   │   │   ├── padding_loongarch.cpp
│   │   │   ├── padding_loongarch.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pooling_loongarch.cpp
│   │   │   ├── pooling_loongarch.h
│   │   │   ├── prelu_loongarch.cpp
│   │   │   ├── prelu_loongarch.h
│   │   │   ├── quantize_loongarch.cpp
│   │   │   ├── quantize_loongarch.h
│   │   │   ├── relu_loongarch.cpp
│   │   │   ├── relu_loongarch.h
│   │   │   ├── requantize_loongarch.cpp
│   │   │   ├── requantize_loongarch.h
│   │   │   ├── sigmoid_loongarch.cpp
│   │   │   ├── sigmoid_loongarch.h
│   │   │   ├── slice_loongarch.cpp
│   │   │   ├── slice_loongarch.h
│   │   │   ├── softmax_loongarch.cpp
│   │   │   ├── softmax_loongarch.h
│   │   │   ├── swish_loongarch.cpp
│   │   │   ├── swish_loongarch.h
│   │   │   ├── tanh_loongarch.cpp
│   │   │   ├── tanh_loongarch.h
│   │   │   ├── unaryop_loongarch.cpp
│   │   │   └── unaryop_loongarch.h
│   │   ├── lrn.cpp
│   │   ├── lrn.h
│   │   ├── lstm.cpp
│   │   ├── lstm.h
│   │   ├── matmul.cpp
│   │   ├── matmul.h
│   │   ├── memorydata.cpp
│   │   ├── memorydata.h
│   │   ├── mips/
│   │   │   ├── absval_mips.cpp
│   │   │   ├── absval_mips.h
│   │   │   ├── batchnorm_mips.cpp
│   │   │   ├── batchnorm_mips.h
│   │   │   ├── bias_mips.cpp
│   │   │   ├── bias_mips.h
│   │   │   ├── binaryop_mips.cpp
│   │   │   ├── binaryop_mips.h
│   │   │   ├── cast_mips.cpp
│   │   │   ├── cast_mips.h
│   │   │   ├── clip_mips.cpp
│   │   │   ├── clip_mips.h
│   │   │   ├── concat_mips.cpp
│   │   │   ├── concat_mips.h
│   │   │   ├── convolution1d_mips.cpp
│   │   │   ├── convolution1d_mips.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_int8.h
│   │   │   ├── convolution_1x1_pack1to4_int8.h
│   │   │   ├── convolution_1x1_pack4.h
│   │   │   ├── convolution_1x1_pack4to1.h
│   │   │   ├── convolution_1x1_pack8to1_int8.h
│   │   │   ├── convolution_1x1_pack8to4_int8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack8to1_int8.h
│   │   │   ├── convolution_3x3_pack8to4_int8.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_int8.h
│   │   │   ├── convolution_mips.cpp
│   │   │   ├── convolution_mips.h
│   │   │   ├── convolution_mips_mmi.cpp
│   │   │   ├── convolution_pack1to4.h
│   │   │   ├── convolution_pack1to4_int8.h
│   │   │   ├── convolution_pack4.h
│   │   │   ├── convolution_pack4to1.h
│   │   │   ├── convolution_pack8to1_int8.h
│   │   │   ├── convolution_pack8to4_int8.h
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_int8.h
│   │   │   ├── convolution_sgemm_pack1to4_int8.h
│   │   │   ├── convolution_sgemm_pack4.h
│   │   │   ├── convolution_sgemm_pack4to1.h
│   │   │   ├── convolution_sgemm_pack8to1_int8.h
│   │   │   ├── convolution_sgemm_pack8to4_int8.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_int8.h
│   │   │   ├── convolution_winograd_dot_pack4.h
│   │   │   ├── convolution_winograd_dot_pack8to1_int8.h
│   │   │   ├── convolution_winograd_dot_pack8to4_int8.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_int8.h
│   │   │   ├── convolution_winograd_transform_pack4.h
│   │   │   ├── convolution_winograd_transform_pack4_int8.h
│   │   │   ├── convolution_winograd_transform_pack8_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_mips.cpp
│   │   │   ├── convolutiondepthwise_mips.h
│   │   │   ├── crop_mips.cpp
│   │   │   ├── crop_mips.h
│   │   │   ├── deconvolution_mips.cpp
│   │   │   ├── deconvolution_mips.h
│   │   │   ├── deconvolution_pack1to4.h
│   │   │   ├── deconvolution_pack4.h
│   │   │   ├── deconvolution_pack4to1.h
│   │   │   ├── deconvolutiondepthwise_mips.cpp
│   │   │   ├── deconvolutiondepthwise_mips.h
│   │   │   ├── dequantize_mips.cpp
│   │   │   ├── dequantize_mips.h
│   │   │   ├── dropout_mips.cpp
│   │   │   ├── dropout_mips.h
│   │   │   ├── eltwise_mips.cpp
│   │   │   ├── eltwise_mips.h
│   │   │   ├── elu_mips.cpp
│   │   │   ├── elu_mips.h
│   │   │   ├── erf_mips.cpp
│   │   │   ├── erf_mips.h
│   │   │   ├── flatten_mips.cpp
│   │   │   ├── flatten_mips.h
│   │   │   ├── gelu_mips.cpp
│   │   │   ├── gelu_mips.h
│   │   │   ├── hardsigmoid_mips.cpp
│   │   │   ├── hardsigmoid_mips.h
│   │   │   ├── hardswish_mips.cpp
│   │   │   ├── hardswish_mips.h
│   │   │   ├── innerproduct_mips.cpp
│   │   │   ├── innerproduct_mips.h
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_mips.cpp
│   │   │   ├── interp_mips.h
│   │   │   ├── loongson_mmi.h
│   │   │   ├── mips_activation.h
│   │   │   ├── mips_usability.h
│   │   │   ├── mish_mips.cpp
│   │   │   ├── mish_mips.h
│   │   │   ├── msa_mathfun.h
│   │   │   ├── packing_mips.cpp
│   │   │   ├── packing_mips.h
│   │   │   ├── padding_mips.cpp
│   │   │   ├── padding_mips.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pooling_mips.cpp
│   │   │   ├── pooling_mips.h
│   │   │   ├── prelu_mips.cpp
│   │   │   ├── prelu_mips.h
│   │   │   ├── quantize_mips.cpp
│   │   │   ├── quantize_mips.h
│   │   │   ├── relu_mips.cpp
│   │   │   ├── relu_mips.h
│   │   │   ├── requantize_mips.cpp
│   │   │   ├── requantize_mips.h
│   │   │   ├── selu_mips.cpp
│   │   │   ├── selu_mips.h
│   │   │   ├── sigmoid_mips.cpp
│   │   │   ├── sigmoid_mips.h
│   │   │   ├── slice_mips.cpp
│   │   │   ├── slice_mips.h
│   │   │   ├── softmax_mips.cpp
│   │   │   ├── softmax_mips.h
│   │   │   ├── swish_mips.cpp
│   │   │   ├── swish_mips.h
│   │   │   ├── tanh_mips.cpp
│   │   │   ├── tanh_mips.h
│   │   │   ├── unaryop_mips.cpp
│   │   │   └── unaryop_mips.h
│   │   ├── mish.cpp
│   │   ├── mish.h
│   │   ├── multiheadattention.cpp
│   │   ├── multiheadattention.h
│   │   ├── mvn.cpp
│   │   ├── mvn.h
│   │   ├── noop.cpp
│   │   ├── noop.h
│   │   ├── normalize.cpp
│   │   ├── normalize.h
│   │   ├── packing.cpp
│   │   ├── packing.h
│   │   ├── padding.cpp
│   │   ├── padding.h
│   │   ├── permute.cpp
│   │   ├── permute.h
│   │   ├── pixelshuffle.cpp
│   │   ├── pixelshuffle.h
│   │   ├── pooling.cpp
│   │   ├── pooling.h
│   │   ├── pooling1d.cpp
│   │   ├── pooling1d.h
│   │   ├── pooling3d.cpp
│   │   ├── pooling3d.h
│   │   ├── power.cpp
│   │   ├── power.h
│   │   ├── prelu.cpp
│   │   ├── prelu.h
│   │   ├── priorbox.cpp
│   │   ├── priorbox.h
│   │   ├── proposal.cpp
│   │   ├── proposal.h
│   │   ├── psroipooling.cpp
│   │   ├── psroipooling.h
│   │   ├── quantize.cpp
│   │   ├── quantize.h
│   │   ├── reduction.cpp
│   │   ├── reduction.h
│   │   ├── relu.cpp
│   │   ├── relu.h
│   │   ├── reorg.cpp
│   │   ├── reorg.h
│   │   ├── requantize.cpp
│   │   ├── requantize.h
│   │   ├── reshape.cpp
│   │   ├── reshape.h
│   │   ├── riscv/
│   │   │   ├── absval_riscv.cpp
│   │   │   ├── absval_riscv.h
│   │   │   ├── absval_riscv_zfh.cpp
│   │   │   ├── batchnorm_riscv.cpp
│   │   │   ├── batchnorm_riscv.h
│   │   │   ├── batchnorm_riscv_zfh.cpp
│   │   │   ├── bias_riscv.cpp
│   │   │   ├── bias_riscv.h
│   │   │   ├── bias_riscv_zfh.cpp
│   │   │   ├── binaryop_riscv.cpp
│   │   │   ├── binaryop_riscv.h
│   │   │   ├── binaryop_riscv_zfh.cpp
│   │   │   ├── bnll_riscv.cpp
│   │   │   ├── bnll_riscv.h
│   │   │   ├── bnll_riscv_zfh.cpp
│   │   │   ├── cast_riscv.cpp
│   │   │   ├── cast_riscv.h
│   │   │   ├── cast_riscv_zfh.cpp
│   │   │   ├── celu_riscv.cpp
│   │   │   ├── celu_riscv.h
│   │   │   ├── celu_riscv_zfh.cpp
│   │   │   ├── clip_riscv.cpp
│   │   │   ├── clip_riscv.h
│   │   │   ├── clip_riscv_zfh.cpp
│   │   │   ├── concat_riscv.cpp
│   │   │   ├── concat_riscv.h
│   │   │   ├── convolution1d_riscv.cpp
│   │   │   ├── convolution1d_riscv.h
│   │   │   ├── convolution1d_riscv_zfh.cpp
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_fp16s.h
│   │   │   ├── convolution_1x1_pack1ton.h
│   │   │   ├── convolution_1x1_pack1ton_fp16s.h
│   │   │   ├── convolution_1x1_packn.h
│   │   │   ├── convolution_1x1_packn_fp16s.h
│   │   │   ├── convolution_1x1_packnto1.h
│   │   │   ├── convolution_1x1_packnto1_fp16s.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_pack1ton.h
│   │   │   ├── convolution_3x3_pack1ton_fp16s.h
│   │   │   ├── convolution_3x3_packn.h
│   │   │   ├── convolution_3x3_packn_fp16s.h
│   │   │   ├── convolution_7x7_pack1ton.h
│   │   │   ├── convolution_7x7_pack1ton_fp16s.h
│   │   │   ├── convolution_fp16s.h
│   │   │   ├── convolution_pack1ton.h
│   │   │   ├── convolution_pack1ton_fp16s.h
│   │   │   ├── convolution_packn.h
│   │   │   ├── convolution_packn_fp16s.h
│   │   │   ├── convolution_packnto1.h
│   │   │   ├── convolution_packnto1_fp16s.h
│   │   │   ├── convolution_riscv.cpp
│   │   │   ├── convolution_riscv.h
│   │   │   ├── convolution_riscv_zfh.cpp
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_fp16s.h
│   │   │   ├── convolution_sgemm_pack1ton.h
│   │   │   ├── convolution_sgemm_pack1ton_fp16s.h
│   │   │   ├── convolution_sgemm_packn.h
│   │   │   ├── convolution_sgemm_packn_fp16s.h
│   │   │   ├── convolution_sgemm_packnto1.h
│   │   │   ├── convolution_sgemm_packnto1_fp16s.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_packn.h
│   │   │   ├── convolution_winograd_dot_packn_fp16s.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_packn.h
│   │   │   ├── convolution_winograd_transform_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_packn.h
│   │   │   ├── convolutiondepthwise_3x3_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_5x5_packn.h
│   │   │   ├── convolutiondepthwise_5x5_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_riscv.cpp
│   │   │   ├── convolutiondepthwise_riscv.h
│   │   │   ├── convolutiondepthwise_riscv_zfh.cpp
│   │   │   ├── crop_riscv.cpp
│   │   │   ├── crop_riscv.h
│   │   │   ├── deconvolution_fp16s.h
│   │   │   ├── deconvolution_pack1ton.h
│   │   │   ├── deconvolution_pack1ton_fp16s.h
│   │   │   ├── deconvolution_packn.h
│   │   │   ├── deconvolution_packn_fp16s.h
│   │   │   ├── deconvolution_packnto1.h
│   │   │   ├── deconvolution_packnto1_fp16s.h
│   │   │   ├── deconvolution_riscv.cpp
│   │   │   ├── deconvolution_riscv.h
│   │   │   ├── deconvolution_riscv_zfh.cpp
│   │   │   ├── deconvolutiondepthwise_riscv.cpp
│   │   │   ├── deconvolutiondepthwise_riscv.h
│   │   │   ├── deconvolutiondepthwise_riscv_zfh.cpp
│   │   │   ├── deformableconv2d_pack1ton.h
│   │   │   ├── deformableconv2d_packn.h
│   │   │   ├── deformableconv2d_packnto1.h
│   │   │   ├── deformableconv2d_riscv.cpp
│   │   │   ├── deformableconv2d_riscv.h
│   │   │   ├── dropout_riscv.cpp
│   │   │   ├── dropout_riscv.h
│   │   │   ├── eltwise_riscv.cpp
│   │   │   ├── eltwise_riscv.h
│   │   │   ├── eltwise_riscv_zfh.cpp
│   │   │   ├── flatten_riscv.cpp
│   │   │   ├── flatten_riscv.h
│   │   │   ├── gelu_riscv.cpp
│   │   │   ├── gelu_riscv.h
│   │   │   ├── gemm_bf16s_fp16s.h
│   │   │   ├── gemm_fp16s.h
│   │   │   ├── gemm_riscv.cpp
│   │   │   ├── gemm_riscv.h
│   │   │   ├── gemm_riscv_zfh.cpp
│   │   │   ├── gru_riscv.cpp
│   │   │   ├── gru_riscv.h
│   │   │   ├── gru_riscv_zfh.cpp
│   │   │   ├── hardsigmoid_riscv.cpp
│   │   │   ├── hardsigmoid_riscv.h
│   │   │   ├── hardsigmoid_riscv_zfh.cpp
│   │   │   ├── hardswish_riscv.cpp
│   │   │   ├── hardswish_riscv.h
│   │   │   ├── hardswish_riscv_zfh.cpp
│   │   │   ├── innerproduct_riscv.cpp
│   │   │   ├── innerproduct_riscv.h
│   │   │   ├── innerproduct_riscv_zfh.cpp
│   │   │   ├── instancenorm_riscv.cpp
│   │   │   ├── instancenorm_riscv.h
│   │   │   ├── instancenorm_riscv_zfh.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_fp16s.h
│   │   │   ├── interp_bicubic_packn.h
│   │   │   ├── interp_bicubic_packn_fp16s.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_fp16s.h
│   │   │   ├── interp_bilinear_packn.h
│   │   │   ├── interp_bilinear_packn_fp16s.h
│   │   │   ├── interp_riscv.cpp
│   │   │   ├── interp_riscv.h
│   │   │   ├── interp_riscv_zfh.cpp
│   │   │   ├── layernorm_riscv.cpp
│   │   │   ├── layernorm_riscv.h
│   │   │   ├── layernorm_riscv_zfh.cpp
│   │   │   ├── mish_riscv.cpp
│   │   │   ├── mish_riscv.h
│   │   │   ├── mish_riscv_zfh.cpp
│   │   │   ├── packing_riscv.cpp
│   │   │   ├── packing_riscv.h
│   │   │   ├── padding_packn.h
│   │   │   ├── padding_riscv.cpp
│   │   │   ├── padding_riscv.h
│   │   │   ├── pooling_riscv.cpp
│   │   │   ├── pooling_riscv.h
│   │   │   ├── pooling_riscv_zfh.cpp
│   │   │   ├── prelu_riscv.cpp
│   │   │   ├── prelu_riscv.h
│   │   │   ├── prelu_riscv_zfh.cpp
│   │   │   ├── relu_riscv.cpp
│   │   │   ├── relu_riscv.h
│   │   │   ├── relu_riscv_zfh.cpp
│   │   │   ├── riscv_activation.h
│   │   │   ├── riscv_usability.h
│   │   │   ├── rvv_mathfun.h
│   │   │   ├── rvv_mathfun_fp16s.h
│   │   │   ├── selu_riscv.cpp
│   │   │   ├── selu_riscv.h
│   │   │   ├── shufflechannel_riscv.cpp
│   │   │   ├── shufflechannel_riscv.h
│   │   │   ├── sigmoid_riscv.cpp
│   │   │   ├── sigmoid_riscv.h
│   │   │   ├── sigmoid_riscv_zfh.cpp
│   │   │   ├── softmax_riscv.cpp
│   │   │   ├── softmax_riscv.h
│   │   │   ├── swish_riscv.cpp
│   │   │   ├── swish_riscv.h
│   │   │   ├── swish_riscv_zfh.cpp
│   │   │   ├── tanh_riscv.cpp
│   │   │   ├── tanh_riscv.h
│   │   │   ├── tanh_riscv_zfh.cpp
│   │   │   ├── unaryop_riscv.cpp
│   │   │   ├── unaryop_riscv.h
│   │   │   └── unaryop_riscv_zfh.cpp
│   │   ├── rmsnorm.cpp
│   │   ├── rmsnorm.h
│   │   ├── rnn.cpp
│   │   ├── rnn.h
│   │   ├── roialign.cpp
│   │   ├── roialign.h
│   │   ├── roipooling.cpp
│   │   ├── roipooling.h
│   │   ├── rotaryembed.cpp
│   │   ├── rotaryembed.h
│   │   ├── scale.cpp
│   │   ├── scale.h
│   │   ├── sdpa.cpp
│   │   ├── sdpa.h
│   │   ├── selu.cpp
│   │   ├── selu.h
│   │   ├── shrink.cpp
│   │   ├── shrink.h
│   │   ├── shufflechannel.cpp
│   │   ├── shufflechannel.h
│   │   ├── sigmoid.cpp
│   │   ├── sigmoid.h
│   │   ├── slice.cpp
│   │   ├── slice.h
│   │   ├── softmax.cpp
│   │   ├── softmax.h
│   │   ├── softplus.cpp
│   │   ├── softplus.h
│   │   ├── spectrogram.cpp
│   │   ├── spectrogram.h
│   │   ├── split.cpp
│   │   ├── split.h
│   │   ├── spp.cpp
│   │   ├── spp.h
│   │   ├── squeeze.cpp
│   │   ├── squeeze.h
│   │   ├── statisticspooling.cpp
│   │   ├── statisticspooling.h
│   │   ├── swish.cpp
│   │   ├── swish.h
│   │   ├── tanh.cpp
│   │   ├── tanh.h
│   │   ├── threshold.cpp
│   │   ├── threshold.h
│   │   ├── tile.cpp
│   │   ├── tile.h
│   │   ├── unaryop.cpp
│   │   ├── unaryop.h
│   │   ├── unfold.cpp
│   │   ├── unfold.h
│   │   ├── vulkan/
│   │   │   ├── absval_vulkan.cpp
│   │   │   ├── absval_vulkan.h
│   │   │   ├── batchnorm_vulkan.cpp
│   │   │   ├── batchnorm_vulkan.h
│   │   │   ├── binaryop_vulkan.cpp
│   │   │   ├── binaryop_vulkan.h
│   │   │   ├── cast_vulkan.cpp
│   │   │   ├── cast_vulkan.h
│   │   │   ├── celu_vulkan.cpp
│   │   │   ├── celu_vulkan.h
│   │   │   ├── clip_vulkan.cpp
│   │   │   ├── clip_vulkan.h
│   │   │   ├── concat_vulkan.cpp
│   │   │   ├── concat_vulkan.h
│   │   │   ├── convolution1d_vulkan.cpp
│   │   │   ├── convolution1d_vulkan.h
│   │   │   ├── convolution_vulkan.cpp
│   │   │   ├── convolution_vulkan.h
│   │   │   ├── convolutiondepthwise_vulkan.cpp
│   │   │   ├── convolutiondepthwise_vulkan.h
│   │   │   ├── crop_vulkan.cpp
│   │   │   ├── crop_vulkan.h
│   │   │   ├── deconvolution_vulkan.cpp
│   │   │   ├── deconvolution_vulkan.h
│   │   │   ├── deconvolutiondepthwise_vulkan.cpp
│   │   │   ├── deconvolutiondepthwise_vulkan.h
│   │   │   ├── deepcopy_vulkan.cpp
│   │   │   ├── deepcopy_vulkan.h
│   │   │   ├── dequantize_vulkan.cpp
│   │   │   ├── dequantize_vulkan.h
│   │   │   ├── dropout_vulkan.cpp
│   │   │   ├── dropout_vulkan.h
│   │   │   ├── eltwise_vulkan.cpp
│   │   │   ├── eltwise_vulkan.h
│   │   │   ├── elu_vulkan.cpp
│   │   │   ├── elu_vulkan.h
│   │   │   ├── erf_vulkan.cpp
│   │   │   ├── erf_vulkan.h
│   │   │   ├── flatten_vulkan.cpp
│   │   │   ├── flatten_vulkan.h
│   │   │   ├── gelu_vulkan.cpp
│   │   │   ├── gelu_vulkan.h
│   │   │   ├── gemm_vulkan.cpp
│   │   │   ├── gemm_vulkan.h
│   │   │   ├── groupnorm_vulkan.cpp
│   │   │   ├── groupnorm_vulkan.h
│   │   │   ├── hardsigmoid_vulkan.cpp
│   │   │   ├── hardsigmoid_vulkan.h
│   │   │   ├── hardswish_vulkan.cpp
│   │   │   ├── hardswish_vulkan.h
│   │   │   ├── innerproduct_vulkan.cpp
│   │   │   ├── innerproduct_vulkan.h
│   │   │   ├── instancenorm_vulkan.cpp
│   │   │   ├── instancenorm_vulkan.h
│   │   │   ├── interp_vulkan.cpp
│   │   │   ├── interp_vulkan.h
│   │   │   ├── layernorm_vulkan.cpp
│   │   │   ├── layernorm_vulkan.h
│   │   │   ├── lrn_vulkan.cpp
│   │   │   ├── lrn_vulkan.h
│   │   │   ├── memorydata_vulkan.cpp
│   │   │   ├── memorydata_vulkan.h
│   │   │   ├── mish_vulkan.cpp
│   │   │   ├── mish_vulkan.h
│   │   │   ├── multiheadattention_vulkan.cpp
│   │   │   ├── multiheadattention_vulkan.h
│   │   │   ├── noop_vulkan.cpp
│   │   │   ├── noop_vulkan.h
│   │   │   ├── normalize_vulkan.cpp
│   │   │   ├── normalize_vulkan.h
│   │   │   ├── packing_vulkan.cpp
│   │   │   ├── packing_vulkan.h
│   │   │   ├── padding_vulkan.cpp
│   │   │   ├── padding_vulkan.h
│   │   │   ├── permute_vulkan.cpp
│   │   │   ├── permute_vulkan.h
│   │   │   ├── pixelshuffle_vulkan.cpp
│   │   │   ├── pixelshuffle_vulkan.h
│   │   │   ├── pooling_vulkan.cpp
│   │   │   ├── pooling_vulkan.h
│   │   │   ├── prelu_vulkan.cpp
│   │   │   ├── prelu_vulkan.h
│   │   │   ├── priorbox_vulkan.cpp
│   │   │   ├── priorbox_vulkan.h
│   │   │   ├── quantize_vulkan.cpp
│   │   │   ├── quantize_vulkan.h
│   │   │   ├── reduction_vulkan.cpp
│   │   │   ├── reduction_vulkan.h
│   │   │   ├── relu_vulkan.cpp
│   │   │   ├── relu_vulkan.h
│   │   │   ├── reorg_vulkan.cpp
│   │   │   ├── reorg_vulkan.h
│   │   │   ├── requantize_vulkan.cpp
│   │   │   ├── requantize_vulkan.h
│   │   │   ├── reshape_vulkan.cpp
│   │   │   ├── reshape_vulkan.h
│   │   │   ├── rmsnorm_vulkan.cpp
│   │   │   ├── rmsnorm_vulkan.h
│   │   │   ├── rotaryembed_vulkan.cpp
│   │   │   ├── rotaryembed_vulkan.h
│   │   │   ├── scale_vulkan.cpp
│   │   │   ├── scale_vulkan.h
│   │   │   ├── sdpa_vulkan.cpp
│   │   │   ├── sdpa_vulkan.h
│   │   │   ├── selu_vulkan.cpp
│   │   │   ├── selu_vulkan.h
│   │   │   ├── shader/
│   │   │   │   ├── .clang-format
│   │   │   │   ├── absval.comp
│   │   │   │   ├── batchnorm.comp
│   │   │   │   ├── batchnorm_pack4.comp
│   │   │   │   ├── binaryop.comp
│   │   │   │   ├── binaryop_broadcast.comp
│   │   │   │   ├── binaryop_broadcast_pack1to4.comp
│   │   │   │   ├── binaryop_broadcast_pack4.comp
│   │   │   │   ├── binaryop_pack4.comp
│   │   │   │   ├── cast_fp16_to_fp32.comp
│   │   │   │   ├── cast_fp16_to_fp32_pack4.comp
│   │   │   │   ├── cast_fp32_to_fp16.comp
│   │   │   │   ├── cast_fp32_to_fp16_pack4.comp
│   │   │   │   ├── celu.comp
│   │   │   │   ├── clip.comp
│   │   │   │   ├── concat.comp
│   │   │   │   ├── concat_pack4.comp
│   │   │   │   ├── concat_pack4to1.comp
│   │   │   │   ├── convolution1d_packed.comp
│   │   │   │   ├── convolution_1x1s1d1_cm.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd23_transform_input.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd23_transform_output.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd43_transform_input.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd43_transform_output.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_gemm_cm.comp
│   │   │   │   ├── convolution_pack1to4_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd23_transform_input.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd23_transform_output.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd43_transform_input.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd43_transform_output.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_pack4to1_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_packed.comp
│   │   │   │   ├── convolution_packed_1x1s1d1.comp
│   │   │   │   ├── convolution_packed_gemm.comp
│   │   │   │   ├── convolution_winograd_gemm_cm.comp
│   │   │   │   ├── convolutiondepthwise.comp
│   │   │   │   ├── convolutiondepthwise_group.comp
│   │   │   │   ├── convolutiondepthwise_group_pack1to4.comp
│   │   │   │   ├── convolutiondepthwise_group_pack4.comp
│   │   │   │   ├── convolutiondepthwise_group_pack4to1.comp
│   │   │   │   ├── convolutiondepthwise_pack4.comp
│   │   │   │   ├── crop.comp
│   │   │   │   ├── crop_pack1to4.comp
│   │   │   │   ├── crop_pack4.comp
│   │   │   │   ├── crop_pack4to1.comp
│   │   │   │   ├── deconvolution_col2im.comp
│   │   │   │   ├── deconvolution_gemm_cm.comp
│   │   │   │   ├── deconvolution_gemm_packed.comp
│   │   │   │   ├── deconvolution_pack4_col2im.comp
│   │   │   │   ├── deconvolution_packed.comp
│   │   │   │   ├── deconvolutiondepthwise.comp
│   │   │   │   ├── deconvolutiondepthwise_group.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack1to4.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack4.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack4to1.comp
│   │   │   │   ├── deconvolutiondepthwise_pack4.comp
│   │   │   │   ├── deepcopy.comp
│   │   │   │   ├── deepcopy_pack4.comp
│   │   │   │   ├── dequantize.comp
│   │   │   │   ├── dequantize_pack4.comp
│   │   │   │   ├── dropout.comp
│   │   │   │   ├── eltwise.comp
│   │   │   │   ├── elu.comp
│   │   │   │   ├── erf.comp
│   │   │   │   ├── flatten.comp
│   │   │   │   ├── flatten_pack1to4.comp
│   │   │   │   ├── flatten_pack4.comp
│   │   │   │   ├── gelu.comp
│   │   │   │   ├── gemm.comp
│   │   │   │   ├── gemm_cm.comp
│   │   │   │   ├── gemm_sg.comp
│   │   │   │   ├── groupnorm_coeffs.comp
│   │   │   │   ├── groupnorm_coeffs_pack4.comp
│   │   │   │   ├── groupnorm_norm.comp
│   │   │   │   ├── groupnorm_norm_pack4.comp
│   │   │   │   ├── groupnorm_reduce_mean.comp
│   │   │   │   ├── groupnorm_reduce_mean_pack4.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp32.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── groupnorm_sub_mean_square.comp
│   │   │   │   ├── groupnorm_sub_mean_square_pack4.comp
│   │   │   │   ├── hardsigmoid.comp
│   │   │   │   ├── hardswish.comp
│   │   │   │   ├── innerproduct.comp
│   │   │   │   ├── innerproduct_gemm.comp
│   │   │   │   ├── innerproduct_gemm_wp1to4.comp
│   │   │   │   ├── innerproduct_gemm_wp4.comp
│   │   │   │   ├── innerproduct_gemm_wp4to1.comp
│   │   │   │   ├── innerproduct_pack1to4.comp
│   │   │   │   ├── innerproduct_pack4.comp
│   │   │   │   ├── innerproduct_pack4to1.comp
│   │   │   │   ├── innerproduct_reduce_sum8.comp
│   │   │   │   ├── innerproduct_reduce_sum8_pack4.comp
│   │   │   │   ├── innerproduct_sum8.comp
│   │   │   │   ├── innerproduct_sum8_pack1to4.comp
│   │   │   │   ├── innerproduct_sum8_pack4.comp
│   │   │   │   ├── innerproduct_sum8_pack4to1.comp
│   │   │   │   ├── instancenorm_coeffs.comp
│   │   │   │   ├── instancenorm_coeffs_pack4.comp
│   │   │   │   ├── instancenorm_norm.comp
│   │   │   │   ├── instancenorm_norm_pack4.comp
│   │   │   │   ├── instancenorm_reduce_mean.comp
│   │   │   │   ├── instancenorm_reduce_mean_pack4.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp32.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── instancenorm_sub_mean_square.comp
│   │   │   │   ├── instancenorm_sub_mean_square_pack4.comp
│   │   │   │   ├── interp.comp
│   │   │   │   ├── interp_bicubic.comp
│   │   │   │   ├── interp_bicubic_coeffs.comp
│   │   │   │   ├── interp_bicubic_pack4.comp
│   │   │   │   ├── interp_pack4.comp
│   │   │   │   ├── layernorm_coeffs.comp
│   │   │   │   ├── layernorm_coeffs_pack4.comp
│   │   │   │   ├── layernorm_norm.comp
│   │   │   │   ├── layernorm_norm_pack4.comp
│   │   │   │   ├── layernorm_reduce_mean.comp
│   │   │   │   ├── layernorm_reduce_mean_pack4.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp32.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── layernorm_sub_mean_square.comp
│   │   │   │   ├── layernorm_sub_mean_square_pack4.comp
│   │   │   │   ├── lrn_norm.comp
│   │   │   │   ├── lrn_norm_across_channel_pack4.comp
│   │   │   │   ├── lrn_norm_within_channel_pack4.comp
│   │   │   │   ├── lrn_square_pad.comp
│   │   │   │   ├── lrn_square_pad_across_channel_pack4.comp
│   │   │   │   ├── lrn_square_pad_within_channel_pack4.comp
│   │   │   │   ├── mish.comp
│   │   │   │   ├── multiheadattention_qk_cross.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack1to4.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack4.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack4to1.comp
│   │   │   │   ├── multiheadattention_qkv_cross.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack1to4.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack4.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack4to1.comp
│   │   │   │   ├── normalize_coeffs.comp
│   │   │   │   ├── normalize_coeffs_pack4.comp
│   │   │   │   ├── normalize_norm.comp
│   │   │   │   ├── normalize_norm_pack4.comp
│   │   │   │   ├── normalize_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── normalize_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── normalize_reduce_sum4_fp32.comp
│   │   │   │   ├── normalize_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── packing.comp
│   │   │   │   ├── packing_int8.comp
│   │   │   │   ├── packing_pack1to4.comp
│   │   │   │   ├── packing_pack1to4_int8.comp
│   │   │   │   ├── packing_pack4to1.comp
│   │   │   │   ├── packing_pack4to1_int8.comp
│   │   │   │   ├── padding.comp
│   │   │   │   ├── padding_3d.comp
│   │   │   │   ├── padding_3d_pack4.comp
│   │   │   │   ├── padding_pack1to4.comp
│   │   │   │   ├── padding_pack4.comp
│   │   │   │   ├── padding_pack4to1.comp
│   │   │   │   ├── permute.comp
│   │   │   │   ├── permute_pack1to4.comp
│   │   │   │   ├── permute_pack4.comp
│   │   │   │   ├── permute_pack4to1.comp
│   │   │   │   ├── pixelshuffle.comp
│   │   │   │   ├── pixelshuffle_pack4.comp
│   │   │   │   ├── pixelshuffle_pack4to1.comp
│   │   │   │   ├── pooling.comp
│   │   │   │   ├── pooling_adaptive.comp
│   │   │   │   ├── pooling_adaptive_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max.comp
│   │   │   │   ├── pooling_global_reduce_max_first.comp
│   │   │   │   ├── pooling_global_reduce_max_first_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max_last.comp
│   │   │   │   ├── pooling_global_reduce_max_last_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum.comp
│   │   │   │   ├── pooling_global_reduce_sum_first.comp
│   │   │   │   ├── pooling_global_reduce_sum_first_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum_last.comp
│   │   │   │   ├── pooling_global_reduce_sum_last_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum_pack4.comp
│   │   │   │   ├── pooling_pack4.comp
│   │   │   │   ├── prelu.comp
│   │   │   │   ├── prelu_pack4.comp
│   │   │   │   ├── priorbox.comp
│   │   │   │   ├── priorbox_mxnet.comp
│   │   │   │   ├── quantize.comp
│   │   │   │   ├── quantize_pack4.comp
│   │   │   │   ├── reduction.comp
│   │   │   │   ├── relu.comp
│   │   │   │   ├── reorg.comp
│   │   │   │   ├── reorg_pack1to4.comp
│   │   │   │   ├── reorg_pack4.comp
│   │   │   │   ├── requantize.comp
│   │   │   │   ├── requantize_pack4.comp
│   │   │   │   ├── reshape.comp
│   │   │   │   ├── reshape_pack1to4.comp
│   │   │   │   ├── reshape_pack4.comp
│   │   │   │   ├── reshape_pack4to1.comp
│   │   │   │   ├── rmsnorm_coeffs.comp
│   │   │   │   ├── rmsnorm_coeffs_pack4.comp
│   │   │   │   ├── rmsnorm_norm.comp
│   │   │   │   ├── rmsnorm_norm_pack4.comp
│   │   │   │   ├── rmsnorm_square.comp
│   │   │   │   ├── rmsnorm_square_pack4.comp
│   │   │   │   ├── rotaryembed.comp
│   │   │   │   ├── rotaryembed_pack4.comp
│   │   │   │   ├── scale.comp
│   │   │   │   ├── scale_pack4.comp
│   │   │   │   ├── sdpa_cross.comp
│   │   │   │   ├── sdpa_cross_cm.comp
│   │   │   │   ├── sdpa_fa.comp
│   │   │   │   ├── sdpa_fa_cm.comp
│   │   │   │   ├── selu.comp
│   │   │   │   ├── shrink.comp
│   │   │   │   ├── shufflechannel.comp
│   │   │   │   ├── shufflechannel_pack4.comp
│   │   │   │   ├── sigmoid.comp
│   │   │   │   ├── slice.comp
│   │   │   │   ├── slice_pack1to4.comp
│   │   │   │   ├── slice_pack4.comp
│   │   │   │   ├── softmax_div_sum.comp
│   │   │   │   ├── softmax_div_sum_pack4.comp
│   │   │   │   ├── softmax_exp_sub_max.comp
│   │   │   │   ├── softmax_exp_sub_max_pack4.comp
│   │   │   │   ├── softmax_reduce_max.comp
│   │   │   │   ├── softmax_reduce_max_pack4.comp
│   │   │   │   ├── softmax_reduce_sum.comp
│   │   │   │   ├── softmax_reduce_sum_pack4.comp
│   │   │   │   ├── softplus.comp
│   │   │   │   ├── swish.comp
│   │   │   │   ├── tanh.comp
│   │   │   │   ├── unaryop.comp
│   │   │   │   ├── unfold_im2col.comp
│   │   │   │   ├── unfold_im2col_pack1to4.comp
│   │   │   │   ├── unfold_im2col_pack4.comp
│   │   │   │   ├── unfold_im2col_pack4to1.comp
│   │   │   │   └── vulkan_activation.comp
│   │   │   ├── shrink_vulkan.cpp
│   │   │   ├── shrink_vulkan.h
│   │   │   ├── shufflechannel_vulkan.cpp
│   │   │   ├── shufflechannel_vulkan.h
│   │   │   ├── sigmoid_vulkan.cpp
│   │   │   ├── sigmoid_vulkan.h
│   │   │   ├── slice_vulkan.cpp
│   │   │   ├── slice_vulkan.h
│   │   │   ├── softmax_vulkan.cpp
│   │   │   ├── softmax_vulkan.h
│   │   │   ├── softplus_vulkan.cpp
│   │   │   ├── softplus_vulkan.h
│   │   │   ├── split_vulkan.cpp
│   │   │   ├── split_vulkan.h
│   │   │   ├── swish_vulkan.cpp
│   │   │   ├── swish_vulkan.h
│   │   │   ├── tanh_vulkan.cpp
│   │   │   ├── tanh_vulkan.h
│   │   │   ├── unaryop_vulkan.cpp
│   │   │   ├── unaryop_vulkan.h
│   │   │   ├── unfold_vulkan.cpp
│   │   │   └── unfold_vulkan.h
│   │   ├── x86/
│   │   │   ├── absval_x86.cpp
│   │   │   ├── absval_x86.h
│   │   │   ├── avx512_mathfun.h
│   │   │   ├── avx_mathfun.h
│   │   │   ├── batchnorm_bf16s.h
│   │   │   ├── batchnorm_x86.cpp
│   │   │   ├── batchnorm_x86.h
│   │   │   ├── batchnorm_x86_avx512bf16.cpp
│   │   │   ├── bias_x86.cpp
│   │   │   ├── bias_x86.h
│   │   │   ├── binaryop_bf16s.h
│   │   │   ├── binaryop_functor.h
│   │   │   ├── binaryop_x86.cpp
│   │   │   ├── binaryop_x86.h
│   │   │   ├── binaryop_x86_avx512bf16.cpp
│   │   │   ├── bnll_x86.cpp
│   │   │   ├── bnll_x86.h
│   │   │   ├── cast_bf16.h
│   │   │   ├── cast_fp16.h
│   │   │   ├── cast_x86.cpp
│   │   │   ├── cast_x86.h
│   │   │   ├── cast_x86_avx2.cpp
│   │   │   ├── cast_x86_avx512bf16.cpp
│   │   │   ├── cast_x86_f16c.cpp
│   │   │   ├── clip_bf16s.h
│   │   │   ├── clip_x86.cpp
│   │   │   ├── clip_x86.h
│   │   │   ├── clip_x86_avx512bf16.cpp
│   │   │   ├── concat_x86.cpp
│   │   │   ├── concat_x86.h
│   │   │   ├── convolution1d_packed.h
│   │   │   ├── convolution1d_x86.cpp
│   │   │   ├── convolution1d_x86.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_2x2_pack8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack16to1.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack1to8.h
│   │   │   ├── convolution_3x3_pack8.h
│   │   │   ├── convolution_3x3_pack8to1.h
│   │   │   ├── convolution_3x3_winograd.h
│   │   │   ├── convolution_3x3_winograd_int8.h
│   │   │   ├── convolution_5x5.h
│   │   │   ├── convolution_im2col_gemm.h
│   │   │   ├── convolution_im2col_gemm_int8.h
│   │   │   ├── convolution_packed.h
│   │   │   ├── convolution_packed_int8.h
│   │   │   ├── convolution_x86.cpp
│   │   │   ├── convolution_x86.h
│   │   │   ├── convolution_x86_avx2.cpp
│   │   │   ├── convolution_x86_avx512vnni.cpp
│   │   │   ├── convolution_x86_avxvnni.cpp
│   │   │   ├── convolution_x86_avxvnniint8.cpp
│   │   │   ├── convolution_x86_xop.cpp
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_int8.h
│   │   │   ├── convolutiondepthwise_3x3_pack16.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_3x3_pack8.h
│   │   │   ├── convolutiondepthwise_5x5_pack16.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack8.h
│   │   │   ├── convolutiondepthwise_x86.cpp
│   │   │   ├── convolutiondepthwise_x86.h
│   │   │   ├── crop_x86.cpp
│   │   │   ├── crop_x86.h
│   │   │   ├── deconvolution_packed.h
│   │   │   ├── deconvolution_x86.cpp
│   │   │   ├── deconvolution_x86.h
│   │   │   ├── deconvolutiondepthwise_x86.cpp
│   │   │   ├── deconvolutiondepthwise_x86.h
│   │   │   ├── deformableconv2d_packed.h
│   │   │   ├── deformableconv2d_x86.cpp
│   │   │   ├── deformableconv2d_x86.h
│   │   │   ├── dequantize_x86.cpp
│   │   │   ├── dequantize_x86.h
│   │   │   ├── dropout_x86.cpp
│   │   │   ├── dropout_x86.h
│   │   │   ├── eltwise_x86.cpp
│   │   │   ├── eltwise_x86.h
│   │   │   ├── elu_x86.cpp
│   │   │   ├── elu_x86.h
│   │   │   ├── erf_x86.cpp
│   │   │   ├── erf_x86.h
│   │   │   ├── flatten_x86.cpp
│   │   │   ├── flatten_x86.h
│   │   │   ├── gelu_x86.cpp
│   │   │   ├── gelu_x86.h
│   │   │   ├── gemm_bf16s.h
│   │   │   ├── gemm_int8.h
│   │   │   ├── gemm_x86.cpp
│   │   │   ├── gemm_x86.h
│   │   │   ├── gemm_x86_avx2.cpp
│   │   │   ├── gemm_x86_avx512vnni.cpp
│   │   │   ├── gemm_x86_avxvnni.cpp
│   │   │   ├── gemm_x86_avxvnniint8.cpp
│   │   │   ├── gemm_x86_xop.cpp
│   │   │   ├── gridsample_bicubic_apply_interpolation.h
│   │   │   ├── gridsample_bicubic_compute_blob.h
│   │   │   ├── gridsample_bilinear_apply_interpolation.h
│   │   │   ├── gridsample_bilinear_compute_blob.h
│   │   │   ├── gridsample_compute_blob.h
│   │   │   ├── gridsample_nearest_apply_interpolation.h
│   │   │   ├── gridsample_nearest_compute_blob.h
│   │   │   ├── gridsample_x86.cpp
│   │   │   ├── gridsample_x86.h
│   │   │   ├── groupnorm_bf16s.h
│   │   │   ├── groupnorm_x86.cpp
│   │   │   ├── groupnorm_x86.h
│   │   │   ├── groupnorm_x86_avx512bf16.cpp
│   │   │   ├── hardsigmoid_x86.cpp
│   │   │   ├── hardsigmoid_x86.h
│   │   │   ├── hardswish_x86.cpp
│   │   │   ├── hardswish_x86.h
│   │   │   ├── innerproduct_fp.h
│   │   │   ├── innerproduct_gemm_fp.h
│   │   │   ├── innerproduct_x86.cpp
│   │   │   ├── innerproduct_x86.h
│   │   │   ├── innerproduct_x86_f16c.cpp
│   │   │   ├── instancenorm_bf16s.h
│   │   │   ├── instancenorm_x86.cpp
│   │   │   ├── instancenorm_x86.h
│   │   │   ├── instancenorm_x86_avx512bf16.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack16.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bicubic_pack8.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack16.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_bilinear_pack8.h
│   │   │   ├── interp_x86.cpp
│   │   │   ├── interp_x86.h
│   │   │   ├── interp_x86_avx2.cpp
│   │   │   ├── layernorm_bf16s.h
│   │   │   ├── layernorm_x86.cpp
│   │   │   ├── layernorm_x86.h
│   │   │   ├── layernorm_x86_avx512bf16.cpp
│   │   │   ├── lrn_x86.cpp
│   │   │   ├── lrn_x86.h
│   │   │   ├── lstm_int8.h
│   │   │   ├── lstm_x86.cpp
│   │   │   ├── lstm_x86.h
│   │   │   ├── lstm_x86_avx2.cpp
│   │   │   ├── lstm_x86_avx512vnni.cpp
│   │   │   ├── lstm_x86_avxvnni.cpp
│   │   │   ├── lstm_x86_xop.cpp
│   │   │   ├── matmul_x86.cpp
│   │   │   ├── matmul_x86.h
│   │   │   ├── mish_x86.cpp
│   │   │   ├── mish_x86.h
│   │   │   ├── multiheadattention_x86.cpp
│   │   │   ├── multiheadattention_x86.h
│   │   │   ├── packing_x86.cpp
│   │   │   ├── packing_x86.h
│   │   │   ├── padding_pack16.h
│   │   │   ├── padding_pack16_bf16s_fp16s.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack4_bf16s_fp16s.h
│   │   │   ├── padding_pack8.h
│   │   │   ├── padding_pack8_bf16s_fp16s.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── padding_x86.cpp
│   │   │   ├── padding_x86.h
│   │   │   ├── pooling_2x2.h
│   │   │   ├── pooling_2x2_pack16.h
│   │   │   ├── pooling_2x2_pack4.h
│   │   │   ├── pooling_2x2_pack8.h
│   │   │   ├── pooling_3x3_pack16.h
│   │   │   ├── pooling_3x3_pack4.h
│   │   │   ├── pooling_3x3_pack8.h
│   │   │   ├── pooling_x86.cpp
│   │   │   ├── pooling_x86.h
│   │   │   ├── prelu_bf16s.h
│   │   │   ├── prelu_x86.cpp
│   │   │   ├── prelu_x86.h
│   │   │   ├── prelu_x86_avx512bf16.cpp
│   │   │   ├── quantize_x86.cpp
│   │   │   ├── quantize_x86.h
│   │   │   ├── relu_bf16s.h
│   │   │   ├── relu_x86.cpp
│   │   │   ├── relu_x86.h
│   │   │   ├── relu_x86_avx512bf16.cpp
│   │   │   ├── requantize_x86.cpp
│   │   │   ├── requantize_x86.h
│   │   │   ├── reshape_x86.cpp
│   │   │   ├── reshape_x86.h
│   │   │   ├── rmsnorm_bf16s.h
│   │   │   ├── rmsnorm_x86.cpp
│   │   │   ├── rmsnorm_x86.h
│   │   │   ├── rmsnorm_x86_avx512bf16.cpp
│   │   │   ├── roialign_x86.cpp
│   │   │   ├── roialign_x86.h
│   │   │   ├── rotaryembed_x86.cpp
│   │   │   ├── rotaryembed_x86.h
│   │   │   ├── scale_bf16s.h
│   │   │   ├── scale_x86.cpp
│   │   │   ├── scale_x86.h
│   │   │   ├── scale_x86_avx512bf16.cpp
│   │   │   ├── sdpa_x86.cpp
│   │   │   ├── sdpa_x86.h
│   │   │   ├── selu_x86.cpp
│   │   │   ├── selu_x86.h
│   │   │   ├── shufflechannel_x86.cpp
│   │   │   ├── shufflechannel_x86.h
│   │   │   ├── sigmoid_bf16s.h
│   │   │   ├── sigmoid_x86.cpp
│   │   │   ├── sigmoid_x86.h
│   │   │   ├── sigmoid_x86_avx512bf16.cpp
│   │   │   ├── slice_x86.cpp
│   │   │   ├── slice_x86.h
│   │   │   ├── softmax_bf16s.h
│   │   │   ├── softmax_x86.cpp
│   │   │   ├── softmax_x86.h
│   │   │   ├── softmax_x86_avx512bf16.cpp
│   │   │   ├── sse_mathfun.h
│   │   │   ├── swish_bf16s.h
│   │   │   ├── swish_x86.cpp
│   │   │   ├── swish_x86.h
│   │   │   ├── swish_x86_avx512bf16.cpp
│   │   │   ├── tanh_x86.cpp
│   │   │   ├── tanh_x86.h
│   │   │   ├── unaryop_bf16s.h
│   │   │   ├── unaryop_functor.h
│   │   │   ├── unaryop_x86.cpp
│   │   │   ├── unaryop_x86.h
│   │   │   ├── unaryop_x86_avx512bf16.cpp
│   │   │   ├── x86_activation.h
│   │   │   ├── x86_usability.h
│   │   │   ├── yolov3detectionoutput_x86.cpp
│   │   │   └── yolov3detectionoutput_x86.h
│   │   ├── yolodetectionoutput.cpp
│   │   ├── yolodetectionoutput.h
│   │   ├── yolov3detectionoutput.cpp
│   │   └── yolov3detectionoutput.h
│   ├── layer.cpp
│   ├── layer.h
│   ├── layer_declaration.h.in
│   ├── layer_registry.h.in
│   ├── layer_shader_registry.h.in
│   ├── layer_shader_spv_data.h.in
│   ├── layer_shader_type.h
│   ├── layer_shader_type_enum.h.in
│   ├── layer_type.h
│   ├── layer_type_enum.h.in
│   ├── mat.cpp
│   ├── mat.h
│   ├── mat_pixel.cpp
│   ├── mat_pixel_affine.cpp
│   ├── mat_pixel_android.cpp
│   ├── mat_pixel_drawing.cpp
│   ├── mat_pixel_drawing_font.h
│   ├── mat_pixel_resize.cpp
│   ├── mat_pixel_rotate.cpp
│   ├── modelbin.cpp
│   ├── modelbin.h
│   ├── ncnn.pc.in
│   ├── net.cpp
│   ├── net.h
│   ├── option.cpp
│   ├── option.h
│   ├── paramdict.cpp
│   ├── paramdict.h
│   ├── pipeline.cpp
│   ├── pipeline.h
│   ├── pipelinecache.cpp
│   ├── pipelinecache.h
│   ├── platform.h.in
│   ├── ruapu.h
│   ├── simplemath.cpp
│   ├── simplemath.h
│   ├── simpleocv.cpp
│   ├── simpleocv.h
│   ├── simpleomp.cpp
│   ├── simpleomp.h
│   ├── simplestl.cpp
│   ├── simplestl.h
│   ├── simplevk.cpp
│   ├── simplevk.h
│   ├── simplevk.tbd
│   ├── stb_image.h
│   ├── stb_image_write.h
│   └── vulkan_header_fix.h
├── tests/
│   ├── CMakeLists.txt
│   ├── perf/
│   │   ├── CMakeLists.txt
│   │   ├── perf_batchnorm.cpp
│   │   ├── perf_binaryop.cpp
│   │   ├── perf_concat.cpp
│   │   ├── perf_convolution.cpp
│   │   ├── perf_convolutiondepthwise.cpp
│   │   ├── perf_deconvolution.cpp
│   │   ├── perf_innerproduct.cpp
│   │   ├── perf_pooling.cpp
│   │   ├── perf_relu.cpp
│   │   ├── perf_sigmoid.cpp
│   │   ├── perf_softmax.cpp
│   │   ├── perfutil.cpp
│   │   └── perfutil.h
│   ├── prng.h
│   ├── test_absval.cpp
│   ├── test_batchnorm.cpp
│   ├── test_bias.cpp
│   ├── test_binaryop.cpp
│   ├── test_binaryop_1.cpp
│   ├── test_binaryop_2.cpp
│   ├── test_binaryop_3.cpp
│   ├── test_binaryop_4.cpp
│   ├── test_bnll.cpp
│   ├── test_c_api.cpp
│   ├── test_cast.cpp
│   ├── test_celu.cpp
│   ├── test_clip.cpp
│   ├── test_command.cpp
│   ├── test_concat.cpp
│   ├── test_concat_oom.cpp
│   ├── test_convolution.cpp
│   ├── test_convolution1d.cpp
│   ├── test_convolution3d.cpp
│   ├── test_convolution_1.cpp
│   ├── test_convolution_2.cpp
│   ├── test_convolution_3.cpp
│   ├── test_convolution_oom.cpp
│   ├── test_convolutiondepthwise.cpp
│   ├── test_convolutiondepthwise1d.cpp
│   ├── test_convolutiondepthwise3d.cpp
│   ├── test_convolutiondepthwise_1.cpp
│   ├── test_copyto.cpp
│   ├── test_copyto_1.cpp
│   ├── test_cpu.cpp
│   ├── test_crop.cpp
│   ├── test_crop_1.cpp
│   ├── test_crop_2.cpp
│   ├── test_crop_3.cpp
│   ├── test_crop_oom.cpp
│   ├── test_cumulativesum.cpp
│   ├── test_deconvolution.cpp
│   ├── test_deconvolution1d.cpp
│   ├── test_deconvolution3d.cpp
│   ├── test_deconvolutiondepthwise.cpp
│   ├── test_deconvolutiondepthwise1d.cpp
│   ├── test_deconvolutiondepthwise3d.cpp
│   ├── test_deconvolutiondepthwise_1.cpp
│   ├── test_deepcopy.cpp
│   ├── test_deformableconv2d.cpp
│   ├── test_deformableconv2d_1.cpp
│   ├── test_deformableconv2d_2.cpp
│   ├── test_deformableconv2d_3.cpp
│   ├── test_deformableconv2d_4.cpp
│   ├── test_dequantize.cpp
│   ├── test_diag.cpp
│   ├── test_dropout.cpp
│   ├── test_einsum.cpp
│   ├── test_eltwise.cpp
│   ├── test_elu.cpp
│   ├── test_embed.cpp
│   ├── test_erf.cpp
│   ├── test_expanddims.cpp
│   ├── test_expression.cpp
│   ├── test_flatten.cpp
│   ├── test_flip.cpp
│   ├── test_fold.cpp
│   ├── test_gelu.cpp
│   ├── test_gemm_0.h
│   ├── test_gemm_0a.cpp
│   ├── test_gemm_0b.cpp
│   ├── test_gemm_0c.cpp
│   ├── test_gemm_0d.cpp
│   ├── test_gemm_0e.cpp
│   ├── test_gemm_0f.cpp
│   ├── test_gemm_1.h
│   ├── test_gemm_1a.cpp
│   ├── test_gemm_1b.cpp
│   ├── test_gemm_2.h
│   ├── test_gemm_2a.cpp
│   ├── test_gemm_2b.cpp
│   ├── test_gemm_2c.cpp
│   ├── test_gemm_2d.cpp
│   ├── test_gemm_2e.cpp
│   ├── test_gemm_3.cpp
│   ├── test_gemm_4.cpp
│   ├── test_gemm_nt.cpp
│   ├── test_gemm_oom.cpp
│   ├── test_glu.cpp
│   ├── test_gridsample.cpp
│   ├── test_groupnorm.cpp
│   ├── test_gru.cpp
│   ├── test_hardsigmoid.cpp
│   ├── test_hardswish.cpp
│   ├── test_innerproduct.cpp
│   ├── test_instancenorm.cpp
│   ├── test_interp.cpp
│   ├── test_interp_1.cpp
│   ├── test_inversespectrogram.cpp
│   ├── test_layernorm.cpp
│   ├── test_lrn.cpp
│   ├── test_lstm.cpp
│   ├── test_mat_pixel.cpp
│   ├── test_mat_pixel_affine.cpp
│   ├── test_mat_pixel_drawing.cpp
│   ├── test_mat_pixel_resize.cpp
│   ├── test_mat_pixel_rotate.cpp
│   ├── test_matmul.cpp
│   ├── test_memorydata.cpp
│   ├── test_mish.cpp
│   ├── test_multiheadattention.cpp
│   ├── test_multiheadattention_1.cpp
│   ├── test_multiheadattention_kvcache.cpp
│   ├── test_multiheadattention_oom.cpp
│   ├── test_noop.cpp
│   ├── test_normalize.cpp
│   ├── test_packing.cpp
│   ├── test_padding.cpp
│   ├── test_paramdict.cpp
│   ├── test_permute.cpp
│   ├── test_pixelshuffle.cpp
│   ├── test_pooling.cpp
│   ├── test_pooling1d.cpp
│   ├── test_pooling3d.cpp
│   ├── test_power.cpp
│   ├── test_prelu.cpp
│   ├── test_priorbox.cpp
│   ├── test_quantize.cpp
│   ├── test_quantize_oom.cpp
│   ├── test_reduction.cpp
│   ├── test_relu.cpp
│   ├── test_reorg.cpp
│   ├── test_requantize.cpp
│   ├── test_requantize_oom.cpp
│   ├── test_reshape.cpp
│   ├── test_reshape_1.cpp
│   ├── test_reshape_oom.cpp
│   ├── test_rmsnorm.cpp
│   ├── test_rnn.cpp
│   ├── test_roialign.cpp
│   ├── test_roipooling.cpp
│   ├── test_rotaryembed.cpp
│   ├── test_rotaryembed_oom.cpp
│   ├── test_scale.cpp
│   ├── test_sdpa.cpp
│   ├── test_sdpa_kvcache.cpp
│   ├── test_sdpa_oom.cpp
│   ├── test_selu.cpp
│   ├── test_shrink.cpp
│   ├── test_shufflechannel.cpp
│   ├── test_sigmoid.cpp
│   ├── test_slice.cpp
│   ├── test_slice_oom.cpp
│   ├── test_softmax.cpp
│   ├── test_softmax_oom.cpp
│   ├── test_softplus.cpp
│   ├── test_spectrogram.cpp
│   ├── test_squeeze.cpp
│   ├── test_squeezenet.cpp
│   ├── test_swish.cpp
│   ├── test_tanh.cpp
│   ├── test_tile.cpp
│   ├── test_tile_oom.cpp
│   ├── test_unaryop.cpp
│   ├── test_unfold.cpp
│   ├── test_yolov3detectionoutput.cpp
│   ├── testutil.cpp
│   └── testutil.h
├── toolchains/
│   ├── aarch64-linux-gnu-c.toolchain.cmake
│   ├── aarch64-linux-gnu.toolchain.cmake
│   ├── aarch64-qnx.toolchain.cmake
│   ├── anykav500.toolchain.cmake
│   ├── arm-linux-gnueabi-c.toolchain.cmake
│   ├── arm-linux-gnueabi.toolchain.cmake
│   ├── arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake
│   ├── arm-linux-gnueabihf.toolchain.cmake
│   ├── c906-v310.toolchain.cmake
│   ├── c907-rv32-v310.toolchain.cmake
│   ├── c907-v310.toolchain.cmake
│   ├── c908-v310.toolchain.cmake
│   ├── c910-v310.toolchain.cmake
│   ├── esp32.toolchain.cmake
│   ├── himix100.toolchain.cmake
│   ├── himix200.toolchain.cmake
│   ├── himix210.toolchain.cmake
│   ├── hisiv300.toolchain.cmake
│   ├── hisiv500.toolchain.cmake
│   ├── hisiv600.toolchain.cmake
│   ├── host-c.clang.toolchain.cmake
│   ├── host-c.gcc.toolchain.cmake
│   ├── host.clang-m32.toolchain.cmake
│   ├── host.gcc-c++03.toolchain.cmake
│   ├── host.gcc-m32.toolchain.cmake
│   ├── host.gcc.toolchain.cmake
│   ├── ingenic-x2000.toolchain.cmake
│   ├── ios.toolchain.cmake
│   ├── iossimxc-x64.toolchain.cmake
│   ├── iossimxc.toolchain.cmake
│   ├── iosxc-arm64.toolchain.cmake
│   ├── iosxc.toolchain.cmake
│   ├── jetson.toolchain.cmake
│   ├── k1.llvm.toolchain.cmake
│   ├── k1.toolchain.cmake
│   ├── loongarch64-linux-gnu.toolchain.cmake
│   ├── loongarch64-unknown-linux-gnu.toolchain.cmake
│   ├── loongson2f-linux-gnuabi64.toolchain.cmake
│   ├── mips-mti-linux-gnu.toolchain.cmake
│   ├── mips32r2-linux-gnu.toolchain.cmake
│   ├── mips64el-linux-gnuabi64.toolchain.cmake
│   ├── mipsel-linux-gnu.toolchain.cmake
│   ├── mipsisa32r6el-linux-gnu.toolchain.cmake
│   ├── mipsisa64r6el-linux-gnuabi64.toolchain.cmake
│   ├── pi3.toolchain.cmake
│   ├── power8le-linux-gnu-vsx.clang.toolchain.cmake
│   ├── power8le-linux-gnu-vsx.toolchain.cmake
│   ├── power9le-linux-gnu-vsx.clang.toolchain.cmake
│   ├── power9le-linux-gnu-vsx.toolchain.cmake
│   ├── powerpc-linux-gnu.toolchain.cmake
│   ├── powerpc64le-linux-gnu.toolchain.cmake
│   ├── riscv32-unknown-elf.toolchain.cmake
│   ├── riscv64-linux-gnu.toolchain.cmake
│   ├── riscv64-unknown-elf.toolchain.cmake
│   ├── riscv64-unknown-linux-gnu.llvm-toolchain.cmake
│   ├── riscv64-unknown-linux-gnu.toolchain.cmake
│   ├── v831.toolchain.cmake
│   ├── windows-xp-clang.toolchain.cmake
│   ├── windows-xp-mingw.toolchain.cmake
│   └── windows-xp-msvc.toolchain.cmake
└── tools/
    ├── CMakeLists.txt
    ├── caffe/
    │   ├── CMakeLists.txt
    │   ├── caffe.proto
    │   └── caffe2ncnn.cpp
    ├── darknet/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   └── darknet2ncnn.cpp
    ├── keras/
    │   └── readme.md
    ├── mlir/
    │   ├── CMakeLists.txt
    │   ├── fix_td.sh
    │   ├── mlir2ncnn.cpp
    │   ├── ncnn_dialect.cpp
    │   ├── ncnn_dialect.h
    │   ├── ncnn_ops.td
    │   ├── ncnn_rewriter.cpp
    │   ├── ncnn_rewriter.td
    │   ├── tf_attributes.cc
    │   ├── tf_attributes.h
    │   ├── tf_dialect.cpp
    │   ├── tf_dialect.h
    │   ├── tf_generated_ops.td
    │   ├── tf_op_base.td
    │   ├── tf_ops.td
    │   ├── tf_side_effects.h
    │   ├── tf_traits.h
    │   ├── tf_types.cc
    │   ├── tf_types.def
    │   └── tf_types.h
    ├── modelwriter.h
    ├── mxnet/
    │   ├── CMakeLists.txt
    │   └── mxnet2ncnn.cpp
    ├── ncnn2mem.cpp
    ├── ncnnmerge.cpp
    ├── ncnnoptimize.cpp
    ├── onnx/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── onnx.proto
    │   └── onnx2ncnn.cpp
    ├── plugin/
    │   ├── ImageWatchNCNN.natvis
    │   ├── ImageWatchNNIE.natvis
    │   └── README.md
    ├── pnnx/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── cmake/
    │   │   └── PNNXPyTorch.cmake
    │   ├── python/
    │   │   ├── README.md
    │   │   ├── examples/
    │   │   │   ├── convert.py
    │   │   │   └── export.py
    │   │   ├── pnnx/
    │   │   │   ├── __init__.py
    │   │   │   └── utils/
    │   │   │       ├── __init__.py
    │   │   │       ├── convert.py
    │   │   │       ├── export.py
    │   │   │       └── utils.py
    │   │   ├── requirements.txt
    │   │   ├── setup.py
    │   │   └── tests/
    │   │       ├── test_convert.py
    │   │       ├── test_dynamicinput_convert.py
    │   │       ├── test_dynamicinput_export.py
    │   │       ├── test_export.py
    │   │       ├── test_naiveinput_convert.py
    │   │       └── test_naiveinput_export.py
    │   ├── src/
    │   │   ├── CMakeLists.txt
    │   │   ├── ir.cpp
    │   │   ├── ir.h
    │   │   ├── load_onnx.cpp
    │   │   ├── load_onnx.h
    │   │   ├── load_tnn.cpp
    │   │   ├── load_tnn.h
    │   │   ├── load_torchscript.cpp
    │   │   ├── load_torchscript.h
    │   │   ├── main.cpp
    │   │   ├── onnx-data.proto
    │   │   ├── onnx-ml.proto
    │   │   ├── onnx-operators-ml.proto
    │   │   ├── pass_level0/
    │   │   │   ├── constant_unpooling.cpp
    │   │   │   ├── constant_unpooling.h
    │   │   │   ├── convert_half_to_float.cpp
    │   │   │   ├── convert_half_to_float.h
    │   │   │   ├── flatten_input.cpp
    │   │   │   ├── flatten_input.h
    │   │   │   ├── inline_block.cpp
    │   │   │   ├── inline_block.h
    │   │   │   ├── reset_device.cpp
    │   │   │   ├── reset_device.h
    │   │   │   ├── shape_inference.cpp
    │   │   │   └── shape_inference.h
    │   │   ├── pass_level0.cpp
    │   │   ├── pass_level0.h
    │   │   ├── pass_level1/
    │   │   │   ├── fuse_module_pass.cpp
    │   │   │   ├── fuse_module_pass.h
    │   │   │   ├── nn_AdaptiveAvgPool1d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool2d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool3d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool1d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool2d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool3d.cpp
    │   │   │   ├── nn_AlphaDropout.cpp
    │   │   │   ├── nn_AvgPool1d.cpp
    │   │   │   ├── nn_AvgPool2d.cpp
    │   │   │   ├── nn_AvgPool3d.cpp
    │   │   │   ├── nn_BatchNorm1d.cpp
    │   │   │   ├── nn_BatchNorm2d.cpp
    │   │   │   ├── nn_BatchNorm3d.cpp
    │   │   │   ├── nn_CELU.cpp
    │   │   │   ├── nn_ChannelShuffle.cpp
    │   │   │   ├── nn_ConstantPad1d.cpp
    │   │   │   ├── nn_ConstantPad2d.cpp
    │   │   │   ├── nn_ConstantPad3d.cpp
    │   │   │   ├── nn_Conv1d.cpp
    │   │   │   ├── nn_Conv2d.cpp
    │   │   │   ├── nn_Conv3d.cpp
    │   │   │   ├── nn_ConvTranspose1d.cpp
    │   │   │   ├── nn_ConvTranspose2d.cpp
    │   │   │   ├── nn_ConvTranspose3d.cpp
    │   │   │   ├── nn_Dropout.cpp
    │   │   │   ├── nn_Dropout2d.cpp
    │   │   │   ├── nn_Dropout3d.cpp
    │   │   │   ├── nn_ELU.cpp
    │   │   │   ├── nn_Embedding.cpp
    │   │   │   ├── nn_Fold.cpp
    │   │   │   ├── nn_GELU.cpp
    │   │   │   ├── nn_GLU.cpp
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_GroupNorm.cpp
    │   │   │   ├── nn_Hardshrink.cpp
    │   │   │   ├── nn_Hardsigmoid.cpp
    │   │   │   ├── nn_Hardswish.cpp
    │   │   │   ├── nn_Hardtanh.cpp
    │   │   │   ├── nn_InstanceNorm1d.cpp
    │   │   │   ├── nn_InstanceNorm2d.cpp
    │   │   │   ├── nn_InstanceNorm3d.cpp
    │   │   │   ├── nn_LPPool1d.cpp
    │   │   │   ├── nn_LPPool2d.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_LayerNorm.cpp
    │   │   │   ├── nn_LeakyReLU.cpp
    │   │   │   ├── nn_Linear.cpp
    │   │   │   ├── nn_LocalResponseNorm.cpp
    │   │   │   ├── nn_LogSigmoid.cpp
    │   │   │   ├── nn_LogSoftmax.cpp
    │   │   │   ├── nn_MaxPool1d.cpp
    │   │   │   ├── nn_MaxPool2d.cpp
    │   │   │   ├── nn_MaxPool3d.cpp
    │   │   │   ├── nn_Mish.cpp
    │   │   │   ├── nn_MultiheadAttention.cpp
    │   │   │   ├── nn_PReLU.cpp
    │   │   │   ├── nn_PixelShuffle.cpp
    │   │   │   ├── nn_PixelUnshuffle.cpp
    │   │   │   ├── nn_RMSNorm.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_RReLU.cpp
    │   │   │   ├── nn_ReLU.cpp
    │   │   │   ├── nn_ReLU6.cpp
    │   │   │   ├── nn_ReflectionPad1d.cpp
    │   │   │   ├── nn_ReflectionPad2d.cpp
    │   │   │   ├── nn_ReplicationPad1d.cpp
    │   │   │   ├── nn_ReplicationPad2d.cpp
    │   │   │   ├── nn_ReplicationPad3d.cpp
    │   │   │   ├── nn_SELU.cpp
    │   │   │   ├── nn_SiLU.cpp
    │   │   │   ├── nn_Sigmoid.cpp
    │   │   │   ├── nn_Softmax.cpp
    │   │   │   ├── nn_Softmax2d.cpp
    │   │   │   ├── nn_Softmin.cpp
    │   │   │   ├── nn_Softplus.cpp
    │   │   │   ├── nn_Softshrink.cpp
    │   │   │   ├── nn_Softsign.cpp
    │   │   │   ├── nn_Tanh.cpp
    │   │   │   ├── nn_Tanhshrink.cpp
    │   │   │   ├── nn_Threshold.cpp
    │   │   │   ├── nn_Unfold.cpp
    │   │   │   ├── nn_Upsample.cpp
    │   │   │   ├── nn_UpsamplingBilinear2d.cpp
    │   │   │   ├── nn_UpsamplingNearest2d.cpp
    │   │   │   ├── nn_ZeroPad2d.cpp
    │   │   │   ├── nn_maxunpool2d.cpp
    │   │   │   ├── nn_quantized_Conv2d.cpp
    │   │   │   ├── nn_quantized_DeQuantize.cpp
    │   │   │   ├── nn_quantized_Linear.cpp
    │   │   │   ├── nn_quantized_Quantize.cpp
    │   │   │   ├── torchvision_DeformConv2d.cpp
    │   │   │   └── torchvision_RoIAlign.cpp
    │   │   ├── pass_level1.cpp
    │   │   ├── pass_level1.h
    │   │   ├── pass_level2/
    │   │   │   ├── F_adaptive_avg_pool1d.cpp
    │   │   │   ├── F_adaptive_avg_pool2d.cpp
    │   │   │   ├── F_adaptive_avg_pool3d.cpp
    │   │   │   ├── F_adaptive_max_pool1d.cpp
    │   │   │   ├── F_adaptive_max_pool2d.cpp
    │   │   │   ├── F_adaptive_max_pool3d.cpp
    │   │   │   ├── F_affine_grid.cpp
    │   │   │   ├── F_alpha_dropout.cpp
    │   │   │   ├── F_avg_pool1d.cpp
    │   │   │   ├── F_avg_pool2d.cpp
    │   │   │   ├── F_avg_pool3d.cpp
    │   │   │   ├── F_batch_norm.cpp
    │   │   │   ├── F_celu.cpp
    │   │   │   ├── F_conv1d.cpp
    │   │   │   ├── F_conv2d.cpp
    │   │   │   ├── F_conv3d.cpp
    │   │   │   ├── F_conv_transpose1d.cpp
    │   │   │   ├── F_conv_transpose2d.cpp
    │   │   │   ├── F_conv_transpose3d.cpp
    │   │   │   ├── F_dropout.cpp
    │   │   │   ├── F_dropout23d.cpp
    │   │   │   ├── F_elu.cpp
    │   │   │   ├── F_embedding.cpp
    │   │   │   ├── F_feature_alpha_dropout.cpp
    │   │   │   ├── F_fold.cpp
    │   │   │   ├── F_gelu.cpp
    │   │   │   ├── F_glu.cpp
    │   │   │   ├── F_grid_sample.cpp
    │   │   │   ├── F_group_norm.cpp
    │   │   │   ├── F_hardshrink.cpp
    │   │   │   ├── F_hardsigmoid.cpp
    │   │   │   ├── F_hardswish.cpp
    │   │   │   ├── F_hardtanh.cpp
    │   │   │   ├── F_instance_norm.cpp
    │   │   │   ├── F_interpolate.cpp
    │   │   │   ├── F_layer_norm.cpp
    │   │   │   ├── F_leaky_relu.cpp
    │   │   │   ├── F_linear.cpp
    │   │   │   ├── F_local_response_norm.cpp
    │   │   │   ├── F_log_softmax.cpp
    │   │   │   ├── F_logsigmoid.cpp
    │   │   │   ├── F_lp_pool1d.cpp
    │   │   │   ├── F_lp_pool2d.cpp
    │   │   │   ├── F_max_pool1d.cpp
    │   │   │   ├── F_max_pool2d.cpp
    │   │   │   ├── F_max_pool3d.cpp
    │   │   │   ├── F_mish.cpp
    │   │   │   ├── F_normalize.cpp
    │   │   │   ├── F_pad.cpp
    │   │   │   ├── F_pairwise_distance.cpp
    │   │   │   ├── F_pixel_shuffle.cpp
    │   │   │   ├── F_pixel_unshuffle.cpp
    │   │   │   ├── F_prelu.cpp
    │   │   │   ├── F_relu.cpp
    │   │   │   ├── F_relu6.cpp
    │   │   │   ├── F_rms_norm.cpp
    │   │   │   ├── F_rrelu.cpp
    │   │   │   ├── F_scaled_dot_product_attention.cpp
    │   │   │   ├── F_selu.cpp
    │   │   │   ├── F_sigmoid.cpp
    │   │   │   ├── F_silu.cpp
    │   │   │   ├── F_softmax.cpp
    │   │   │   ├── F_softmin.cpp
    │   │   │   ├── F_softplus.cpp
    │   │   │   ├── F_softshrink.cpp
    │   │   │   ├── F_softsign.cpp
    │   │   │   ├── F_tanh.cpp
    │   │   │   ├── F_tanhshrink.cpp
    │   │   │   ├── F_threshold.cpp
    │   │   │   ├── F_unfold.cpp
    │   │   │   ├── F_upsample.cpp
    │   │   │   ├── F_upsample_bilinear.cpp
    │   │   │   ├── F_upsample_nearest.cpp
    │   │   │   ├── README.md
    │   │   │   ├── Tensor_copy.cpp
    │   │   │   ├── Tensor_expand.cpp
    │   │   │   ├── Tensor_expand_as.cpp
    │   │   │   ├── Tensor_fill.cpp
    │   │   │   ├── Tensor_index.cpp
    │   │   │   ├── Tensor_index_put.cpp
    │   │   │   ├── Tensor_masked_fill.cpp
    │   │   │   ├── Tensor_new_empty.cpp
    │   │   │   ├── Tensor_new_ones.cpp
    │   │   │   ├── Tensor_new_zeros.cpp
    │   │   │   ├── Tensor_permute.cpp
    │   │   │   ├── Tensor_repeat.cpp
    │   │   │   ├── Tensor_reshape.cpp
    │   │   │   ├── Tensor_reshape_as.cpp
    │   │   │   ├── Tensor_select.cpp
    │   │   │   ├── Tensor_size.cpp
    │   │   │   ├── Tensor_slice.cpp
    │   │   │   ├── Tensor_to.cpp
    │   │   │   ├── Tensor_type_as.cpp
    │   │   │   ├── Tensor_unflatten.cpp
    │   │   │   ├── eliminate_contiguous.cpp
    │   │   │   ├── eliminate_contiguous.h
    │   │   │   ├── eliminate_size_numtotensor_int.cpp
    │   │   │   ├── eliminate_size_numtotensor_int.h
    │   │   │   ├── functionize.cpp
    │   │   │   ├── functionize.h
    │   │   │   ├── fuse_constantlist.cpp
    │   │   │   ├── fuse_constantlist.h
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_quantized_FloatFunctional.cpp
    │   │   │   ├── torch_addmm.cpp
    │   │   │   ├── torch_amax.cpp
    │   │   │   ├── torch_amin.cpp
    │   │   │   ├── torch_arange.cpp
    │   │   │   ├── torch_argmax.cpp
    │   │   │   ├── torch_argmin.cpp
    │   │   │   ├── torch_as_strided.cpp
    │   │   │   ├── torch_baddbmm.cpp
    │   │   │   ├── torch_bitwise_and.cpp
    │   │   │   ├── torch_bitwise_left_shift.cpp
    │   │   │   ├── torch_bitwise_not.cpp
    │   │   │   ├── torch_bitwise_or.cpp
    │   │   │   ├── torch_bitwise_right_shift.cpp
    │   │   │   ├── torch_bitwise_xor.cpp
    │   │   │   ├── torch_bmm.cpp
    │   │   │   ├── torch_cat.cpp
    │   │   │   ├── torch_chunk.cpp
    │   │   │   ├── torch_clamp.cpp
    │   │   │   ├── torch_clone.cpp
    │   │   │   ├── torch_complex.cpp
    │   │   │   ├── torch_cross.cpp
    │   │   │   ├── torch_cumprod.cpp
    │   │   │   ├── torch_cumsum.cpp
    │   │   │   ├── torch_dequantize.cpp
    │   │   │   ├── torch_diag.cpp
    │   │   │   ├── torch_einsum.cpp
    │   │   │   ├── torch_empty.cpp
    │   │   │   ├── torch_empty_like.cpp
    │   │   │   ├── torch_eq.cpp
    │   │   │   ├── torch_fft_fft.cpp
    │   │   │   ├── torch_fft_fft2.cpp
    │   │   │   ├── torch_fft_fftn.cpp
    │   │   │   ├── torch_fft_hfft.cpp
    │   │   │   ├── torch_fft_hfft2.cpp
    │   │   │   ├── torch_fft_hfftn.cpp
    │   │   │   ├── torch_fft_ifft.cpp
    │   │   │   ├── torch_fft_ifft2.cpp
    │   │   │   ├── torch_fft_ifftn.cpp
    │   │   │   ├── torch_fft_ihfft.cpp
    │   │   │   ├── torch_fft_ihfft2.cpp
    │   │   │   ├── torch_fft_ihfftn.cpp
    │   │   │   ├── torch_fft_irfft.cpp
    │   │   │   ├── torch_fft_irfft2.cpp
    │   │   │   ├── torch_fft_irfftn.cpp
    │   │   │   ├── torch_fft_rfft.cpp
    │   │   │   ├── torch_fft_rfft2.cpp
    │   │   │   ├── torch_fft_rfftn.cpp
    │   │   │   ├── torch_flatten.cpp
    │   │   │   ├── torch_flip.cpp
    │   │   │   ├── torch_full.cpp
    │   │   │   ├── torch_full_like.cpp
    │   │   │   ├── torch_gather.cpp
    │   │   │   ├── torch_ge.cpp
    │   │   │   ├── torch_gt.cpp
    │   │   │   ├── torch_imag.cpp
    │   │   │   ├── torch_index_select.cpp
    │   │   │   ├── torch_istft.cpp
    │   │   │   ├── torch_le.cpp
    │   │   │   ├── torch_lgamma.cpp
    │   │   │   ├── torch_logical_and.cpp
    │   │   │   ├── torch_logical_not.cpp
    │   │   │   ├── torch_logical_or.cpp
    │   │   │   ├── torch_logical_xor.cpp
    │   │   │   ├── torch_logsumexp.cpp
    │   │   │   ├── torch_lt.cpp
    │   │   │   ├── torch_masked_select.cpp
    │   │   │   ├── torch_matmul.cpp
    │   │   │   ├── torch_max.cpp
    │   │   │   ├── torch_mean.cpp
    │   │   │   ├── torch_min.cpp
    │   │   │   ├── torch_mm.cpp
    │   │   │   ├── torch_mv.cpp
    │   │   │   ├── torch_narrow.cpp
    │   │   │   ├── torch_ne.cpp
    │   │   │   ├── torch_norm.cpp
    │   │   │   ├── torch_normal.cpp
    │   │   │   ├── torch_ones.cpp
    │   │   │   ├── torch_ones_like.cpp
    │   │   │   ├── torch_positive.cpp
    │   │   │   ├── torch_prod.cpp
    │   │   │   ├── torch_quantize_per_tensor.cpp
    │   │   │   ├── torch_randn.cpp
    │   │   │   ├── torch_randn_like.cpp
    │   │   │   ├── torch_real.cpp
    │   │   │   ├── torch_repeat_interleave.cpp
    │   │   │   ├── torch_roll.cpp
    │   │   │   ├── torch_scatter_add.cpp
    │   │   │   ├── torch_slice_scatter.cpp
    │   │   │   ├── torch_split.cpp
    │   │   │   ├── torch_squeeze.cpp
    │   │   │   ├── torch_stack.cpp
    │   │   │   ├── torch_std.cpp
    │   │   │   ├── torch_stft.cpp
    │   │   │   ├── torch_sum.cpp
    │   │   │   ├── torch_t.cpp
    │   │   │   ├── torch_tensor_split.cpp
    │   │   │   ├── torch_tile.cpp
    │   │   │   ├── torch_topk.cpp
    │   │   │   ├── torch_transpose.cpp
    │   │   │   ├── torch_unbind.cpp
    │   │   │   ├── torch_unsqueeze.cpp
    │   │   │   ├── torch_var.cpp
    │   │   │   ├── torch_view_as_complex.cpp
    │   │   │   ├── torch_view_as_real.cpp
    │   │   │   ├── torch_where.cpp
    │   │   │   ├── torch_zeros.cpp
    │   │   │   ├── torch_zeros_like.cpp
    │   │   │   ├── torchaudio_F_inverse_spectrogram.cpp
    │   │   │   └── torchaudio_F_spectrogram.cpp
    │   │   ├── pass_level2.cpp
    │   │   ├── pass_level2.h
    │   │   ├── pass_level3/
    │   │   │   ├── assign_unique_name.cpp
    │   │   │   ├── assign_unique_name.h
    │   │   │   ├── eliminate_noop_math.cpp
    │   │   │   ├── eliminate_noop_math.h
    │   │   │   ├── eliminate_squeeze_unsqueeze_pair.cpp
    │   │   │   ├── eliminate_squeeze_unsqueeze_pair.h
    │   │   │   ├── eliminate_tuple_pair.cpp
    │   │   │   ├── eliminate_tuple_pair.h
    │   │   │   ├── expand_quantization_modules.cpp
    │   │   │   ├── expand_quantization_modules.h
    │   │   │   ├── fuse_dynamic_adaptive_pool.cpp
    │   │   │   ├── fuse_dynamic_adaptive_pool.h
    │   │   │   ├── fuse_einsum_operands.cpp
    │   │   │   ├── fuse_einsum_operands.h
    │   │   │   ├── fuse_expression.cpp
    │   │   │   ├── fuse_expression.h
    │   │   │   ├── fuse_index_expression.cpp
    │   │   │   ├── fuse_index_expression.h
    │   │   │   ├── fuse_maxpool_unpack.cpp
    │   │   │   ├── fuse_maxpool_unpack.h
    │   │   │   ├── fuse_multiheadattention_unpack.cpp
    │   │   │   ├── fuse_multiheadattention_unpack.h
    │   │   │   ├── fuse_op1ton_unpack.cpp
    │   │   │   ├── fuse_op1ton_unpack.h
    │   │   │   ├── fuse_opnto1_tensors.cpp
    │   │   │   ├── fuse_opnto1_tensors.h
    │   │   │   ├── fuse_rnn_unpack.cpp
    │   │   │   ├── fuse_rnn_unpack.h
    │   │   │   ├── rename_F_dropoutnd.cpp
    │   │   │   └── rename_F_dropoutnd.h
    │   │   ├── pass_level3.cpp
    │   │   ├── pass_level3.h
    │   │   ├── pass_level4/
    │   │   │   ├── attribute_pooling.cpp
    │   │   │   ├── attribute_pooling.h
    │   │   │   ├── canonicalize.cpp
    │   │   │   ├── canonicalize.h
    │   │   │   ├── dead_code_elimination.cpp
    │   │   │   ├── dead_code_elimination.h
    │   │   │   ├── fuse_custom_op.cpp
    │   │   │   └── fuse_custom_op.h
    │   │   ├── pass_level4.cpp
    │   │   ├── pass_level4.h
    │   │   ├── pass_level5/
    │   │   │   ├── attribute_unpooling.cpp
    │   │   │   ├── attribute_unpooling.h
    │   │   │   ├── eliminate_dropout.cpp
    │   │   │   ├── eliminate_dropout.h
    │   │   │   ├── eliminate_identity_operator.cpp
    │   │   │   ├── eliminate_identity_operator.h
    │   │   │   ├── eliminate_maxpool_indices.cpp
    │   │   │   ├── eliminate_maxpool_indices.h
    │   │   │   ├── eliminate_noop_cat.cpp
    │   │   │   ├── eliminate_noop_cat.h
    │   │   │   ├── eliminate_noop_einsum.cpp
    │   │   │   ├── eliminate_noop_einsum.h
    │   │   │   ├── eliminate_noop_expand.cpp
    │   │   │   ├── eliminate_noop_expand.h
    │   │   │   ├── eliminate_noop_expression.cpp
    │   │   │   ├── eliminate_noop_expression.h
    │   │   │   ├── eliminate_noop_pad.cpp
    │   │   │   ├── eliminate_noop_pad.h
    │   │   │   ├── eliminate_noop_permute.cpp
    │   │   │   ├── eliminate_noop_permute.h
    │   │   │   ├── eliminate_noop_reshape.cpp
    │   │   │   ├── eliminate_noop_reshape.h
    │   │   │   ├── eliminate_noop_slice.cpp
    │   │   │   ├── eliminate_noop_slice.h
    │   │   │   ├── eliminate_noop_upsample.cpp
    │   │   │   ├── eliminate_noop_upsample.h
    │   │   │   ├── eliminate_reshape_shape_expression.cpp
    │   │   │   ├── eliminate_reshape_shape_expression.h
    │   │   │   ├── eliminate_type_as.cpp
    │   │   │   ├── eliminate_type_as.h
    │   │   │   ├── eval_expression.cpp
    │   │   │   ├── eval_expression.h
    │   │   │   ├── fold_constants.cpp
    │   │   │   ├── fold_constants.h
    │   │   │   ├── fuse_adjacent_permute.cpp
    │   │   │   ├── fuse_adjacent_permute.h
    │   │   │   ├── fuse_adjacent_reshape.cpp
    │   │   │   ├── fuse_adjacent_reshape.h
    │   │   │   ├── fuse_channel_shuffle.cpp
    │   │   │   ├── fuse_channel_shuffle.h
    │   │   │   ├── fuse_constant_expression.cpp
    │   │   │   ├── fuse_constant_expression.h
    │   │   │   ├── fuse_conv1d_batchnorm1d.cpp
    │   │   │   ├── fuse_conv1d_batchnorm1d.h
    │   │   │   ├── fuse_conv2d_batchnorm2d.cpp
    │   │   │   ├── fuse_conv2d_batchnorm2d.h
    │   │   │   ├── fuse_conv3d_batchnorm3d.cpp
    │   │   │   ├── fuse_conv3d_batchnorm3d.h
    │   │   │   ├── fuse_convtranspose1d_batchnorm1d.cpp
    │   │   │   ├── fuse_convtranspose1d_batchnorm1d.h
    │   │   │   ├── fuse_convtranspose2d_batchnorm2d.cpp
    │   │   │   ├── fuse_convtranspose2d_batchnorm2d.h
    │   │   │   ├── fuse_convtranspose3d_batchnorm3d.cpp
    │   │   │   ├── fuse_convtranspose3d_batchnorm3d.h
    │   │   │   ├── fuse_layernorm.cpp
    │   │   │   ├── fuse_layernorm.h
    │   │   │   ├── fuse_linear_batchnorm1d.cpp
    │   │   │   ├── fuse_linear_batchnorm1d.h
    │   │   │   ├── fuse_multiheadattention.cpp
    │   │   │   ├── fuse_multiheadattention.h
    │   │   │   ├── fuse_multiheadattention_sameqkv.cpp
    │   │   │   ├── fuse_multiheadattention_sameqkv.h
    │   │   │   ├── fuse_pad_conv1d.cpp
    │   │   │   ├── fuse_pad_conv1d.h
    │   │   │   ├── fuse_pad_conv2d.cpp
    │   │   │   ├── fuse_pad_conv2d.h
    │   │   │   ├── fuse_pixel_shuffle.cpp
    │   │   │   ├── fuse_pixel_shuffle.h
    │   │   │   ├── fuse_pixel_unshuffle.cpp
    │   │   │   ├── fuse_pixel_unshuffle.h
    │   │   │   ├── fuse_rmsnorm.cpp
    │   │   │   ├── fuse_rmsnorm.h
    │   │   │   ├── fuse_scaled_dot_product_attention.cpp
    │   │   │   ├── fuse_scaled_dot_product_attention.h
    │   │   │   ├── fuse_select_to_unbind.cpp
    │   │   │   ├── fuse_select_to_unbind.h
    │   │   │   ├── fuse_silu.cpp
    │   │   │   ├── fuse_silu.h
    │   │   │   ├── fuse_slice_copy.cpp
    │   │   │   ├── fuse_slice_copy.h
    │   │   │   ├── fuse_slice_indices.cpp
    │   │   │   ├── fuse_slice_indices.h
    │   │   │   ├── fuse_slice_squeeze_to_select.cpp
    │   │   │   ├── fuse_slice_squeeze_to_select.h
    │   │   │   ├── fuse_slice_to_tensor_split.cpp
    │   │   │   ├── fuse_slice_to_tensor_split.h
    │   │   │   ├── fuse_static_batchnorm.cpp
    │   │   │   ├── fuse_static_batchnorm.h
    │   │   │   ├── fuse_static_conv.cpp
    │   │   │   ├── fuse_static_conv.h
    │   │   │   ├── fuse_static_convtranspose.cpp
    │   │   │   ├── fuse_static_convtranspose.h
    │   │   │   ├── fuse_static_embedding.cpp
    │   │   │   ├── fuse_static_embedding.h
    │   │   │   ├── fuse_static_groupnorm.cpp
    │   │   │   ├── fuse_static_groupnorm.h
    │   │   │   ├── fuse_static_instancenorm.cpp
    │   │   │   ├── fuse_static_instancenorm.h
    │   │   │   ├── fuse_static_layernorm.cpp
    │   │   │   ├── fuse_static_layernorm.h
    │   │   │   ├── fuse_static_linear.cpp
    │   │   │   ├── fuse_static_linear.h
    │   │   │   ├── fuse_static_prelu.cpp
    │   │   │   ├── fuse_static_prelu.h
    │   │   │   ├── fuse_static_rmsnorm.cpp
    │   │   │   ├── fuse_static_rmsnorm.h
    │   │   │   ├── fuse_transformers_multiheadattention.cpp
    │   │   │   ├── fuse_transformers_multiheadattention.h
    │   │   │   ├── fuse_transformers_scaled_dot_product_attention.cpp
    │   │   │   ├── fuse_transformers_scaled_dot_product_attention.h
    │   │   │   ├── normalize_einsum_equation.cpp
    │   │   │   ├── normalize_einsum_equation.h
    │   │   │   ├── unroll_rnn_op.cpp
    │   │   │   └── unroll_rnn_op.h
    │   │   ├── pass_level5.cpp
    │   │   ├── pass_level5.h
    │   │   ├── pass_ncnn/
    │   │   │   ├── F_adaptive_avg_pool1d.cpp
    │   │   │   ├── F_adaptive_avg_pool2d.cpp
    │   │   │   ├── F_adaptive_avg_pool3d.cpp
    │   │   │   ├── F_adaptive_max_pool1d.cpp
    │   │   │   ├── F_adaptive_max_pool2d.cpp
    │   │   │   ├── F_adaptive_max_pool3d.cpp
    │   │   │   ├── F_avg_pool1d.cpp
    │   │   │   ├── F_avg_pool2d.cpp
    │   │   │   ├── F_avg_pool3d.cpp
    │   │   │   ├── F_batch_norm.cpp
    │   │   │   ├── F_celu.cpp
    │   │   │   ├── F_conv1d.cpp
    │   │   │   ├── F_conv2d.cpp
    │   │   │   ├── F_conv3d.cpp
    │   │   │   ├── F_conv_transpose1d.cpp
    │   │   │   ├── F_conv_transpose2d.cpp
    │   │   │   ├── F_conv_transpose3d.cpp
    │   │   │   ├── F_elu.cpp
    │   │   │   ├── F_embedding.cpp
    │   │   │   ├── F_fold.cpp
    │   │   │   ├── F_gelu.cpp
    │   │   │   ├── F_glu.cpp
    │   │   │   ├── F_grid_sample.cpp
    │   │   │   ├── F_group_norm.cpp
    │   │   │   ├── F_hardshrink.cpp
    │   │   │   ├── F_hardsigmoid.cpp
    │   │   │   ├── F_hardswish.cpp
    │   │   │   ├── F_hardtanh.cpp
    │   │   │   ├── F_instance_norm.cpp
    │   │   │   ├── F_interpolate.cpp
    │   │   │   ├── F_layer_norm.cpp
    │   │   │   ├── F_leaky_relu.cpp
    │   │   │   ├── F_linear.cpp
    │   │   │   ├── F_local_response_norm.cpp
    │   │   │   ├── F_log_softmax.cpp
    │   │   │   ├── F_logsigmoid.cpp
    │   │   │   ├── F_max_pool1d.cpp
    │   │   │   ├── F_max_pool2d.cpp
    │   │   │   ├── F_max_pool3d.cpp
    │   │   │   ├── F_mish.cpp
    │   │   │   ├── F_normalize.cpp
    │   │   │   ├── F_pad.cpp
    │   │   │   ├── F_pixel_shuffle.cpp
    │   │   │   ├── F_pixel_unshuffle.cpp
    │   │   │   ├── F_prelu.cpp
    │   │   │   ├── F_relu.cpp
    │   │   │   ├── F_relu6.cpp
    │   │   │   ├── F_rms_norm.cpp
    │   │   │   ├── F_scaled_dot_product_attention.cpp
    │   │   │   ├── F_selu.cpp
    │   │   │   ├── F_sigmoid.cpp
    │   │   │   ├── F_silu.cpp
    │   │   │   ├── F_softmax.cpp
    │   │   │   ├── F_softplus.cpp
    │   │   │   ├── F_softshrink.cpp
    │   │   │   ├── F_tanh.cpp
    │   │   │   ├── F_unfold.cpp
    │   │   │   ├── F_upsample.cpp
    │   │   │   ├── F_upsample_bilinear.cpp
    │   │   │   ├── F_upsample_nearest.cpp
    │   │   │   ├── Tensor_expand.cpp
    │   │   │   ├── Tensor_permute.cpp
    │   │   │   ├── Tensor_repeat.cpp
    │   │   │   ├── Tensor_reshape.cpp
    │   │   │   ├── Tensor_reshape_as.cpp
    │   │   │   ├── Tensor_unflatten.cpp
    │   │   │   ├── chain_multi_output.cpp
    │   │   │   ├── chain_multi_output.h
    │   │   │   ├── convert_Tensor_select.cpp
    │   │   │   ├── convert_Tensor_select.h
    │   │   │   ├── convert_Tensor_slice.cpp
    │   │   │   ├── convert_Tensor_slice.h
    │   │   │   ├── convert_Tensor_slice_copy.cpp
    │   │   │   ├── convert_Tensor_slice_copy.h
    │   │   │   ├── convert_attribute.cpp
    │   │   │   ├── convert_attribute.h
    │   │   │   ├── convert_custom_op.cpp
    │   │   │   ├── convert_custom_op.h
    │   │   │   ├── convert_half_to_float.cpp
    │   │   │   ├── convert_half_to_float.h
    │   │   │   ├── convert_input.cpp
    │   │   │   ├── convert_input.h
    │   │   │   ├── convert_module_op.cpp
    │   │   │   ├── convert_module_op.h
    │   │   │   ├── convert_reshape_interp_expression.cpp
    │   │   │   ├── convert_reshape_interp_expression.h
    │   │   │   ├── convert_slice_expression.cpp
    │   │   │   ├── convert_slice_expression.h
    │   │   │   ├── convert_torch_cat.cpp
    │   │   │   ├── convert_torch_cat.h
    │   │   │   ├── convert_torch_chunk.cpp
    │   │   │   ├── convert_torch_chunk.h
    │   │   │   ├── convert_torch_einsum.cpp
    │   │   │   ├── convert_torch_einsum.h
    │   │   │   ├── convert_torch_split.cpp
    │   │   │   ├── convert_torch_split.h
    │   │   │   ├── convert_torch_stack.cpp
    │   │   │   ├── convert_torch_stack.h
    │   │   │   ├── convert_torch_tensor_split.cpp
    │   │   │   ├── convert_torch_tensor_split.h
    │   │   │   ├── convert_torch_unbind.cpp
    │   │   │   ├── convert_torch_unbind.h
    │   │   │   ├── eliminate_noop.cpp
    │   │   │   ├── eliminate_noop.h
    │   │   │   ├── eliminate_output.cpp
    │   │   │   ├── eliminate_output.h
    │   │   │   ├── expand_expression.cpp
    │   │   │   ├── expand_expression.h
    │   │   │   ├── fuse_binaryop_eltwise.cpp
    │   │   │   ├── fuse_binaryop_eltwise.h
    │   │   │   ├── fuse_convert_rotaryembed.cpp
    │   │   │   ├── fuse_convert_rotaryembed.h
    │   │   │   ├── fuse_convert_shufflechannel_slice.cpp
    │   │   │   ├── fuse_convert_shufflechannel_slice.h
    │   │   │   ├── fuse_convolution1d_activation.cpp
    │   │   │   ├── fuse_convolution1d_activation.h
    │   │   │   ├── fuse_convolution_activation.cpp
    │   │   │   ├── fuse_convolution_activation.h
    │   │   │   ├── fuse_convolutiondepthwise1d_activation.cpp
    │   │   │   ├── fuse_convolutiondepthwise1d_activation.h
    │   │   │   ├── fuse_convolutiondepthwise_activation.cpp
    │   │   │   ├── fuse_convolutiondepthwise_activation.h
    │   │   │   ├── fuse_deconvolution_activation.cpp
    │   │   │   ├── fuse_deconvolution_activation.h
    │   │   │   ├── fuse_deconvolutiondepthwise_activation.cpp
    │   │   │   ├── fuse_deconvolutiondepthwise_activation.h
    │   │   │   ├── fuse_innerproduct_activation.cpp
    │   │   │   ├── fuse_innerproduct_activation.h
    │   │   │   ├── fuse_padding_convolution.cpp
    │   │   │   ├── fuse_padding_convolution.h
    │   │   │   ├── fuse_padding_convolutiondepthwise.cpp
    │   │   │   ├── fuse_padding_convolutiondepthwise.h
    │   │   │   ├── fuse_transpose_matmul.cpp
    │   │   │   ├── fuse_transpose_matmul.h
    │   │   │   ├── insert_reshape_global_pooling.cpp
    │   │   │   ├── insert_reshape_global_pooling.h
    │   │   │   ├── insert_reshape_linear.cpp
    │   │   │   ├── insert_reshape_linear.h
    │   │   │   ├── insert_reshape_numpy_binaryop_broadcast.cpp
    │   │   │   ├── insert_reshape_numpy_binaryop_broadcast.h
    │   │   │   ├── insert_reshape_pooling.cpp
    │   │   │   ├── insert_reshape_pooling.h
    │   │   │   ├── insert_split.cpp
    │   │   │   ├── insert_split.h
    │   │   │   ├── nn_AdaptiveAvgPool1d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool2d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool3d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool1d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool2d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool3d.cpp
    │   │   │   ├── nn_AvgPool1d.cpp
    │   │   │   ├── nn_AvgPool2d.cpp
    │   │   │   ├── nn_AvgPool3d.cpp
    │   │   │   ├── nn_BatchNorm1d.cpp
    │   │   │   ├── nn_BatchNorm2d.cpp
    │   │   │   ├── nn_BatchNorm3d.cpp
    │   │   │   ├── nn_CELU.cpp
    │   │   │   ├── nn_ChannelShuffle.cpp
    │   │   │   ├── nn_ConstantPad1d.cpp
    │   │   │   ├── nn_ConstantPad2d.cpp
    │   │   │   ├── nn_ConstantPad3d.cpp
    │   │   │   ├── nn_Conv1d.cpp
    │   │   │   ├── nn_Conv2d.cpp
    │   │   │   ├── nn_Conv3d.cpp
    │   │   │   ├── nn_ConvTranspose1d.cpp
    │   │   │   ├── nn_ConvTranspose2d.cpp
    │   │   │   ├── nn_ConvTranspose3d.cpp
    │   │   │   ├── nn_ELU.cpp
    │   │   │   ├── nn_Embedding.cpp
    │   │   │   ├── nn_Fold.cpp
    │   │   │   ├── nn_GELU.cpp
    │   │   │   ├── nn_GLU.cpp
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_GroupNorm.cpp
    │   │   │   ├── nn_Hardshrink.cpp
    │   │   │   ├── nn_Hardsigmoid.cpp
    │   │   │   ├── nn_Hardswish.cpp
    │   │   │   ├── nn_Hardtanh.cpp
    │   │   │   ├── nn_InstanceNorm2d.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_LayerNorm.cpp
    │   │   │   ├── nn_LeakyReLU.cpp
    │   │   │   ├── nn_Linear.cpp
    │   │   │   ├── nn_LocalResponseNorm.cpp
    │   │   │   ├── nn_LogSigmoid.cpp
    │   │   │   ├── nn_LogSoftmax.cpp
    │   │   │   ├── nn_MaxPool1d.cpp
    │   │   │   ├── nn_MaxPool2d.cpp
    │   │   │   ├── nn_MaxPool3d.cpp
    │   │   │   ├── nn_Mish.cpp
    │   │   │   ├── nn_MultiheadAttention.cpp
    │   │   │   ├── nn_PReLU.cpp
    │   │   │   ├── nn_PixelShuffle.cpp
    │   │   │   ├── nn_PixelUnshuffle.cpp
    │   │   │   ├── nn_RMSNorm.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_ReLU.cpp
    │   │   │   ├── nn_ReLU6.cpp
    │   │   │   ├── nn_ReflectionPad1d.cpp
    │   │   │   ├── nn_ReflectionPad2d.cpp
    │   │   │   ├── nn_ReplicationPad1d.cpp
    │   │   │   ├── nn_ReplicationPad2d.cpp
    │   │   │   ├── nn_ReplicationPad3d.cpp
    │   │   │   ├── nn_SELU.cpp
    │   │   │   ├── nn_SiLU.cpp
    │   │   │   ├── nn_Sigmoid.cpp
    │   │   │   ├── nn_Softmax.cpp
    │   │   │   ├── nn_Softmax2d.cpp
    │   │   │   ├── nn_Softplus.cpp
    │   │   │   ├── nn_Softshrink.cpp
    │   │   │   ├── nn_Tanh.cpp
    │   │   │   ├── nn_Unfold.cpp
    │   │   │   ├── nn_Upsample.cpp
    │   │   │   ├── nn_UpsamplingBilinear2d.cpp
    │   │   │   ├── nn_UpsamplingNearest2d.cpp
    │   │   │   ├── nn_ZeroPad2d.cpp
    │   │   │   ├── solve_batch_index.cpp
    │   │   │   ├── solve_batch_index.h
    │   │   │   ├── torch_addmm.cpp
    │   │   │   ├── torch_amax.cpp
    │   │   │   ├── torch_amin.cpp
    │   │   │   ├── torch_bmm.cpp
    │   │   │   ├── torch_clamp.cpp
    │   │   │   ├── torch_clone.cpp
    │   │   │   ├── torch_cumsum.cpp
    │   │   │   ├── torch_diag.cpp
    │   │   │   ├── torch_flatten.cpp
    │   │   │   ├── torch_flip.cpp
    │   │   │   ├── torch_istft.cpp
    │   │   │   ├── torch_logsumexp.cpp
    │   │   │   ├── torch_matmul.cpp
    │   │   │   ├── torch_max.cpp
    │   │   │   ├── torch_mean.cpp
    │   │   │   ├── torch_min.cpp
    │   │   │   ├── torch_mm.cpp
    │   │   │   ├── torch_norm.cpp
    │   │   │   ├── torch_prod.cpp
    │   │   │   ├── torch_roll.cpp
    │   │   │   ├── torch_slice_scatter.cpp
    │   │   │   ├── torch_squeeze.cpp
    │   │   │   ├── torch_stft.cpp
    │   │   │   ├── torch_sum.cpp
    │   │   │   ├── torch_t.cpp
    │   │   │   ├── torch_transpose.cpp
    │   │   │   ├── torch_unsqueeze.cpp
    │   │   │   ├── torchaudio_F_inverse_spectrogram.cpp
    │   │   │   ├── torchaudio_F_spectrogram.cpp
    │   │   │   └── torchvision_DeformConv2d.cpp
    │   │   ├── pass_ncnn.cpp
    │   │   ├── pass_ncnn.h
    │   │   ├── pass_onnx/
    │   │   │   ├── canonicalize.cpp
    │   │   │   ├── canonicalize.h
    │   │   │   ├── dead_code_elimination.cpp
    │   │   │   ├── dead_code_elimination.h
    │   │   │   ├── eliminate_initializer_input.cpp
    │   │   │   ├── eliminate_initializer_input.h
    │   │   │   ├── eliminate_noop.cpp
    │   │   │   ├── eliminate_noop.h
    │   │   │   ├── fold_constants.cpp
    │   │   │   ├── fold_constants.h
    │   │   │   ├── fuse_constant_as_attribute.cpp
    │   │   │   ├── fuse_constant_as_attribute.h
    │   │   │   ├── inline_containers.cpp
    │   │   │   ├── inline_containers.h
    │   │   │   ├── inline_if_graph.cpp
    │   │   │   ├── inline_if_graph.h
    │   │   │   ├── model_stat.cpp
    │   │   │   ├── model_stat.h
    │   │   │   ├── shape_inference.cpp
    │   │   │   └── shape_inference.h
    │   │   ├── pass_onnx.cpp
    │   │   ├── pass_onnx.h
    │   │   ├── pass_tnn/
    │   │   │   ├── fuse_shape_list_construct.cpp
    │   │   │   ├── fuse_shape_list_construct.h
    │   │   │   ├── fuse_shape_size.cpp
    │   │   │   ├── fuse_shape_size.h
    │   │   │   ├── lower_concat.cpp
    │   │   │   ├── lower_concat.h
    │   │   │   ├── lower_convolution_activation.cpp
    │   │   │   ├── lower_convolution_activation.h
    │   │   │   ├── lower_power.cpp
    │   │   │   └── lower_power.h
    │   │   ├── save_ncnn.cpp
    │   │   ├── save_ncnn.h
    │   │   ├── save_onnx.cpp
    │   │   ├── save_onnx.h
    │   │   ├── storezip.cpp
    │   │   ├── storezip.h
    │   │   ├── utils.cpp
    │   │   └── utils.h
    │   └── tests/
    │       ├── CMakeLists.txt
    │       ├── ncnn/
    │       │   ├── CMakeLists.txt
    │       │   ├── test_F_adaptive_avg_pool1d.py
    │       │   ├── test_F_adaptive_avg_pool2d.py
    │       │   ├── test_F_adaptive_avg_pool3d.py
    │       │   ├── test_F_adaptive_max_pool1d.py
    │       │   ├── test_F_adaptive_max_pool2d.py
    │       │   ├── test_F_adaptive_max_pool3d.py
    │       │   ├── test_F_alpha_dropout.py
    │       │   ├── test_F_avg_pool1d.py
    │       │   ├── test_F_avg_pool2d.py
    │       │   ├── test_F_avg_pool3d.py
    │       │   ├── test_F_batch_norm.py
    │       │   ├── test_F_celu.py
    │       │   ├── test_F_conv1d.py
    │       │   ├── test_F_conv2d.py
    │       │   ├── test_F_conv3d.py
    │       │   ├── test_F_conv_transpose1d.py
    │       │   ├── test_F_conv_transpose2d.py
    │       │   ├── test_F_conv_transpose3d.py
    │       │   ├── test_F_dropout.py
    │       │   ├── test_F_dropout2d.py
    │       │   ├── test_F_dropout3d.py
    │       │   ├── test_F_elu.py
    │       │   ├── test_F_embedding.py
    │       │   ├── test_F_feature_alpha_dropout.py
    │       │   ├── test_F_fold.py
    │       │   ├── test_F_gelu.py
    │       │   ├── test_F_glu.py
    │       │   ├── test_F_grid_sample.py
    │       │   ├── test_F_group_norm.py
    │       │   ├── test_F_hardshrink.py
    │       │   ├── test_F_hardsigmoid.py
    │       │   ├── test_F_hardswish.py
    │       │   ├── test_F_hardtanh.py
    │       │   ├── test_F_interpolate.py
    │       │   ├── test_F_layer_norm.py
    │       │   ├── test_F_leaky_relu.py
    │       │   ├── test_F_local_response_norm.py
    │       │   ├── test_F_log_softmax.py
    │       │   ├── test_F_logsigmoid.py
    │       │   ├── test_F_max_pool1d.py
    │       │   ├── test_F_max_pool2d.py
    │       │   ├── test_F_max_pool3d.py
    │       │   ├── test_F_mish.py
    │       │   ├── test_F_normalize.py
    │       │   ├── test_F_pad.py
    │       │   ├── test_F_pixel_shuffle.py
    │       │   ├── test_F_pixel_unshuffle.py
    │       │   ├── test_F_prelu.py
    │       │   ├── test_F_relu.py
    │       │   ├── test_F_relu6.py
    │       │   ├── test_F_rms_norm.py
    │       │   ├── test_F_scaled_dot_product_attention.py
    │       │   ├── test_F_selu.py
    │       │   ├── test_F_sigmoid.py
    │       │   ├── test_F_silu.py
    │       │   ├── test_F_softmax.py
    │       │   ├── test_F_softshrink.py
    │       │   ├── test_F_tanh.py
    │       │   ├── test_F_unfold.py
    │       │   ├── test_F_upsample.py
    │       │   ├── test_F_upsample_bilinear.py
    │       │   ├── test_F_upsample_nearest.py
    │       │   ├── test_Tensor_expand.py
    │       │   ├── test_Tensor_permute.py
    │       │   ├── test_Tensor_repeat.py
    │       │   ├── test_Tensor_reshape.py
    │       │   ├── test_Tensor_reshape_as.py
    │       │   ├── test_Tensor_slice.py
    │       │   ├── test_Tensor_slice_copy.py
    │       │   ├── test_Tensor_unflatten.py
    │       │   ├── test_Tensor_view.py
    │       │   ├── test_convnext_tiny.py
    │       │   ├── test_mobilenet_v2.py
    │       │   ├── test_mobilenet_v3_small.py
    │       │   ├── test_ncnn_fuse_binaryop_eltwise.py
    │       │   ├── test_ncnn_fuse_pad_conv.py
    │       │   ├── test_ncnn_fuse_shufflechannel_slice.py
    │       │   ├── test_ncnn_fuse_transpose_matmul.py
    │       │   ├── test_ncnn_interp_expr.py
    │       │   ├── test_ncnn_numpy_binaryop_broadcast.py
    │       │   ├── test_ncnn_reshape_expr.py
    │       │   ├── test_ncnn_slice_expr.py
    │       │   ├── test_ncnn_solve_batch_index.py
    │       │   ├── test_nn_AdaptiveAvgPool1d.py
    │       │   ├── test_nn_AdaptiveAvgPool2d.py
    │       │   ├── test_nn_AdaptiveAvgPool3d.py
    │       │   ├── test_nn_AdaptiveMaxPool1d.py
    │       │   ├── test_nn_AdaptiveMaxPool2d.py
    │       │   ├── test_nn_AdaptiveMaxPool3d.py
    │       │   ├── test_nn_AlphaDropout.py
    │       │   ├── test_nn_AvgPool1d.py
    │       │   ├── test_nn_AvgPool2d.py
    │       │   ├── test_nn_AvgPool3d.py
    │       │   ├── test_nn_BatchNorm1d.py
    │       │   ├── test_nn_BatchNorm2d.py
    │       │   ├── test_nn_BatchNorm3d.py
    │       │   ├── test_nn_CELU.py
    │       │   ├── test_nn_ChannelShuffle.py
    │       │   ├── test_nn_ConstantPad1d.py
    │       │   ├── test_nn_ConstantPad2d.py
    │       │   ├── test_nn_ConstantPad3d.py
    │       │   ├── test_nn_Conv1d.py
    │       │   ├── test_nn_Conv2d.py
    │       │   ├── test_nn_Conv3d.py
    │       │   ├── test_nn_ConvTranspose1d.py
    │       │   ├── test_nn_ConvTranspose2d.py
    │       │   ├── test_nn_ConvTranspose3d.py
    │       │   ├── test_nn_Dropout.py
    │       │   ├── test_nn_Dropout2d.py
    │       │   ├── test_nn_Dropout3d.py
    │       │   ├── test_nn_ELU.py
    │       │   ├── test_nn_Embedding.py
    │       │   ├── test_nn_Fold.py
    │       │   ├── test_nn_GELU.py
    │       │   ├── test_nn_GLU.py
    │       │   ├── test_nn_GRU.py
    │       │   ├── test_nn_GroupNorm.py
    │       │   ├── test_nn_Hardshrink.py
    │       │   ├── test_nn_Hardsigmoid.py
    │       │   ├── test_nn_Hardswish.py
    │       │   ├── test_nn_Hardtanh.py
    │       │   ├── test_nn_Identity.py
    │       │   ├── test_nn_InstanceNorm2d.py
    │       │   ├── test_nn_LSTM.py
    │       │   ├── test_nn_LayerNorm.py
    │       │   ├── test_nn_LeakyReLU.py
    │       │   ├── test_nn_Linear.py
    │       │   ├── test_nn_LocalResponseNorm.py
    │       │   ├── test_nn_LogSigmoid.py
    │       │   ├── test_nn_LogSoftmax.py
    │       │   ├── test_nn_MaxPool1d.py
    │       │   ├── test_nn_MaxPool2d.py
    │       │   ├── test_nn_MaxPool3d.py
    │       │   ├── test_nn_Mish.py
    │       │   ├── test_nn_MultiheadAttention.py
    │       │   ├── test_nn_PReLU.py
    │       │   ├── test_nn_PixelShuffle.py
    │       │   ├── test_nn_PixelUnshuffle.py
    │       │   ├── test_nn_RMSNorm.py
    │       │   ├── test_nn_RNN.py
    │       │   ├── test_nn_ReLU.py
    │       │   ├── test_nn_ReLU6.py
    │       │   ├── test_nn_ReflectionPad1d.py
    │       │   ├── test_nn_ReflectionPad2d.py
    │       │   ├── test_nn_ReplicationPad1d.py
    │       │   ├── test_nn_ReplicationPad2d.py
    │       │   ├── test_nn_ReplicationPad3d.py
    │       │   ├── test_nn_SELU.py
    │       │   ├── test_nn_SiLU.py
    │       │   ├── test_nn_Sigmoid.py
    │       │   ├── test_nn_Softmax.py
    │       │   ├── test_nn_Softmax2d.py
    │       │   ├── test_nn_Softshrink.py
    │       │   ├── test_nn_Tanh.py
    │       │   ├── test_nn_Unfold.py
    │       │   ├── test_nn_Upsample.py
    │       │   ├── test_nn_UpsamplingBilinear2d.py
    │       │   ├── test_nn_UpsamplingNearest2d.py
    │       │   ├── test_nn_ZeroPad2d.py
    │       │   ├── test_resnet18.py
    │       │   ├── test_shufflenet_v2_x1_0.py
    │       │   ├── test_squeezenet1_1.py
    │       │   ├── test_torch_abs.py
    │       │   ├── test_torch_acos.py
    │       │   ├── test_torch_addmm.py
    │       │   ├── test_torch_amax.py
    │       │   ├── test_torch_amin.py
    │       │   ├── test_torch_asin.py
    │       │   ├── test_torch_atan.py
    │       │   ├── test_torch_atan2.py
    │       │   ├── test_torch_bmm.py
    │       │   ├── test_torch_cat.py
    │       │   ├── test_torch_ceil.py
    │       │   ├── test_torch_chunk.py
    │       │   ├── test_torch_clamp.py
    │       │   ├── test_torch_clone.py
    │       │   ├── test_torch_cos.py
    │       │   ├── test_torch_cumsum.py
    │       │   ├── test_torch_diag.py
    │       │   ├── test_torch_einsum.py
    │       │   ├── test_torch_exp.py
    │       │   ├── test_torch_flatten.py
    │       │   ├── test_torch_flip.py
    │       │   ├── test_torch_floor.py
    │       │   ├── test_torch_istft.py
    │       │   ├── test_torch_log.py
    │       │   ├── test_torch_log10.py
    │       │   ├── test_torch_logsumexp.py
    │       │   ├── test_torch_matmul.py
    │       │   ├── test_torch_max.py
    │       │   ├── test_torch_maximum.py
    │       │   ├── test_torch_mean.py
    │       │   ├── test_torch_min.py
    │       │   ├── test_torch_minimum.py
    │       │   ├── test_torch_mm.py
    │       │   ├── test_torch_neg.py
    │       │   ├── test_torch_norm.py
    │       │   ├── test_torch_pow.py
    │       │   ├── test_torch_prod.py
    │       │   ├── test_torch_reciprocal.py
    │       │   ├── test_torch_roll.py
    │       │   ├── test_torch_round.py
    │       │   ├── test_torch_rsqrt.py
    │       │   ├── test_torch_sin.py
    │       │   ├── test_torch_slice_scatter.py
    │       │   ├── test_torch_sqrt.py
    │       │   ├── test_torch_square.py
    │       │   ├── test_torch_squeeze.py
    │       │   ├── test_torch_stack.py
    │       │   ├── test_torch_stft.py
    │       │   ├── test_torch_sum.py
    │       │   ├── test_torch_t.py
    │       │   ├── test_torch_tan.py
    │       │   ├── test_torch_tanh.py
    │       │   ├── test_torch_tensor_split.py
    │       │   ├── test_torch_transpose.py
    │       │   ├── test_torch_trunc.py
    │       │   ├── test_torch_unbind.py
    │       │   ├── test_torch_unsqueeze.py
    │       │   ├── test_torchaudio_F_inverse_spectrogram.py
    │       │   ├── test_torchaudio_F_spectrogram.py
    │       │   ├── test_torchaudio_InverseSpectrogram.py
    │       │   ├── test_torchaudio_Spectrogram.py
    │       │   ├── test_torchvision_DeformConv2d.py
    │       │   ├── test_transformers_deepseek_v3_attention.py
    │       │   ├── test_transformers_qwen2_attention.py
    │       │   ├── test_transformers_qwen3_attention.py
    │       │   └── test_vit_b_32.py
    │       ├── onnx/
    │       │   ├── CMakeLists.txt
    │       │   ├── test_F_adaptive_avg_pool1d.py
    │       │   ├── test_F_adaptive_avg_pool2d.py
    │       │   ├── test_F_adaptive_avg_pool3d.py
    │       │   ├── test_F_adaptive_max_pool1d.py
    │       │   ├── test_F_adaptive_max_pool2d.py
    │       │   ├── test_F_adaptive_max_pool3d.py
    │       │   ├── test_F_avg_pool1d.py
    │       │   ├── test_F_avg_pool2d.py
    │       │   ├── test_F_avg_pool3d.py
    │       │   ├── test_F_batch_norm.py
    │       │   ├── test_F_celu.py
    │       │   ├── test_F_conv1d.py
    │       │   ├── test_F_conv2d.py
    │       │   ├── test_F_conv3d.py
    │       │   ├── test_F_conv_transpose1d.py
    │       │   ├── test_F_conv_transpose2d.py
    │       │   ├── test_F_conv_transpose3d.py
    │       │   ├── test_F_elu.py
    │       │   ├── test_F_gelu.py
    │       │   ├── test_F_group_norm.py
    │       │   ├── test_F_hardshrink.py
    │       │   ├── test_F_hardsigmoid.py
    │       │   ├── test_F_hardswish.py
    │       │   ├── test_F_hardtanh.py
    │       │   ├── test_F_interpolate.py
    │       │   ├── test_F_layer_norm.py
    │       │   ├── test_F_leaky_relu.py
    │       │   ├── test_F_linear.py
    │       │   ├── test_F_local_response_norm.py
    │       │   ├── test_F_log_softmax.py
    │       │   ├── test_F_logsigmoid.py
    │       │   ├── test_F_max_pool1d.py
    │       │   ├── test_F_max_pool2d.py
    │       │   ├── test_F_max_pool3d.py
    │       │   ├── test_F_mish.py
    │       │   ├── test_F_normalize.py
    │       │   ├── test_F_pad.py
    │       │   ├── test_F_pixel_shuffle.py
    │       │   ├── test_F_pixel_unshuffle.py
    │       │   ├── test_F_prelu.py
    │       │   ├── test_F_relu.py
    │       │   ├── test_F_relu6.py
    │       │   ├── test_F_scaled_dot_product_attention.py
    │       │   ├── test_F_selu.py
    │       │   ├── test_F_sigmoid.py
    │       │   ├── test_F_silu.py
    │       │   ├── test_F_softmax.py
    │       │   ├── test_F_softmin.py
    │       │   ├── test_F_softplus.py
    │       │   ├── test_F_softshrink.py
    │       │   ├── test_F_softsign.py
    │       │   ├── test_F_tanh.py
    │       │   ├── test_F_tanhshrink.py
    │       │   ├── test_F_upsample.py
    │       │   ├── test_F_upsample_bilinear.py
    │       │   ├── test_F_upsample_nearest.py
    │       │   ├── test_Tensor_expand.py
    │       │   ├── test_Tensor_permute.py
    │       │   ├── test_Tensor_repeat.py
    │       │   ├── test_Tensor_reshape.py
    │       │   ├── test_Tensor_reshape_as.py
    │       │   ├── test_Tensor_select.py
    │       │   ├── test_Tensor_slice.py
    │       │   ├── test_Tensor_unflatten.py
    │       │   ├── test_Tensor_view.py
    │       │   ├── test_convnext_tiny.py
    │       │   ├── test_mobilenet_v2.py
    │       │   ├── test_mobilenet_v3_small.py
    │       │   ├── test_nn_AdaptiveAvgPool1d.py
    │       │   ├── test_nn_AdaptiveAvgPool2d.py
    │       │   ├── test_nn_AdaptiveAvgPool3d.py
    │       │   ├── test_nn_AdaptiveMaxPool1d.py
    │       │   ├── test_nn_AdaptiveMaxPool2d.py
    │       │   ├── test_nn_AdaptiveMaxPool3d.py
    │       │   ├── test_nn_AvgPool1d.py
    │       │   ├── test_nn_AvgPool2d.py
    │       │   ├── test_nn_AvgPool3d.py
    │       │   ├── test_nn_BatchNorm1d.py
    │       │   ├── test_nn_BatchNorm2d.py
    │       │   ├── test_nn_BatchNorm3d.py
    │       │   ├── test_nn_CELU.py
    │       │   ├── test_nn_ConstantPad1d.py
    │       │   ├── test_nn_ConstantPad2d.py
    │       │   ├── test_nn_ConstantPad3d.py
    │       │   ├── test_nn_Conv1d.py
    │       │   ├── test_nn_Conv2d.py
    │       │   ├── test_nn_Conv3d.py
    │       │   ├── test_nn_ConvTranspose1d.py
    │       │   ├── test_nn_ConvTranspose2d.py
    │       │   ├── test_nn_ConvTranspose3d.py
    │       │   ├── test_nn_ELU.py
    │       │   ├── test_nn_GELU.py
    │       │   ├── test_nn_GRU.py
    │       │   ├── test_nn_GroupNorm.py
    │       │   ├── test_nn_Hardshrink.py
    │       │   ├── test_nn_Hardsigmoid.py
    │       │   ├── test_nn_Hardswish.py
    │       │   ├── test_nn_Hardtanh.py
    │       │   ├── test_nn_InstanceNorm1d.py
    │       │   ├── test_nn_InstanceNorm2d.py
    │       │   ├── test_nn_InstanceNorm3d.py
    │       │   ├── test_nn_LSTM.py
    │       │   ├── test_nn_LayerNorm.py
    │       │   ├── test_nn_LeakyReLU.py
    │       │   ├── test_nn_Linear.py
    │       │   ├── test_nn_LocalResponseNorm.py
    │       │   ├── test_nn_LogSigmoid.py
    │       │   ├── test_nn_LogSoftmax.py
    │       │   ├── test_nn_MaxPool1d.py
    │       │   ├── test_nn_MaxPool2d.py
    │       │   ├── test_nn_MaxPool3d.py
    │       │   ├── test_nn_Mish.py
    │       │   ├── test_nn_MultiheadAttention.py
    │       │   ├── test_nn_PReLU.py
    │       │   ├── test_nn_PixelShuffle.py
    │       │   ├── test_nn_PixelUnshuffle.py
    │       │   ├── test_nn_RNN.py
    │       │   ├── test_nn_ReLU.py
    │       │   ├── test_nn_ReLU6.py
    │       │   ├── test_nn_ReflectionPad1d.py
    │       │   ├── test_nn_ReflectionPad2d.py
    │       │   ├── test_nn_ReplicationPad1d.py
    │       │   ├── test_nn_ReplicationPad2d.py
    │       │   ├── test_nn_ReplicationPad3d.py
    │       │   ├── test_nn_SELU.py
    │       │   ├── test_nn_SiLU.py
    │       │   ├── test_nn_Sigmoid.py
    │       │   ├── test_nn_Softmax.py
    │       │   ├── test_nn_Softmin.py
    │       │   ├── test_nn_Softplus.py
    │       │   ├── test_nn_Softshrink.py
    │       │   ├── test_nn_Softsign.py
    │       │   ├── test_nn_Tanh.py
    │       │   ├── test_nn_Tanhshrink.py
    │       │   ├── test_nn_Upsample.py
    │       │   ├── test_nn_UpsamplingBilinear2d.py
    │       │   ├── test_nn_UpsamplingNearest2d.py
    │       │   ├── test_nn_ZeroPad2d.py
    │       │   ├── test_onnx_activation_ops.py
    │       │   ├── test_onnx_conv_ops.py
    │       │   ├── test_onnx_dense_ops.py
    │       │   ├── test_onnx_fuse_channel_shuffle.py
    │       │   ├── test_onnx_fuse_pixel_shuffle.py
    │       │   ├── test_onnx_fuse_pixel_unshuffle.py
    │       │   ├── test_onnx_layout_ops.py
    │       │   ├── test_onnx_math_ops.py
    │       │   ├── test_onnx_normalize_ops.py
    │       │   ├── test_onnx_opset21_ops.py
    │       │   ├── test_onnx_pool_ops.py
    │       │   ├── test_onnx_reduce_ops.py
    │       │   ├── test_onnx_rnn_ops.py
    │       │   ├── test_resnet18.py
    │       │   ├── test_shufflenet_v2_x1_0.py
    │       │   ├── test_squeezenet1_1.py
    │       │   ├── test_swin_t.py
    │       │   ├── test_torch_cat.py
    │       │   ├── test_torch_ceil.py
    │       │   ├── test_torch_chunk.py
    │       │   ├── test_torch_clamp.py
    │       │   ├── test_torch_flatten.py
    │       │   ├── test_torch_flip.py
    │       │   ├── test_torch_floor.py
    │       │   ├── test_torch_logical_and.py
    │       │   ├── test_torch_logical_not.py
    │       │   ├── test_torch_logical_or.py
    │       │   ├── test_torch_logical_xor.py
    │       │   ├── test_torch_max.py
    │       │   ├── test_torch_maximum.py
    │       │   ├── test_torch_mean.py
    │       │   ├── test_torch_min.py
    │       │   ├── test_torch_minimum.py
    │       │   ├── test_torch_norm.py
    │       │   ├── test_torch_prod.py
    │       │   ├── test_torch_roll.py
    │       │   ├── test_torch_split.py
    │       │   ├── test_torch_squeeze.py
    │       │   ├── test_torch_stack.py
    │       │   ├── test_torch_sum.py
    │       │   ├── test_torch_transpose.py
    │       │   ├── test_torch_unbind.py
    │       │   ├── test_torch_unsqueeze.py
    │       │   ├── test_transformers_albert_attention.py
    │       │   ├── test_transformers_bart_attention.py
    │       │   ├── test_transformers_bert_attention.py
    │       │   ├── test_transformers_bert_generation_attention.py
    │       │   ├── test_transformers_blenderbot_attention.py
    │       │   ├── test_transformers_camembert_attention.py
    │       │   ├── test_transformers_chinese_clip_attention.py
    │       │   ├── test_transformers_clip_attention.py
    │       │   ├── test_transformers_ctrl_attention.py
    │       │   ├── test_transformers_deberta_attention.py
    │       │   ├── test_transformers_distilbert_attention.py
    │       │   ├── test_transformers_electra_attention.py
    │       │   ├── test_transformers_flaubert_attention.py
    │       │   ├── test_transformers_fsmt_attention.py
    │       │   ├── test_transformers_funnel_attention.py
    │       │   ├── test_transformers_gpt2_attention.py
    │       │   ├── test_transformers_layoutlm_attention.py
    │       │   ├── test_transformers_lxmert_attention.py
    │       │   ├── test_transformers_m2m_100_attention.py
    │       │   ├── test_transformers_marian_attention.py
    │       │   ├── test_transformers_mbart_attention.py
    │       │   ├── test_transformers_mobilebert_attention.py
    │       │   ├── test_transformers_mt5_attention.py
    │       │   ├── test_transformers_openai_attention.py
    │       │   ├── test_transformers_pegasus_attention.py
    │       │   ├── test_transformers_prophetnet_attention.py
    │       │   ├── test_transformers_reformer_attention.py
    │       │   ├── test_transformers_roberta_attention.py
    │       │   ├── test_transformers_squeezebert_attention.py
    │       │   ├── test_transformers_t5_attention.py
    │       │   ├── test_transformers_xlm_attention.py
    │       │   ├── test_transformers_xlm_roberta_attention.py
    │       │   └── test_vit_b_32.py
    │       ├── run_test.cmake
    │       ├── test_F_adaptive_avg_pool1d.py
    │       ├── test_F_adaptive_avg_pool2d.py
    │       ├── test_F_adaptive_avg_pool3d.py
    │       ├── test_F_adaptive_max_pool1d.py
    │       ├── test_F_adaptive_max_pool2d.py
    │       ├── test_F_adaptive_max_pool3d.py
    │       ├── test_F_affine_grid.py
    │       ├── test_F_alpha_dropout.py
    │       ├── test_F_avg_pool1d.py
    │       ├── test_F_avg_pool2d.py
    │       ├── test_F_avg_pool3d.py
    │       ├── test_F_batch_norm.py
    │       ├── test_F_celu.py
    │       ├── test_F_conv1d.py
    │       ├── test_F_conv2d.py
    │       ├── test_F_conv3d.py
    │       ├── test_F_conv_transpose1d.py
    │       ├── test_F_conv_transpose2d.py
    │       ├── test_F_conv_transpose3d.py
    │       ├── test_F_dropout.py
    │       ├── test_F_dropout2d.py
    │       ├── test_F_dropout3d.py
    │       ├── test_F_elu.py
    │       ├── test_F_embedding.py
    │       ├── test_F_feature_alpha_dropout.py
    │       ├── test_F_fold.py
    │       ├── test_F_gelu.py
    │       ├── test_F_glu.py
    │       ├── test_F_grid_sample.py
    │       ├── test_F_group_norm.py
    │       ├── test_F_hardshrink.py
    │       ├── test_F_hardsigmoid.py
    │       ├── test_F_hardswish.py
    │       ├── test_F_hardtanh.py
    │       ├── test_F_instance_norm.py
    │       ├── test_F_interpolate.py
    │       ├── test_F_layer_norm.py
    │       ├── test_F_leaky_relu.py
    │       ├── test_F_linear.py
    │       ├── test_F_local_response_norm.py
    │       ├── test_F_log_softmax.py
    │       ├── test_F_logsigmoid.py
    │       ├── test_F_lp_pool1d.py
    │       ├── test_F_lp_pool2d.py
    │       ├── test_F_max_pool1d.py
    │       ├── test_F_max_pool2d.py
    │       ├── test_F_max_pool3d.py
    │       ├── test_F_mish.py
    │       ├── test_F_normalize.py
    │       ├── test_F_pad.py
    │       ├── test_F_pairwise_distance.py
    │       ├── test_F_pixel_shuffle.py
    │       ├── test_F_pixel_unshuffle.py
    │       ├── test_F_prelu.py
    │       ├── test_F_relu.py
    │       ├── test_F_relu6.py
    │       ├── test_F_rms_norm.py
    │       ├── test_F_rrelu.py
    │       ├── test_F_scaled_dot_product_attention.py
    │       ├── test_F_selu.py
    │       ├── test_F_sigmoid.py
    │       ├── test_F_silu.py
    │       ├── test_F_softmax.py
    │       ├── test_F_softmin.py
    │       ├── test_F_softplus.py
    │       ├── test_F_softshrink.py
    │       ├── test_F_softsign.py
    │       ├── test_F_tanh.py
    │       ├── test_F_tanhshrink.py
    │       ├── test_F_threshold.py
    │       ├── test_F_unfold.py
    │       ├── test_F_upsample.py
    │       ├── test_F_upsample_bilinear.py
    │       ├── test_F_upsample_nearest.py
    │       ├── test_Tensor_expand.py
    │       ├── test_Tensor_fill.py
    │       ├── test_Tensor_index.py
    │       ├── test_Tensor_index_put.py
    │       ├── test_Tensor_masked_fill.py
    │       ├── test_Tensor_new_empty.py
    │       ├── test_Tensor_new_full.py
    │       ├── test_Tensor_new_ones.py
    │       ├── test_Tensor_new_zeros.py
    │       ├── test_Tensor_permute.py
    │       ├── test_Tensor_repeat.py
    │       ├── test_Tensor_reshape.py
    │       ├── test_Tensor_reshape_as.py
    │       ├── test_Tensor_select.py
    │       ├── test_Tensor_slice.py
    │       ├── test_Tensor_slice_copy.py
    │       ├── test_Tensor_to.py
    │       ├── test_Tensor_type_as.py
    │       ├── test_Tensor_unflatten.py
    │       ├── test_Tensor_view.py
    │       ├── test_convnext_tiny.py
    │       ├── test_ir_complex.py
    │       ├── test_mobilenet_v2.py
    │       ├── test_mobilenet_v3_small.py
    │       ├── test_nn_AdaptiveAvgPool1d.py
    │       ├── test_nn_AdaptiveAvgPool2d.py
    │       ├── test_nn_AdaptiveAvgPool3d.py
    │       ├── test_nn_AdaptiveMaxPool1d.py
    │       ├── test_nn_AdaptiveMaxPool2d.py
    │       ├── test_nn_AdaptiveMaxPool3d.py
    │       ├── test_nn_AlphaDropout.py
    │       ├── test_nn_AvgPool1d.py
    │       ├── test_nn_AvgPool2d.py
    │       ├── test_nn_AvgPool3d.py
    │       ├── test_nn_BatchNorm1d.py
    │       ├── test_nn_BatchNorm2d.py
    │       ├── test_nn_BatchNorm3d.py
    │       ├── test_nn_CELU.py
    │       ├── test_nn_ChannelShuffle.py
    │       ├── test_nn_ConstantPad1d.py
    │       ├── test_nn_ConstantPad2d.py
    │       ├── test_nn_ConstantPad3d.py
    │       ├── test_nn_Conv1d.py
    │       ├── test_nn_Conv2d.py
    │       ├── test_nn_Conv3d.py
    │       ├── test_nn_ConvTranspose1d.py
    │       ├── test_nn_ConvTranspose2d.py
    │       ├── test_nn_ConvTranspose3d.py
    │       ├── test_nn_Dropout.py
    │       ├── test_nn_Dropout2d.py
    │       ├── test_nn_Dropout3d.py
    │       ├── test_nn_ELU.py
    │       ├── test_nn_Embedding.py
    │       ├── test_nn_Fold.py
    │       ├── test_nn_GELU.py
    │       ├── test_nn_GLU.py
    │       ├── test_nn_GRU.py
    │       ├── test_nn_GroupNorm.py
    │       ├── test_nn_Hardshrink.py
    │       ├── test_nn_Hardsigmoid.py
    │       ├── test_nn_Hardswish.py
    │       ├── test_nn_Hardtanh.py
    │       ├── test_nn_Identity.py
    │       ├── test_nn_InstanceNorm1d.py
    │       ├── test_nn_InstanceNorm2d.py
    │       ├── test_nn_InstanceNorm3d.py
    │       ├── test_nn_LPPool1d.py
    │       ├── test_nn_LPPool2d.py
    │       ├── test_nn_LSTM.py
    │       ├── test_nn_LayerNorm.py
    │       ├── test_nn_LeakyReLU.py
    │       ├── test_nn_Linear.py
    │       ├── test_nn_LocalResponseNorm.py
    │       ├── test_nn_LogSigmoid.py
    │       ├── test_nn_LogSoftmax.py
    │       ├── test_nn_MaxPool1d.py
    │       ├── test_nn_MaxPool2d.py
    │       ├── test_nn_MaxPool3d.py
    │       ├── test_nn_Mish.py
    │       ├── test_nn_MultiheadAttention.py
    │       ├── test_nn_PReLU.py
    │       ├── test_nn_PixelShuffle.py
    │       ├── test_nn_PixelUnshuffle.py
    │       ├── test_nn_RMSNorm.py
    │       ├── test_nn_RNN.py
    │       ├── test_nn_RReLU.py
    │       ├── test_nn_ReLU.py
    │       ├── test_nn_ReLU6.py
    │       ├── test_nn_ReflectionPad1d.py
    │       ├── test_nn_ReflectionPad2d.py
    │       ├── test_nn_ReplicationPad1d.py
    │       ├── test_nn_ReplicationPad2d.py
    │       ├── test_nn_ReplicationPad3d.py
    │       ├── test_nn_SELU.py
    │       ├── test_nn_SiLU.py
    │       ├── test_nn_Sigmoid.py
    │       ├── test_nn_Softmax.py
    │       ├── test_nn_Softmax2d.py
    │       ├── test_nn_Softmin.py
    │       ├── test_nn_Softplus.py
    │       ├── test_nn_Softshrink.py
    │       ├── test_nn_Softsign.py
    │       ├── test_nn_Tanh.py
    │       ├── test_nn_Tanhshrink.py
    │       ├── test_nn_Threshold.py
    │       ├── test_nn_Unfold.py
    │       ├── test_nn_Upsample.py
    │       ├── test_nn_UpsamplingBilinear2d.py
    │       ├── test_nn_UpsamplingNearest2d.py
    │       ├── test_nn_ZeroPad2d.py
    │       ├── test_pnnx_eliminate_noop_cat.py
    │       ├── test_pnnx_eliminate_noop_expand.py
    │       ├── test_pnnx_eliminate_noop_math.py
    │       ├── test_pnnx_eliminate_noop_upsample.py
    │       ├── test_pnnx_expression.py
    │       ├── test_pnnx_fold_constant.py
    │       ├── test_pnnx_fuse_adjacent_permute.py
    │       ├── test_pnnx_fuse_adjacent_reshape.py
    │       ├── test_pnnx_fuse_channel_shuffle.py
    │       ├── test_pnnx_fuse_conv1d_batchnorm1d.py
    │       ├── test_pnnx_fuse_conv2d_batchnorm2d.py
    │       ├── test_pnnx_fuse_conv3d_batchnorm3d.py
    │       ├── test_pnnx_fuse_convtranspose1d_batchnorm1d.py
    │       ├── test_pnnx_fuse_convtranspose2d_batchnorm2d.py
    │       ├── test_pnnx_fuse_convtranspose3d_batchnorm3d.py
    │       ├── test_pnnx_fuse_input_unpack.py
    │       ├── test_pnnx_fuse_layernorm.py
    │       ├── test_pnnx_fuse_linear_batchnorm1d.py
    │       ├── test_pnnx_fuse_multiheadattention.py
    │       ├── test_pnnx_fuse_pad_conv1d.py
    │       ├── test_pnnx_fuse_pad_conv2d.py
    │       ├── test_pnnx_fuse_pixel_shuffle.py
    │       ├── test_pnnx_fuse_pixel_unshuffle.py
    │       ├── test_pnnx_fuse_rmsnorm.py
    │       ├── test_pnnx_fuse_scaled_dot_product_attention.py
    │       ├── test_pnnx_fuse_select_to_unbind.py
    │       ├── test_pnnx_fuse_slice_to_tensor_split.py
    │       ├── test_quantization_shufflenet_v2_x1_0.py
    │       ├── test_resnet18.py
    │       ├── test_shufflenet_v2_x1_0.py
    │       ├── test_squeezenet1_1.py
    │       ├── test_swin_t.py
    │       ├── test_torch_abs.py
    │       ├── test_torch_acos.py
    │       ├── test_torch_acosh.py
    │       ├── test_torch_addmm.py
    │       ├── test_torch_amax.py
    │       ├── test_torch_amin.py
    │       ├── test_torch_arange.py
    │       ├── test_torch_argmax.py
    │       ├── test_torch_argmin.py
    │       ├── test_torch_asin.py
    │       ├── test_torch_asinh.py
    │       ├── test_torch_atan.py
    │       ├── test_torch_atan2.py
    │       ├── test_torch_atanh.py
    │       ├── test_torch_bitwise_and.py
    │       ├── test_torch_bitwise_left_shift.py
    │       ├── test_torch_bitwise_not.py
    │       ├── test_torch_bitwise_or.py
    │       ├── test_torch_bitwise_right_shift.py
    │       ├── test_torch_bitwise_xor.py
    │       ├── test_torch_bmm.py
    │       ├── test_torch_cat.py
    │       ├── test_torch_ceil.py
    │       ├── test_torch_chunk.py
    │       ├── test_torch_clamp.py
    │       ├── test_torch_clone.py
    │       ├── test_torch_complex.py
    │       ├── test_torch_cos.py
    │       ├── test_torch_cosh.py
    │       ├── test_torch_cross.py
    │       ├── test_torch_cumprod.py
    │       ├── test_torch_cumsum.py
    │       ├── test_torch_diag.py
    │       ├── test_torch_einsum.py
    │       ├── test_torch_eq.py
    │       ├── test_torch_exp.py
    │       ├── test_torch_fft_fft.py
    │       ├── test_torch_fft_fft2.py
    │       ├── test_torch_fft_fftn.py
    │       ├── test_torch_fft_hfft.py
    │       ├── test_torch_fft_hfft2.py
    │       ├── test_torch_fft_hfftn.py
    │       ├── test_torch_fft_ifft.py
    │       ├── test_torch_fft_ifft2.py
    │       ├── test_torch_fft_ifftn.py
    │       ├── test_torch_fft_ihfft.py
    │       ├── test_torch_fft_ihfft2.py
    │       ├── test_torch_fft_ihfftn.py
    │       ├── test_torch_fft_irfft.py
    │       ├── test_torch_fft_irfft2.py
    │       ├── test_torch_fft_irfftn.py
    │       ├── test_torch_fft_rfft.py
    │       ├── test_torch_fft_rfft2.py
    │       ├── test_torch_fft_rfftn.py
    │       ├── test_torch_flatten.py
    │       ├── test_torch_flip.py
    │       ├── test_torch_floor.py
    │       ├── test_torch_full.py
    │       ├── test_torch_full_like.py
    │       ├── test_torch_gather.py
    │       ├── test_torch_ge.py
    │       ├── test_torch_gt.py
    │       ├── test_torch_imag.py
    │       ├── test_torch_index_select.py
    │       ├── test_torch_istft.py
    │       ├── test_torch_le.py
    │       ├── test_torch_lgamma.py
    │       ├── test_torch_log.py
    │       ├── test_torch_log10.py
    │       ├── test_torch_logaddexp.py
    │       ├── test_torch_logical_and.py
    │       ├── test_torch_logical_not.py
    │       ├── test_torch_logical_or.py
    │       ├── test_torch_logical_xor.py
    │       ├── test_torch_logsumexp.py
    │       ├── test_torch_lt.py
    │       ├── test_torch_masked_select.py
    │       ├── test_torch_matmul.py
    │       ├── test_torch_max.py
    │       ├── test_torch_maximum.py
    │       ├── test_torch_mean.py
    │       ├── test_torch_min.py
    │       ├── test_torch_minimum.py
    │       ├── test_torch_mm.py
    │       ├── test_torch_mv.py
    │       ├── test_torch_narrow.py
    │       ├── test_torch_ne.py
    │       ├── test_torch_neg.py
    │       ├── test_torch_norm.py
    │       ├── test_torch_ones.py
    │       ├── test_torch_ones_like.py
    │       ├── test_torch_positive.py
    │       ├── test_torch_pow.py
    │       ├── test_torch_prod.py
    │       ├── test_torch_real.py
    │       ├── test_torch_reciprocal.py
    │       ├── test_torch_repeat_interleave.py
    │       ├── test_torch_roll.py
    │       ├── test_torch_round.py
    │       ├── test_torch_rsqrt.py
    │       ├── test_torch_scatter_add.py
    │       ├── test_torch_sign.py
    │       ├── test_torch_sin.py
    │       ├── test_torch_sinh.py
    │       ├── test_torch_slice_scatter.py
    │       ├── test_torch_split.py
    │       ├── test_torch_sqrt.py
    │       ├── test_torch_square.py
    │       ├── test_torch_squeeze.py
    │       ├── test_torch_stack.py
    │       ├── test_torch_std.py
    │       ├── test_torch_stft.py
    │       ├── test_torch_sum.py
    │       ├── test_torch_t.py
    │       ├── test_torch_tan.py
    │       ├── test_torch_tanh.py
    │       ├── test_torch_tensor_split.py
    │       ├── test_torch_tile.py
    │       ├── test_torch_topk.py
    │       ├── test_torch_transpose.py
    │       ├── test_torch_trunc.py
    │       ├── test_torch_unbind.py
    │       ├── test_torch_unsqueeze.py
    │       ├── test_torch_view_as_complex.py
    │       ├── test_torch_view_as_real.py
    │       ├── test_torch_where.py
    │       ├── test_torch_zeros.py
    │       ├── test_torch_zeros_like.py
    │       ├── test_torchaudio_F_inverse_spectrogram.py
    │       ├── test_torchaudio_F_spectrogram.py
    │       ├── test_torchaudio_InverseSpectrogram.py
    │       ├── test_torchaudio_Spectrogram.py
    │       ├── test_torchvision_DeformConv2d.py
    │       ├── test_torchvision_RoIAlign.py
    │       ├── test_transformers_albert_attention.py
    │       ├── test_transformers_bart_attention.py
    │       ├── test_transformers_bert_attention.py
    │       ├── test_transformers_bert_generation_attention.py
    │       ├── test_transformers_blenderbot_attention.py
    │       ├── test_transformers_camembert_attention.py
    │       ├── test_transformers_chinese_clip_attention.py
    │       ├── test_transformers_clip_attention.py
    │       ├── test_transformers_ctrl_attention.py
    │       ├── test_transformers_deberta_attention.py
    │       ├── test_transformers_deepseek_v3_attention.py
    │       ├── test_transformers_distilbert_attention.py
    │       ├── test_transformers_electra_attention.py
    │       ├── test_transformers_flaubert_attention.py
    │       ├── test_transformers_fsmt_attention.py
    │       ├── test_transformers_funnel_attention.py
    │       ├── test_transformers_gpt2_attention.py
    │       ├── test_transformers_layoutlm_attention.py
    │       ├── test_transformers_longformer_attention.py
    │       ├── test_transformers_lxmert_attention.py
    │       ├── test_transformers_m2m_100_attention.py
    │       ├── test_transformers_marian_attention.py
    │       ├── test_transformers_mbart_attention.py
    │       ├── test_transformers_mobilebert_attention.py
    │       ├── test_transformers_mt5_attention.py
    │       ├── test_transformers_openai_attention.py
    │       ├── test_transformers_pegasus_attention.py
    │       ├── test_transformers_prophetnet_attention.py
    │       ├── test_transformers_qwen2_attention.py
    │       ├── test_transformers_qwen3_attention.py
    │       ├── test_transformers_reformer_attention.py
    │       ├── test_transformers_roberta_attention.py
    │       ├── test_transformers_squeezebert_attention.py
    │       ├── test_transformers_t5_attention.py
    │       ├── test_transformers_xlm_attention.py
    │       ├── test_transformers_xlm_roberta_attention.py
    │       ├── test_transformers_xlnet_attention.py
    │       └── test_vit_b_32.py
    ├── pytorch/
    │   └── README.md
    ├── quantize/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── imreadwrite.cpp
    │   ├── imreadwrite.h
    │   ├── ncnn2int8.cpp
    │   ├── ncnn2table.cpp
    │   └── npy.hpp
    └── tensorflow/
        └── readme.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .astylerc
================================================
# astyle -n -r "benchmark/*.h,*.cpp" "src/*.h,*.cpp" "tests/*.h,*.cpp" "tools/*.h,*.cpp" "examples/*.h,*.cpp"

# brace style
--style=allman

# brace modify
--attach-namespaces
--attach-extern-c
--attach-closing-while

# indentation
--indent-preproc-define
--indent-col1-comments
--min-conditional-indent=0
--max-continuation-indent=120

# padding
--pad-oper
--pad-comma
--pad-header
--align-pointer=type
--align-reference=type

# formatting
--break-closing-braces
--attach-return-type
--attach-return-type-decl
--keep-one-line-blocks
--keep-one-line-statements
--convert-tabs
--max-code-length=200
--mode=c

# other
--lineend=linux


================================================
FILE: .clang-format
================================================
# find src/ tools/ tests/ examples/ benchmark/ -type f \( -name '*.c' -o -name '*.cpp' -o -name '*.h' \) | xargs -I {} clang-format -i {}

# need clang-format >= 10.0

AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
# AlignConsecutiveBitFields: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: true
AlignEscapedNewlines: Left
# AlignOperands: AlignAfterOperator
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
# AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: true
  AfterControlStatement: Always
  AfterEnum: true
  AfterFunction: true
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: true
  AfterUnion: true
  AfterExternBlock: false
  BeforeCatch: true
  BeforeElse: true
#  BeforeLambdaBody: false
#  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: false
BreakAfterJavaFieldAnnotations: true
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
BreakStringLiterals: false
ColumnLimit: 0
# CommentPragmas:
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: false
DerivePointerAlignment: false
# DisableFormat:
# ExperimentalAutoDetectBinPacking:
FixNamespaceComments: true
# ForEachMacros:
IncludeBlocks: Regroup
# IncludeCategories:
# IncludeIsMainRegex:
# IncludeIsMainSourceRegex:
# IndentCaseBlocks: false
IndentCaseLabels: false
# IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: None
IndentWidth: 4
# IndentWrappedFunctionNames: 4
# InsertTrailingCommas: None
# JavaImportGroups:
# JavaScriptQuotes
# JavaScriptWrapImports:
KeepEmptyLinesAtTheStartOfBlocks: false
Language: Cpp
# MacroBlockBegin:
# MacroBlockEnd:
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
# NamespaceMacros:
# ObjCBinPackProtocolList:
# ObjCBlockIndentWidth:
# ObjCBreakBeforeNestedBlockParam:
# ObjCSpaceAfterProperty:
# ObjCSpaceBeforeProtocolList:
# PenaltyBreakAssignment:
# PenaltyBreakBeforeFirstCallParameter:
# PenaltyBreakComment:
# PenaltyBreakFirstLessLess:
# PenaltyBreakString:
# PenaltyBreakTemplateDeclaration:
# PenaltyExcessCharacter:
# PenaltyReturnTypeOnItsOwnLine:
PointerAlignment: Left
# RawStringFormats:
ReflowComments: false
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++03
#StatementMacros:
TabWidth: 4
# TypenameMacros:
UseCRLF: false
UseTab: Never


================================================
FILE: .gitattributes
================================================
*.comp linguist-language=GLSL


================================================
FILE: .github/ISSUE_TEMPLATE/bug.md
================================================
---
name: "\U0001F41B bug issue"
about: submit a bug report +_+
---

## error log | 日志或报错信息 | ログ

## context | 编译/运行环境 | バックグラウンド

## how to reproduce | 复现步骤 | 再現方法
1.
2.
3.

## more | 其他 | その他


================================================
FILE: .github/ISSUE_TEMPLATE/model-convert.md
================================================
---
name: "\U0001F6B8 model convert issue"
about: "Life is Short, Use pnnx and convertmodel.com"
---

## error log | 日志或报错信息 | ログ

## model | 模型 | モデル
1. original model

## how to reproduce | 复现步骤 | 再現方法
1.
2.
3.


================================================
FILE: .github/ISSUE_TEMPLATE/others.md
================================================
---
name: "\U0001F4DD others"
about: discussion, suggestion and question
---

## detail | 详细描述 | 詳細な説明


================================================
FILE: .github/ISSUE_TEMPLATE/quantization.md
================================================
---
name: "\U0001F4C8 quantization"
about: best wishes that your low-bit quantization has a low accuracy loss...\(^▽^)/...2333...
---

## expectation | 诉求 | 期待する
1. speed 
2. precision

## model | 模型 | モデル
1. model.param and model.bin

## detail | 详细描述 | 詳細な説明


================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"


================================================
FILE: .github/labeler.yml
================================================
cmake:
- changed-files:
  - any-glob-to-any-file: ['cmake/**', 'toolchains/**']

doc: 
- changed-files:
  - any-glob-to-any-file: docs/**

python: 
- changed-files:
  - any-glob-to-any-file: python/**

example: 
- changed-files:
  - any-glob-to-any-file: examples/**

test: 
- changed-files:
  - any-glob-to-any-file: tests/**

tool: 
- changed-files:
  - any-glob-to-any-file: tools/**
pnnx: 
- changed-files:
  - any-glob-to-any-file: tools/pnnx/**

core: 
- changed-files:
  - any-glob-to-any-file: src/*
layer: 
- changed-files:
  - any-glob-to-any-file: src/layer/*

arm: 
- changed-files:
  - any-glob-to-any-file: src/layer/arm/**
loongarch: 
- changed-files:
  - any-glob-to-any-file: src/layer/loongarch/**
mips: 
- changed-files:
  - any-glob-to-any-file: src/layer/mips/**
riscv: 
- changed-files:
  - any-glob-to-any-file: src/layer/riscv/**
vulkan: 
- changed-files:
  - any-glob-to-any-file: src/layer/vulkan/**
x86: 
- changed-files:
  - any-glob-to-any-file: src/layer/x86/**


================================================
FILE: .github/workflows/android.yml
================================================
name: android
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/android.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/riscv/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/android.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/riscv/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
concurrency:
  group: android-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    env:
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_LATEST_HOME/build/cmake/android.toolchain.cmake \
        -DANDROID_PLATFORM=android-21 \
        -DANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - name: armeabi-v7a
      run: |
        mkdir build-armeabi-v7a && cd build-armeabi-v7a
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON ..
        cmake --build . -j $(nproc)
    - name: arm64-v8a
      run: |
        mkdir build-arm64-v8a && cd build-arm64-v8a
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="arm64-v8a" ..
        cmake --build . -j $(nproc)
    - name: x86
      run: |
        mkdir build-x86 && cd build-x86
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86" ..
        cmake --build . -j $(nproc)
    - name: x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86_64" ..
        cmake --build . -j $(nproc)
    - name: riscv64
      run: |
        mkdir build-riscv64 && cd build-riscv64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="riscv64" ..
        cmake --build . -j $(nproc)

    - name: armeabi-v7a-shared
      run: |
        mkdir build-armeabi-v7a-shared && cd build-armeabi-v7a-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: arm64-v8a-shared
      run: |
        mkdir build-arm64-v8a-shared && cd build-arm64-v8a-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="arm64-v8a" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: x86-shared
      run: |
        mkdir build-x86-shared && cd build-x86-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: x86_64-shared
      run: |
        mkdir build-x86_64-shared && cd build-x86_64-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="x86_64" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: riscv64-shared
      run: |
        mkdir build-riscv64-shared && cd build-riscv64-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="riscv64" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)

  ndk-r16b:
    runs-on: ubuntu-latest
    env:
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/android-ndk-r16b/build/cmake/android.toolchain.cmake \
        -DANDROID_PLATFORM=android-21 \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - name: ndk-r16b
      env:
        DEBIAN_FRONTEND: noninteractive
      run: |
        pushd /usr/lib/x86_64-linux-gnu/
        sudo ln -s libncurses.so.6 libncurses.so.5
        sudo ln -s libtinfo.so.6 libtinfo.so.5
        popd
        wget -q https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -O $GITHUB_WORKSPACE/android-ndk-r16b-linux-x86_64.zip
        cd $GITHUB_WORKSPACE && unzip -q android-ndk-r16b-linux-x86_64.zip

    - name: armeabi-v7a
      run: |
        mkdir build-armeabi-v7a && cd build-armeabi-v7a
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON ..
        cmake --build . -j $(nproc)
    - name: armeabi-v7a-no-neon
      run: |
        mkdir build-armeabi-v7a-no-neon && cd build-armeabi-v7a-no-neon
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF ..
        cmake --build . -j $(nproc)
    - name: arm64-v8a
      run: |
        mkdir build-arm64-v8a && cd build-arm64-v8a
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="arm64-v8a" ..
        cmake --build . -j $(nproc)

    - name: armeabi-v7a-shared
      run: |
        mkdir build-armeabi-v7a-shared && cd build-armeabi-v7a-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: armeabi-v7a-no-neon-shared
      run: |
        mkdir build-armeabi-v7a-no-neon-shared && cd build-armeabi-v7a-no-neon-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    - name: arm64-v8a-shared
      run: |
        mkdir build-arm64-v8a-shared && cd build-arm64-v8a-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DANDROID_ABI="arm64-v8a" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)


================================================
FILE: .github/workflows/code-format-msg.yml
================================================
name: code-format-msg

on:
  workflow_run:
    workflows: [code-format]
    types: [completed]

concurrency:
  group: code-format-msg-${{ github.head_ref || github.run_id }}

permissions:
  contents: read
  pull-requests: write

jobs:
  pr-context:
    name: acquire-pr-context
    runs-on: ubuntu-latest
    outputs:
      PR_HEADSHA: ${{ steps.set-pr-context.outputs.head-sha }}
      PR_NUMBER:  ${{ steps.set-pr-context.outputs.number   }}
    if: ${{ github.event.workflow_run.event == 'pull_request' }}
    steps:
    - name: get-pr-context
      id: set-pr-context
      env:
        GH_TOKEN: ${{ github.token }}
        PR_TARGET_REPO: ${{ github.repository }}
        PR_BRANCH: |-
          ${{
            (github.event.workflow_run.head_repository.owner.login != github.event.workflow_run.repository.owner.login)
              && format('{0}:{1}', github.event.workflow_run.head_repository.owner.login, github.event.workflow_run.head_branch)
              || github.event.workflow_run.head_branch
          }}
      run: |
        gh pr view --repo "${PR_TARGET_REPO}" "${PR_BRANCH}" \
          --json 'number,headRefOid' \
          --jq '"number=\(.number)\nhead-sha=\(.headRefOid)"' \
          >> $GITHUB_OUTPUT

  remove-comment-if-success:
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    needs: [pr-context]
    env:
      PR_HEADSHA: ${{ needs.pr-context.outputs.PR_HEADSHA }}
      PR_NUMBER:  ${{ needs.pr-context.outputs.PR_NUMBER  }}
    steps:
    - name: Remove existing "format check failed" comment
      uses: actions/github-script@v8
      with:
        script: |
          const owner = context.repo.owner;
          const repo = context.repo.repo;
          const { data: comments } = await github.rest.issues.listComments({
            owner,
            repo,
            issue_number: ${{ env.PR_NUMBER }},
          });

          const targetComment = comments.find(comment =>
            comment.body.includes("Please enable github action in **YOUR FORKED REPO** to make code-format workflow work")
          );

          if (targetComment) {
            await github.rest.issues.deleteComment({
              owner,
              repo,
              comment_id: targetComment.id,
            });
            console.log("Removed existing code-format failure comment.");
          } else {
            console.log("No existing format failure comment to remove.");
          }

  post-comment-if-failure:
    if: ${{ github.event.workflow_run.conclusion == 'failure' }}
    runs-on: ubuntu-latest
    needs: [pr-context]
    env:
      PR_HEADSHA: ${{ needs.pr-context.outputs.PR_HEADSHA }}
      PR_NUMBER:  ${{ needs.pr-context.outputs.PR_NUMBER  }}
    steps:
    - name: Post comment on failed code-format if not existing
      uses: actions/github-script@v8
      with:
        script: |
          const owner = context.repo.owner;
          const repo = context.repo.repo;
          const { data: comments } = await github.rest.issues.listComments({
            owner,
            repo,
            issue_number: ${{ env.PR_NUMBER }},
          });

          const existingComment = comments.find(comment =>
            comment.body.includes("Please enable github action in **YOUR FORKED REPO** to make code-format workflow work")
          );

          if (existingComment) {
            console.log("A code-format failure comment already exists.");
          } else {
            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number: ${{ env.PR_NUMBER }},
              body: "Please enable github action in **YOUR FORKED REPO** to make code-format workflow work",
            });
            console.log("Created code-format failure comment.");
          }


================================================
FILE: .github/workflows/code-format.yml
================================================
name: code-format

# Triggered by every push and every pull request.
on:
  - push
  - pull_request

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: code-format-${{ github.ref }}
  cancel-in-progress: true

# Write access to repository contents is requested; presumably this is what
# lets the auto-commit step below push the formatting fixes.
permissions:
  contents: write

jobs:
  code-format:
    runs-on: ubuntu-latest
    container: ubuntu:20.04
    steps:
    # Install astyle and git from the distribution packages of the container.
    - name: astyle
      run: |
        export DEBIAN_FRONTEND=noninteractive
        apt-get update -y
        apt-get install -y astyle git

    - uses: actions/checkout@v6

    # clang-format 10.0.1 is built from source and kept in the actions cache
    # under the directory clang-format-install.
    - name: cache-clang-format
      id: cache-clang-format
      uses: actions/cache@v5
      with:
        path: clang-format-install
        key: clang-format-install-5
    # Only runs on a cache miss: download the LLVM 10.0.1 sources, build just
    # the clang-format target, keep the binary and delete the source tree.
    - name: clang-format
      if: steps.cache-clang-format.outputs.cache-hit != 'true'
      run: |
        export DEBIAN_FRONTEND=noninteractive
        apt-get update -y
        apt-get install -y build-essential wget curl cmake unzip zip python3-pip
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.1/llvm-project-10.0.1.tar.xz
        tar -xf llvm-project-10.0.1.tar.xz
        cd llvm-project-10.0.1
        mkdir build
        cd build
        cmake -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DLLVM_ENABLE_PROJECTS="clang" -DLLVM_TARGETS_TO_BUILD="" -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_DOCS=OFF ../llvm/
        make -j4 clang-format
        mkdir $GITHUB_WORKSPACE/clang-format-install
        cp -r bin/clang-format $GITHUB_WORKSPACE/clang-format-install
        cd ../../
        rm -rf llvm-project-10.0.1
        rm llvm-project-10.0.1.tar.xz

    # A second, newer clang-format (21.1.8) is cached separately under the
    # directory clang-format-21-install.
    - name: cache-clang-format-21
      id: cache-clang-format-21
      uses: actions/cache@v5
      with:
        path: clang-format-21-install
        key: clang-format-21-install
    # Only runs on a cache miss. Same procedure as above for LLVM 21.1.8, with
    # an additional cmake installed through pip before configuring.
    - name: clang-format-21
      if: steps.cache-clang-format-21.outputs.cache-hit != 'true'
      run: |
        export DEBIAN_FRONTEND=noninteractive
        apt-get update -y
        apt-get install -y build-essential wget curl cmake unzip zip python3-pip
        pip install cmake
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-21.1.8/llvm-project-21.1.8.src.tar.xz
        tar -xf llvm-project-21.1.8.src.tar.xz
        cd llvm-project-21.1.8.src
        mkdir build
        cd build
        cmake -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DLLVM_ENABLE_PROJECTS="clang" -DLLVM_TARGETS_TO_BUILD="" -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_DOCS=OFF ../llvm/
        make -j4 clang-format
        mkdir $GITHUB_WORKSPACE/clang-format-21-install
        cp -r bin/clang-format $GITHUB_WORKSPACE/clang-format-21-install
        cd ../../
        rm -rf llvm-project-21.1.8.src
        rm llvm-project-21.1.8.src.tar.xz

    # Move the 10.0.1 binary out of the workspace into /usr/local/bin, remove
    # the now-empty cache directory and run the repository's codeformat.sh.
    # NOTE(review): moving the binary out of the checkout presumably keeps it
    # from being picked up by the auto-commit step - confirm.
    - name: code-format
      run: |
        mv $GITHUB_WORKSPACE/clang-format-install/clang-format /usr/local/bin/clang-format
        rm -rf $GITHUB_WORKSPACE/clang-format-install
        sh codeformat.sh

    # Format every *.comp file under src/layer/vulkan/shader in place with
    # clang-format-21, telling it to treat each file as if it were main.cpp.
    - name: code-format-glsl
      run: |
        mv $GITHUB_WORKSPACE/clang-format-21-install/clang-format /usr/local/bin/clang-format-21
        rm -rf $GITHUB_WORKSPACE/clang-format-21-install
        cd src/layer/vulkan/shader
        find . -type f -name '*.comp' | xargs -i clang-format-21 -i -assume-filename=main.cpp {}

    # Mark the checkout path inside the container as a safe git directory.
    - name: configure-git-safe-directory
      run: git config --global --add safe.directory /__w/ncnn/ncnn

    # Commit whatever the formatters changed.
    - uses: stefanzweifel/git-auto-commit-action@v7
      with:
        commit_message: apply code-format changes

    # Copy both binaries back to the cached paths.
    # NOTE(review): presumably so that the cache actions can save them when
    # the job finishes - confirm.
    - name: restore-clang-format-cache
      run: |
        mkdir $GITHUB_WORKSPACE/clang-format-install
        cp -r /usr/local/bin/clang-format $GITHUB_WORKSPACE/clang-format-install
        mkdir $GITHUB_WORKSPACE/clang-format-21-install
        cp -r /usr/local/bin/clang-format-21 $GITHUB_WORKSPACE/clang-format-21-install/clang-format


================================================
FILE: .github/workflows/codeql-analysis.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
name: "CodeQL"

# Runs on pushes to master, on pull requests targeting master (both ignoring
# changes that only touch Markdown files) and on a weekly cron schedule.
on:
  push:
    branches:
      - master
    paths-ignore:
      - '**.md'
  pull_request:
    # The branches below must be a subset of the branches above
    branches:
      - master
    paths-ignore:
      - '**.md'
  schedule:
    - cron: '0 20 * * 4'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: CodeQL-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide default; the analyze job declares its own permissions.
permissions:
  contents: read

jobs:
  # Single CodeQL job: check out the code, initialise CodeQL for each language
  # in the matrix, let autobuild compile the project and run the analysis.
  analyze:
    permissions:
      actions: read  # for github/codeql-action/init to get workflow details
      contents: read  # for actions/checkout to fetch code
      security-events: write  # for github/codeql-action/autobuild to send a status report
    name: Analyze
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      matrix:
        # Override automatic language detection by changing the below list
        # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
        language: ['cpp']
        # Learn more...
        # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection

    steps:
    # Check out the ref that triggered the run. For pull requests this is the
    # merge commit, which is what code scanning recommends analysing. The
    # former `git checkout HEAD^2` step (and the `fetch-depth: 2` it needed)
    # is deprecated and has been removed.
    - name: Checkout repository
      uses: actions/checkout@v6

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v4
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main

    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v4

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl

    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
    #    and modify them (or add more) to build your code if your project
    #    uses a compiled language

    #- run: |
    #   make bootstrap
    #   make release

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v4


================================================
FILE: .github/workflows/compare-binary-size-pr-comment.yml
================================================
name: compare-binary-size-pr-comment

# Runs after every completed run of the "compare-binary-size" workflow and
# publishes the size table it uploaded as a comment on the pull request.
on:
  workflow_run:
    workflows: ["compare-binary-size"]
    types:
      - completed

permissions:
  actions: read
  contents: read
  pull-requests: write

jobs:
  pr-comment:
    runs-on: ubuntu-latest
    steps:
    - name: Setup tools
      run: |
        sudo apt-get update
        sudo apt-get install -y jq unzip

    # Sets the step output `skip` to true when the triggering run has no
    # associated pull request; every later step is gated on that output.
    - name: Ensure workflow_run is for a PR
      id: validate
      run: |
        # Use the event payload file provided by GitHub Actions directly
        echo "Using event payload from: $GITHUB_EVENT_PATH"
        echo "Event file size: $(wc -c < "$GITHUB_EVENT_PATH") bytes"

        # Safely compute number of associated PRs (use // 0 to default if missing)
        PR_COUNT=$(jq -r '.workflow_run.pull_requests | length // 0' "$GITHUB_EVENT_PATH")
        echo "Associated pull_request count: $PR_COUNT"

        if [ "$PR_COUNT" -eq 0 ]; then
          echo "No pull_request associated with this workflow_run; nothing to do."
          echo "skip=true" >> $GITHUB_OUTPUT
          exit 0
        fi

        echo "skip=false" >> $GITHUB_OUTPUT

    # Looks up the artifact named $ART_NAME on the triggering run through the
    # REST API and unpacks it into /tmp/artifact_contents.
    - name: Download artifact zip for this run
      if: steps.validate.outputs.skip != 'true'
      env:
        RUN_ID: ${{ github.event.workflow_run.id }}
        OWNER: ${{ github.repository_owner }}
        REPO: ${{ github.repository }}
        TOKEN: ${{ secrets.COMMENTER_PAT }}
        ART_NAME: "compare-binary-size.md"
      run: |
        echo "Listing artifacts for run $RUN_ID"
        # REPO is "owner/name"; ${REPO#*/} strips the owner prefix.
        API="https://api.github.com/repos/$OWNER/${REPO#*/}/actions/runs/$RUN_ID/artifacts"

        # Save artifact list to a file (avoid pipe/echo issues)
        curl -s -H "Authorization: token $TOKEN" "$API" -o /tmp/art_list.json
        echo "Art list size: $(wc -c < /tmp/art_list.json) bytes"
        if ! jq . /tmp/art_list.json; then
          echo "Failed to parse /tmp/art_list.json with jq; aborting for safety."
          exit 1
        fi

        # find artifact archive_download_url by name (first match)
        ARCHIVE_URL=$(jq -r --arg name "$ART_NAME" '.artifacts[] | select(.name==$name) | .archive_download_url' /tmp/art_list.json | head -n1)
        if [ -z "$ARCHIVE_URL" ] || [ "$ARCHIVE_URL" = "null" ]; then
          echo "Artifact named '$ART_NAME' not found for run $RUN_ID. Exiting."
          exit 0
        fi
        echo "Downloading artifact from: $ARCHIVE_URL"

        # download and unzip to temp dir
        mkdir -p /tmp/artifact_contents
        curl -L -H "Authorization: token $TOKEN" -o /tmp/artifact.zip "$ARCHIVE_URL"
        if ! unzip -q /tmp/artifact.zip -d /tmp/artifact_contents; then
          echo "Failed to unzip /tmp/artifact.zip"; exit 1
        fi
        ls -la /tmp/artifact_contents

    # Exposes the (truncated) report as the multiline step output
    # `compare_content`.
    - name: Read compare-binary-size.md content
      if: steps.validate.outputs.skip != 'true'
      id: read
      run: |
        # find file inside artifact_contents
        FILE=$(find /tmp/artifact_contents -type f -name "compare-binary-size.md" | head -n1 || true)
        if [ -z "$FILE" ]; then
          # If artifact name matched but internal filename differs, try any .md
          FILE=$(find /tmp/artifact_contents -type f -name "*.md" | head -n1 || true)
        fi

        # The artifact comes from a pull-request run, so its text is untrusted.
        # Use a random delimiter for the multiline output: with a fixed "EOF"
        # a line equal to "EOF" inside the file would end the value early and
        # let the remaining lines be parsed as additional outputs.
        DELIM="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"

        if [ -z "$FILE" ]; then
          echo "compare_content<<$DELIM" >> "$GITHUB_OUTPUT"
          echo "No compare-binary-size.md found in artifact." >> "$GITHUB_OUTPUT"
          echo "$DELIM" >> "$GITHUB_OUTPUT"
        else
          # Truncate to avoid overly long comments (adjust lines as needed)
          head -n 1000 "$FILE" > /tmp/compare-truncated.md || true
          echo "compare_content<<$DELIM" >> "$GITHUB_OUTPUT"
          cat /tmp/compare-truncated.md >> "$GITHUB_OUTPUT"
          echo "$DELIM" >> "$GITHUB_OUTPUT"
        fi

    # Creates the bot comment, or updates it in place when a comment carrying
    # the hidden marker already exists on the pull request.
    - name: Post or update PR comment via actions/github-script
      if: steps.validate.outputs.skip != 'true'
      uses: actions/github-script@v8
      with:
        github-token: ${{ secrets.COMMENTER_PAT }}
        script: |
          const pr = context.payload.workflow_run.pull_requests[0];
          if (!pr) {
            core.info("No pull request found in workflow_run payload; skipping.");
            return;
          }

          const owner = context.repo.owner;
          const repo = context.repo.repo;
          const issue_number = pr.number;
          const marker = '<!-- compare-binary-size-bot -->';

          // Read the compare content from env (set in previous step outputs)
          const compare = process.env.COMPARE_CONTENT || "";

          const body = `${marker}\n**Binary size comparison** (from artifact)\n\n\`\`\`markdown\n${compare}\n\`\`\``;

          // List ALL existing comments and find our bot comment (by marker).
          // Paginating avoids missing the marker on pull requests with more
          // than one page of comments, which would create a duplicate.
          const comments = await github.paginate(github.rest.issues.listComments, {
            owner,
            repo,
            issue_number,
            per_page: 100
          });

          const existing = comments.find(c => c.body && c.body.includes(marker));

          if (existing) {
            await github.rest.issues.updateComment({
              owner,
              repo,
              comment_id: existing.id,
              body
            });
            core.info(`Updated comment id=${existing.id}`);
          } else {
            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number,
              body
            });
            core.info("Created new comment");
          }
      env:
        # pass the content from previous step into the github-script environment
        COMPARE_CONTENT: ${{ steps.read.outputs.compare_content }}


================================================
FILE: .github/workflows/compare-binary-size.yml
================================================
name: compare-binary-size

# Runs for pull requests against master that touch the build system, the
# toolchain files, the sources or the glslang path, and reports how the size
# of libncnn.so changes relative to the base branch.
on:
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/compare-binary-size.yml'
    - 'toolchains/**'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/**'
    - 'glslang'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: compare-binary-size-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read
  actions: read

jobs:
  compare-size:
    runs-on: ubuntu-latest
    steps:
    # The pull request merge ref goes into ./pr ...
    - name: checkout-pr-branch
      uses: actions/checkout@v6
      with:
        ref: refs/pull/${{ github.event.pull_request.number }}/merge
        submodules: true
        path: pr

    # ... and the base branch of the pull request into ./base.
    - name: checkout-base-branch
      uses: actions/checkout@v6
      with:
        ref: ${{ github.event.pull_request.base.ref }}
        repository: ${{ github.event.pull_request.base.repo.full_name }}
        submodules: true
        path: base

    # Cross compilers for the armhf and aarch64 builds. -y keeps the install
    # non-interactive regardless of how apt is configured on the runner.
    - name: install-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-arm-linux-gnueabihf g++-aarch64-linux-gnu

    - name: compare-sizes
      env:
        COMMON_CMAKE_ARGS: -DNCNN_SHARED_LIB=ON -DNCNN_VULKAN=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF
      run: |
        # define target architectures
        archs=("x86_64" "armhf" "aarch64")

        # Configure and build libncnn.so for one checkout / architecture pair.
        #   $1: checkout directory (pr or base)
        #   $2: target architecture (one of the entries in archs)
        # The size in bytes of the built library is left in LIB_SIZE.
        build_and_measure() {
          mkdir -p $1/build_$2
          pushd $1/build_$2
          if [ "$2" = "armhf" ]; then
            cmake ${{env.COMMON_CMAKE_ARGS}} -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake ..
          elif [ "$2" = "aarch64" ]; then
            cmake ${{env.COMMON_CMAKE_ARGS}} -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake ..
          else
            cmake ${{env.COMMON_CMAKE_ARGS}} ..
          fi
          cmake --build . -j $(nproc)
          # readlink -f resolves the libncnn.so symlink to the real file
          LIB_SIZE=$(stat -c%s $(readlink -f src/libncnn.so))
          popd
        }

        # generate table
        echo "The binary size change of libncnn.so (bytes)" >> compare-binary-size.md
        echo "| architecture | base size | pr size | difference |" >> compare-binary-size.md
        echo "|--------------|-----------|---------|------------|" >> compare-binary-size.md

        for arch in "${archs[@]}"; do

          build_and_measure pr $arch
          PR_SIZE=$LIB_SIZE

          build_and_measure base $arch
          BASE_SIZE=$LIB_SIZE

          DIFF=$(($PR_SIZE - $BASE_SIZE))
          if [ $DIFF -gt 0 ]; then
            DIFF_STR="+$DIFF :warning:"
          else
            DIFF_STR="$DIFF :kissing_heart:"
          fi

          echo "| $arch | $BASE_SIZE | $PR_SIZE | $DIFF_STR |" >> compare-binary-size.md
        done

        cat compare-binary-size.md

    # Publish the table as an artifact named compare-binary-size.md.
    - name: upload-compare-binary-size-md
      uses: actions/upload-artifact@v6
      with:
        name: compare-binary-size.md
        path: compare-binary-size.md


================================================
FILE: .github/workflows/elf-riscv32.yml
================================================
name: elf-riscv32

# Runs on pushes to master and on pull requests targeting master, limited to
# changes in this workflow, the riscv32 ELF toolchain file, the CMake files,
# the listed source directories and the tests.
on:
  push:
    branches:
    - master
    paths:
    - '.github/workflows/elf-riscv32.yml'
    - 'toolchains/riscv32-unknown-elf.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'
  pull_request:
    branches:
    - master
    paths:
    - '.github/workflows/elf-riscv32.yml'
    - 'toolchains/riscv32-unknown-elf.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: elf-riscv32-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  rv32gc:
    runs-on:
    - self-hosted
    - linux
    - centos
    steps:
    - uses: actions/checkout@v6

    # The disabled steps below fetch a riscv-gnu-toolchain release and build
    # riscv-pk and riscv-isa-sim into $GITHUB_WORKSPACE/riscv32-elf. The active
    # build and test steps use /data/action/osd/riscv32-elf instead.
    # NOTE(review): presumably that directory is preinstalled on the
    # self-hosted runner - confirm.

    #- name: riscv-gnu-toolchain
      #run: |
        #wget -c https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.01.20/riscv32-elf-ubuntu-22.04-gcc-nightly-2025.01.20-nightly.tar.xz
        #tar -xf riscv32-elf-ubuntu-22.04-gcc-nightly-2025.01.20-nightly.tar.xz
        #mv riscv riscv32-elf

    #- name: checkout-riscv-pk
      #uses: actions/checkout@v6
      #with:
        #repository: riscv/riscv-pk
        #path: riscv-pk
        #ref: d8659a4e8e888bdc9caf840ad17bfe83239b1d64
    #- name: riscv-pk
      #run: |
        #cd riscv-pk
        #mkdir build && cd build
        #export PATH=$GITHUB_WORKSPACE/riscv32-elf/bin:$PATH
        #export CFLAGS="-O3"
        #export CXXFLAGS="-O3"
        #../configure --prefix=$GITHUB_WORKSPACE/riscv32-elf --with-arch=rv32gc_zicsr_zifencei --host=riscv32-unknown-elf --with-abi=ilp32d
        #make -j4
        #make install

    #- name: checkout-riscv-isa-sim
      #uses: actions/checkout@v6
      #with:
        #repository: riscv-software-src/riscv-isa-sim
        #path: riscv-isa-sim
        #ref: 5ef9a61f5fecdb9bf77da155172c8018ce820308
    #- name: riscv-isa-sim
      #run: |
        #cd riscv-isa-sim
        #mkdir build && cd build
        #export PATH=$GITHUB_WORKSPACE/riscv32-elf/bin:$PATH
        #export CFLAGS="-O3"
        #export CXXFLAGS="-O3"
        #../configure --prefix=$GITHUB_WORKSPACE/riscv32-elf
        #make -j4
        #make install

    #- name: riscv-strip-install
      #run: find $GITHUB_WORKSPACE/riscv32-elf -type f | xargs -i strip -g {} || true

    # Cross-build ncnn and its tests with threads, OpenMP and the RISC-V
    # vector / half-precision options switched off.
    - name: build
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/riscv32-elf
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv32-unknown-elf.toolchain.cmake -DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_RVV=OFF -DNCNN_XTHEADVECTOR=OFF -DNCNN_ZFH=OFF -DNCNN_ZVFH=OFF ..
        cmake --build . -j 4

    # Run ctest with spike named as the executable loader, passing the rv32gc
    # ISA string and the pk binary as loader arguments.
    - name: test
      run: |
        export PATH=/data/action/osd/riscv32-elf/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=spike TESTS_EXECUTABLE_LOADER_ARGUMENTS="--isa=rv32gc;/data/action/osd/riscv32-elf/riscv32-unknown-elf/bin/pk" ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/elf-riscv64.yml
================================================
name: elf-riscv64

# Runs on pushes to master and on pull requests targeting master, limited to
# changes in this workflow, the riscv64 ELF toolchain file, the CMake files,
# the listed source directories and the tests.
on:
  push:
    branches:
    - master
    paths:
    - '.github/workflows/elf-riscv64.yml'
    - 'toolchains/riscv64-unknown-elf.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'
  pull_request:
    branches:
    - master
    paths:
    - '.github/workflows/elf-riscv64.yml'
    - 'toolchains/riscv64-unknown-elf.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: elf-riscv64-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  rv64gc:
    runs-on:
    - self-hosted
    - linux
    - centos
    steps:
    - uses: actions/checkout@v6

    # The disabled steps below fetch a riscv-gnu-toolchain release and build
    # riscv-pk and riscv-isa-sim into $GITHUB_WORKSPACE/riscv64-elf. The active
    # build and test steps use /data/action/osd/riscv64-elf instead.
    # NOTE(review): presumably that directory is preinstalled on the
    # self-hosted runner - confirm.

    #- name: riscv-gnu-toolchain
      #run: |
        #wget -c https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.01.20/riscv64-elf-ubuntu-22.04-gcc-nightly-2025.01.20-nightly.tar.xz
        #tar -xf riscv64-elf-ubuntu-22.04-gcc-nightly-2025.01.20-nightly.tar.xz
        #mv riscv riscv64-elf

    #- name: checkout-riscv-pk
      #uses: actions/checkout@v6
      #with:
        #repository: riscv/riscv-pk
        #path: riscv-pk
        #ref: d8659a4e8e888bdc9caf840ad17bfe83239b1d64
    #- name: riscv-pk
      #run: |
        #cd riscv-pk
        #mkdir build && cd build
        #export PATH=$GITHUB_WORKSPACE/riscv64-elf/bin:$PATH
        #export CFLAGS="-O3"
        #export CXXFLAGS="-O3"
        #../configure --prefix=$GITHUB_WORKSPACE/riscv64-elf --with-arch=rv64gc_zicsr_zifencei --host=riscv64-unknown-elf --with-abi=lp64d
        #make -j4
        #make install

    #- name: checkout-riscv-isa-sim
      #uses: actions/checkout@v6
      #with:
        #repository: riscv-software-src/riscv-isa-sim
        #path: riscv-isa-sim
        #ref: 5ef9a61f5fecdb9bf77da155172c8018ce820308
    #- name: riscv-isa-sim
      #run: |
        #cd riscv-isa-sim
        #mkdir build && cd build
        #export PATH=$GITHUB_WORKSPACE/riscv64-elf/bin:$PATH
        #export CFLAGS="-O3"
        #export CXXFLAGS="-O3"
        #../configure --prefix=$GITHUB_WORKSPACE/riscv64-elf
        #make -j4
        #make install

    #- name: riscv-strip-install
      #run: find $GITHUB_WORKSPACE/riscv64-elf -type f | xargs -i strip -g {} || true

    # Cross-build ncnn and its tests with threads, OpenMP and XTheadVector
    # switched off.
    - name: build
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/riscv64-elf
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-elf.toolchain.cmake -DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_XTHEADVECTOR=OFF ..
        cmake --build . -j 4

    # Run ctest with spike named as the executable loader, passing the rv64gc
    # ISA string and the pk binary as loader arguments.
    - name: test
      run: |
        export PATH=/data/action/osd/riscv64-elf/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=spike TESTS_EXECUTABLE_LOADER_ARGUMENTS="--isa=rv64gc;/data/action/osd/riscv64-elf/riscv64-unknown-elf/bin/pk" ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/esp32.yml
================================================
name: ESP32

# Runs on pushes to master and on pull requests targeting master, limited to
# changes in this workflow, the CMake files and the listed source directories.
on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/esp32.yml'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'src/*'
      - 'src/layer/*'
  pull_request:
    branches:
      - master
    paths:
      - '.github/workflows/esp32.yml'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'src/*'
      - 'src/layer/*'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: esp32-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build:
    name: ESP32
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v6
        with:
          submodules: true

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.8'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y cmake ninja-build ccache

      # ESP-IDF (branch release/v5.3) is checked out next to the sources, into
      # the directory esp-idf-install.
      - name: Checkout ESP-IDF
        uses: actions/checkout@v6
        with:
          repository: espressif/esp-idf
          path: esp-idf-install
          ref: release/v5.3

      # Fetch the ESP-IDF submodules and run its installer script.
      - name: Install ESP-IDF
        run: |
          cd esp-idf-install
          git submodule update --init --recursive
          ./install.sh

      # Source the ESP-IDF environment, record IDF_PATH and two of its
      # sub-directories in GITHUB_ENV / GITHUB_PATH, then configure, build and
      # install ncnn with the esp32 toolchain file.
      - name: Set environment and build NCNN for ESP32
        run: |
          source esp-idf-install/export.sh
          echo "IDF_PATH=$IDF_PATH" >> $GITHUB_ENV
          echo "${IDF_PATH}/tools" >> $GITHUB_PATH
          echo "${IDF_PATH}/components" >> $GITHUB_PATH
          mkdir -p build-esp32 && cd build-esp32
          cmake -DCMAKE_TOOLCHAIN_FILE="../toolchains/esp32.toolchain.cmake" -DCMAKE_BUILD_TYPE=Release -DNCNN_BUILD_EXAMPLES=OFF ..
          make -j 4
          make install


================================================
FILE: .github/workflows/harmonyos.yml
================================================
name: harmonyos

# Runs on pushes to master and on pull requests targeting master, limited to
# changes in this workflow, the CMake files, the listed source directories and
# the glslang path.
on:
  push:
    branches:
    - master
    paths:
    - '.github/workflows/harmonyos.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
  pull_request:
    branches:
    - master
    paths:
    - '.github/workflows/harmonyos.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'

# A newer run for the same ref cancels the one that is still in progress.
concurrency:
  group: harmonyos-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build:
    runs-on:
    - self-hosted
    - linux
    - centos

    # The SDK paths point at /data/action/osd/ohos-sdk on the runner.
    # NCNN_CMAKE_OPTIONS is pasted verbatim into each cmake command line below;
    # every line, including the last one, ends in a backslash so that the
    # per-architecture arguments following the expansion join the same command.
    env:
      OHOS_NDK_HOME: /data/action/osd/ohos-sdk/linux/native
      OHOS_NDK_CMAKE: /data/action/osd/ohos-sdk/linux/native/build-tools/cmake/bin/cmake
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=/data/action/osd/ohos-sdk/linux/native/build/cmake/ohos.toolchain.cmake \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DNCNN_SIMPLEOMP=ON \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Disabled: downloads and unpacks the OpenHarmony SDK into /data/action/osd.
    # - name: setup-sdk
    #   run: |
    #     cd /data/action/osd
    #     wget -q https://repo.huaweicloud.com/harmonyos/os/4.1.1-Release/ohos-sdk-windows_linux-public.tar.gz
    #     tar -xf ohos-sdk-windows_linux-public.tar.gz
    #     cd ohos-sdk/linux
    #     unzip -q native-linux-x64-4.1.7.8-Release.zip

    # Default (NCNN_SHARED_LIB not set) builds, one per OHOS_ARCH.
    - name: armeabi-v7a
      run: |
        mkdir build-armeabi-v7a && cd build-armeabi-v7a
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="armeabi-v7a" ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4
    - name: arm64-v8a
      run: |
        mkdir build-arm64-v8a && cd build-arm64-v8a
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="arm64-v8a" ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4
    - name: x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="x86_64" ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4

    # The same three architectures again with -DNCNN_SHARED_LIB=ON.
    - name: armeabi-v7a-shared
      run: |
        mkdir build-armeabi-v7a-shared && cd build-armeabi-v7a-shared
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="armeabi-v7a" -DNCNN_SHARED_LIB=ON ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4
    - name: arm64-v8a-shared
      run: |
        mkdir build-arm64-v8a-shared && cd build-arm64-v8a-shared
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="arm64-v8a" -DNCNN_SHARED_LIB=ON ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4
    - name: x86_64-shared
      run: |
        mkdir build-x86_64-shared && cd build-x86_64-shared
        ${{ env.OHOS_NDK_CMAKE }} ${{ env.NCNN_CMAKE_OPTIONS }} -DOHOS_ARCH="x86_64" -DNCNN_SHARED_LIB=ON ..
        ${{ env.OHOS_NDK_CMAKE }} --build . -j 4


================================================
FILE: .github/workflows/ios.yml
================================================
name: ios
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/ios.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/ios.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
concurrency:
  group: ios-${{ github.ref }}
  cancel-in-progress: true
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  IOS_DEPLOYMENT_TARGET: '13.0'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
permissions:
  contents: read

jobs:
  build:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \

      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-ios-install-20251004
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    - name: openmp-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=OS64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install
    - name: openmp-simulator-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-simulator-x86_64 && cd build-simulator-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=SIMULATOR64 -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install
    - name: openmp-simulator-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-simulator-arm64 && cd build-simulator-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=SIMULATORARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install
    # Assemble $GITHUB_WORKSPACE/openmp-install from the three per-architecture installs:
    #   ios/            headers and libomp.a from build-arm64
    #   ios-simulator/  headers from build-simulator-x86_64 and a libomp.a that
    #                   lipo creates from the x86_64 and arm64 simulator libraries
    # Skipped when the cache-openmp step reported a cache hit.
    - name: openmp-merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/ios
        mkdir -p $GITHUB_WORKSPACE/openmp-install/ios-simulator

        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/include $GITHUB_WORKSPACE/openmp-install/ios
        mkdir -p $GITHUB_WORKSPACE/openmp-install/ios/lib
        cp openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a $GITHUB_WORKSPACE/openmp-install/ios/lib/libomp.a

        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-simulator-x86_64/install/include $GITHUB_WORKSPACE/openmp-install/ios-simulator
        mkdir -p $GITHUB_WORKSPACE/openmp-install/ios-simulator/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-simulator-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-simulator-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/ios-simulator/lib/libomp.a

    # Copy the OpenMP headers and libomp.a from openmp-install into the usr/include
    # and usr/lib directories of the iPhoneOS and iPhoneSimulator SDKs under
    # $DEVELOPER_DIR. This step has no `if:` guard, so it runs on every job run.
    - name: install-openmp
      run: |
        sudo cp $GITHUB_WORKSPACE/openmp-install/ios/include/* $DEVELOPER_DIR/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/include
        sudo cp $GITHUB_WORKSPACE/openmp-install/ios/lib/libomp.a $DEVELOPER_DIR/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib

        sudo cp $GITHUB_WORKSPACE/openmp-install/ios-simulator/include/* $DEVELOPER_DIR/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/include
        sudo cp $GITHUB_WORKSPACE/openmp-install/ios-simulator/lib/libomp.a $DEVELOPER_DIR/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/lib

    # Configure and build the project for PLATFORM=OS64 / arm64 in build-arm64.
    - name: arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=OS64 -DARCHS="arm64" ..
        cmake --build . -j 4
    # Configure and build the project for PLATFORM=SIMULATOR64 / x86_64 in build-simulator-x86_64.
    - name: simulator-x86_64
      run: |
        mkdir build-simulator-x86_64 && cd build-simulator-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=SIMULATOR64 -DARCHS="x86_64" ..
        cmake --build . -j 4
    # Configure and build the project for PLATFORM=SIMULATORARM64 / arm64 in build-simulator-arm64.
    - name: simulator-arm64
      run: |
        mkdir build-simulator-arm64 && cd build-simulator-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=SIMULATORARM64 -DARCHS="arm64" ..
        cmake --build . -j 4


================================================
FILE: .github/workflows/labeler.yml
================================================
# Workflow that runs the actions/labeler action on pull requests.
name: labeler
# Triggered by the pull_request_target event.
on: [pull_request_target]

# Token scopes for this workflow: read repository contents, write to pull requests.
permissions:
  contents: read
  pull-requests: write

jobs:
  label:
    runs-on: ubuntu-latest
    steps:
    # No inputs are passed to the action here.
    # NOTE(review): presumably it picks up the rules in .github/labeler.yml — confirm.
    - uses: actions/labeler@v6


================================================
FILE: .github/workflows/linux-aarch64.yml
================================================
# AArch64 Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, the aarch64 toolchain file, the CMake build files, the
# generic or ARM-specific sources, or the tests.
name: linux-aarch64
on:
  push:
    branches:
    - master
    paths:
    - ".github/workflows/linux-aarch64.yml"
    - "toolchains/aarch64-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/arm/**"
    - "tests/**"
  pull_request:
    branches:
    - master
    paths:
    - ".github/workflows/linux-aarch64.yml"
    - "toolchains/aarch64-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/arm/**"
    - "tests/**"
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-aarch64-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Native build and test on the ubuntu-24.04-arm runner in three configurations:
  # default, -DNCNN_INT8=OFF, and simplestl + simplemath.
  aarch64-native:
    runs-on: ubuntu-24.04-arm
    steps:
    - uses: actions/checkout@v6

    # Default configuration with the tests enabled.
    - name: build
      run: |
        mkdir build && cd build
        cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test
      run: cd build && ctest --output-on-failure -j $(nproc)

    # Same as above with int8 support switched off.
    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: cd build-noint8 && ctest --output-on-failure -j $(nproc)

    # Build with NCNN_SIMPLESTL and NCNN_SIMPLEMATH enabled; benchmark, tools and examples are off.
    - name: build-simplestl-simplemath
      run: |
        mkdir build-simplestl-simplemath && cd build-simplestl-simplemath
        cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-simplestl-simplemath
      run: cd build-simplestl-simplemath && ctest --output-on-failure -j $(nproc)

  # Build with -DNCNN_ASAN=ON as a shared library (RelWithDebInfo) on the
  # ubuntu-24.04-arm runner and run the test suite.
  asan:
    runs-on: ubuntu-24.04-arm
    steps:
    - uses: actions/checkout@v6
    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_BUILD_TYPE=relwithdebinfo -DNCNN_ASAN=ON -DNCNN_BUILD_TESTS=ON -DNCNN_SHARED_LIB=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test
      run: |
        cd build
        ctest --output-on-failure -j $(nproc)

  # Cross-compile with the aarch64-linux-gnu toolchain file on the ubuntu-24.04
  # runner, then run the test suite under qemu-aarch64-static once per -cpu model.
  aarch64:
    runs-on: ubuntu-24.04
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: aarch64-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-aarch64-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # Each step below reuses the same build and changes only the qemu -cpu argument.
    # NOTE(review): TESTS_EXECUTABLE_LOADER[_ARGUMENTS] are presumably consumed by the
    # test CMake setup to wrap each test binary; the arguments are ';'-separated — confirm.
    - name: test-a53
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;cortex-a53" ctest --output-on-failure -j $(nproc)

    - name: test-a55
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;cortex-a55" ctest --output-on-failure -j $(nproc)

    - name: test-a72
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;cortex-a72" ctest --output-on-failure -j $(nproc)

    - name: test-a76
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;cortex-a76" ctest --output-on-failure -j $(nproc)

    - name: test-a710
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;cortex-a710" ctest --output-on-failure -j $(nproc)

    - name: test-max
      run: cd build && TESTS_EXECUTABLE_LOADER=qemu-aarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu;-cpu;max" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-arm.yml
================================================
# 32-bit ARM Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, any toolchain file used by the jobs below, the CMake
# build files, the generic or ARM-specific sources, or the tests.
name: linux-arm
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-arm.yml'
    - 'toolchains/arm-linux-gnueabi.toolchain.cmake'
    - 'toolchains/arm-linux-gnueabihf.toolchain.cmake'
    # used by the armhf-vfpv3-d16 job
    - 'toolchains/arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'tests/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-arm.yml'
    - 'toolchains/arm-linux-gnueabi.toolchain.cmake'
    - 'toolchains/arm-linux-gnueabihf.toolchain.cmake'
    # used by the armhf-vfpv3-d16 job
    - 'toolchains/arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'tests/**'
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-arm-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile with the arm-linux-gnueabi toolchain file and run the tests
  # under qemu-arm-static, once with the default options and once with -DNCNN_INT8=OFF.
  arm:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: arm-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-arm-linux-gnueabi qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j $(nproc)

    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: |
        cd build-noint8
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabi" ctest --output-on-failure -j $(nproc)

  # Cross-compile with the arm-linux-gnueabihf toolchain file and run the tests
  # under qemu-arm-static, once with the default options and once with -DNCNN_INT8=OFF.
  armhf:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: arm-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-arm-linux-gnueabihf qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j $(nproc)

    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: |
        cd build-noint8
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j $(nproc)

  # Same compiler package as armhf, but configured through the
  # arm-linux-gnueabihf-vfpv3-d16 toolchain file. Tests run under qemu-arm-static,
  # once with the default options and once with -DNCNN_INT8=OFF.
  armhf-vfpv3-d16:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: arm-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-arm-linux-gnueabihf qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j $(nproc)

    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: |
        cd build-noint8
        TESTS_EXECUTABLE_LOADER=qemu-arm-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/arm-linux-gnueabihf" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-loongarch64.yml
================================================
# LoongArch64 Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, the loongarch64 toolchain files, the CMake build files,
# the generic or LoongArch-specific sources, or the tests.
name: linux-loongarch64
on:
  push:
    branches:
    - master
    paths:
    - ".github/workflows/linux-loongarch64.yml"
    - "toolchains/loongarch64-linux-gnu.toolchain.cmake"
    - "toolchains/loongarch64-unknown-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/loongarch/**"
    - "tests/**"
  pull_request:
    branches:
    - master
    paths:
    - ".github/workflows/linux-loongarch64.yml"
    - "toolchains/loongarch64-linux-gnu.toolchain.cmake"
    - "toolchains/loongarch64-unknown-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/loongarch/**"
    - "tests/**"
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-loongarch64-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile for LoongArch64 on a self-hosted runner and run the tests under
  # qemu-loongarch64-static. The cross tools are read from a fixed path on the
  # runner (/data/action/osd/cross-tools) rather than installed by the workflow.
  gcc-loongarch64:
    runs-on: [self-hosted, linux, centos]

    steps:
    - uses: actions/checkout@v6

    # Disabled: installing qemu through apt.
    # - name: qemu
    #   run: |
    #     sudo apt-get update
    #     sudo apt-get install -y qemu-user-static

    # Disabled: downloading and unpacking the cross toolchain.
    # - name: loongarch64-toolchain
    #   run: |
    #     wget https://github.com/sunhaiyong1978/CLFS-for-LoongArch/releases/download/8.0/loongarch64-clfs-8.0-cross-tools-gcc-full.tar.xz
    #     tar -xf loongarch64-clfs-8.0-cross-tools-gcc-full.tar.xz

    # NOTE(review): LOONGARCH64_ROOT_PATH is presumably read by the toolchain file — confirm.
    - name: build
      run: |
        export LOONGARCH64_ROOT_PATH=/data/action/osd/cross-tools
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/loongarch64-unknown-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 4

    # qemu's -L points at the target directory inside the cross-tools tree.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-loongarch64-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/data/action/osd/cross-tools/target" ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/linux-mips.yml
================================================
# 32-bit MIPS Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, the two MIPS toolchain files, the CMake build files, the
# generic or MIPS-specific sources, or the tests.
name: linux-mips
on:
  push:
    branches:
    - master
    paths:
    - ".github/workflows/linux-mips.yml"
    - "toolchains/mipsel-linux-gnu.toolchain.cmake"
    - "toolchains/mipsisa32r6el-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/mips/**"
    - "tests/**"
  pull_request:
    branches:
    - master
    paths:
    - ".github/workflows/linux-mips.yml"
    - "toolchains/mipsel-linux-gnu.toolchain.cmake"
    - "toolchains/mipsisa32r6el-linux-gnu.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/mips/**"
    - "tests/**"
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-mips-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile with the mipsel-linux-gnu toolchain file and run the tests under qemu-mipsel-static.
  mipsel:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: mipsel-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-mipsel-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mipsel-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-mipsel-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsel-linux-gnu" ctest --output-on-failure -j $(nproc)

  # Cross-compile with the mipsisa32r6el-linux-gnu toolchain file and run the tests under qemu-mipsel-static.
  mipsisa32r6el:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: mipsisa32r6el-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-mipsisa32r6el-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mipsisa32r6el-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-mipsel-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa32r6el-linux-gnu" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-mips64.yml
================================================
# 64-bit MIPS Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, the two MIPS64 toolchain files, the CMake build files,
# the generic or MIPS-specific sources, or the tests.
name: linux-mips64
on:
  push:
    branches:
    - master
    paths:
    - ".github/workflows/linux-mips64.yml"
    - "toolchains/mips64el-linux-gnuabi64.toolchain.cmake"
    - "toolchains/mipsisa64r6el-linux-gnuabi64.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/mips/**"
    - "tests/**"
  pull_request:
    branches:
    - master
    paths:
    - ".github/workflows/linux-mips64.yml"
    - "toolchains/mips64el-linux-gnuabi64.toolchain.cmake"
    - "toolchains/mipsisa64r6el-linux-gnuabi64.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/mips/**"
    - "tests/**"
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-mips64-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile with the mips64el-linux-gnuabi64 toolchain file and run the tests under qemu-mips64el-static.
  mips64el:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: mips64el-gnuabi64-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-mips64el-linux-gnuabi64 qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mips64el-linux-gnuabi64.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-mips64el-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mips64el-linux-gnuabi64" ctest --output-on-failure -j $(nproc)

  # Cross-compile with the mipsisa64r6el-linux-gnuabi64 toolchain file and run the tests under qemu-mips64el-static.
  mipsisa64r6el:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: mipsisa64r6el-gnuabi64-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-mipsisa64r6el-linux-gnuabi64 qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/mipsisa64r6el-linux-gnuabi64.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-mips64el-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/mipsisa64r6el-linux-gnuabi64" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-ppc64.yml
================================================
# PowerPC Linux CI. Runs for pushes to and pull requests against master that
# touch this workflow, any toolchain file used by the jobs below, the CMake
# build files, the listed source directories, or the tests.
name: linux-ppc64
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-ppc64.yml'
    # one entry per toolchain file referenced by the ppc, ppc64le, power8le-vsx and power9le-vsx jobs
    - 'toolchains/powerpc-linux-gnu.toolchain.cmake'
    - 'toolchains/powerpc64le-linux-gnu.toolchain.cmake'
    - 'toolchains/power8le-linux-gnu-vsx.toolchain.cmake'
    - 'toolchains/power9le-linux-gnu-vsx.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/*'
    - 'tests/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-ppc64.yml'
    # one entry per toolchain file referenced by the ppc, ppc64le, power8le-vsx and power9le-vsx jobs
    - 'toolchains/powerpc-linux-gnu.toolchain.cmake'
    - 'toolchains/powerpc64le-linux-gnu.toolchain.cmake'
    - 'toolchains/power8le-linux-gnu-vsx.toolchain.cmake'
    - 'toolchains/power9le-linux-gnu-vsx.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/*'
    - 'tests/**'
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-ppc64-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile with the powerpc-linux-gnu toolchain file and run the tests under qemu-ppc-static.
  ppc:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: powerpc-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-powerpc-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/powerpc-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-ppc-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc-linux-gnu" ctest --output-on-failure -j $(nproc)

  # Cross-compile with the powerpc64le-linux-gnu toolchain file and run the tests under qemu-ppc64le-static.
  ppc64le:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: powerpc64le-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-powerpc64le-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/powerpc64le-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # qemu's -L points at the cross sysroot installed by the toolchain package.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-ppc64le-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j $(nproc)

  # Same compiler package as ppc64le, configured through the power8le-linux-gnu-vsx
  # toolchain file. Tests run under qemu-ppc64le-static with its default CPU model.
  power8le-vsx:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: powerpc64le-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-powerpc64le-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power8le-linux-gnu-vsx.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-ppc64le-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j $(nproc)

  # Same compiler package as ppc64le, configured through the power9le-linux-gnu-vsx
  # toolchain file. Tests run under qemu-ppc64le-static with -cpu power9_v2.0.
  power9le-vsx:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: powerpc64le-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-powerpc64le-linux-gnu qemu-user-static

    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power9le-linux-gnu-vsx.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)

    # Unlike the other PowerPC jobs, this one selects an explicit qemu CPU model.
    - name: test
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-ppc64le-static TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu;-cpu;power9_v2.0" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-riscv32.yml
================================================
# RISC-V 32-bit Linux CI. Runs for pushes to and pull requests against master
# that touch this workflow, the c907-rv32 toolchain file, the CMake build files,
# the generic or RISC-V-specific sources, or the tests.
name: linux-riscv32
on:
  push:
    branches:
    - master
    paths:
    - ".github/workflows/linux-riscv32.yml"
    - "toolchains/c907-rv32-v310.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/riscv/**"
    - "tests/**"
  pull_request:
    branches:
    - master
    paths:
    - ".github/workflows/linux-riscv32.yml"
    - "toolchains/c907-rv32-v310.toolchain.cmake"
    - "CMakeLists.txt"
    - "cmake/**"
    - "src/*"
    - "src/layer/*"
    - "src/layer/riscv/**"
    - "tests/**"
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-riscv32-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile and test on a self-hosted runner, one job per matrix entry.
  # The toolchain and qemu are read from fixed paths under /data/action/osd on
  # the runner rather than installed by the workflow.
  xuantie:
    name: xuantie-${{ matrix.cpu }}
    runs-on: [self-hosted, linux, ubuntu]
    strategy:
      fail-fast: false
      matrix:
        # cpu selects toolchains/<cpu>-v310.toolchain.cmake, QEMU_CPU is passed to
        # qemu's -cpu, and the remaining keys are forwarded as -DNCNN_* options.
        include:
          - { cpu: c907-rv32, QEMU_CPU: c907fdv-rv32,   OPENMP: ON,  RVV: ON,  XTHEADVECTOR: OFF, ZFH: ON, ZVFH: ON  }

    steps:
    - uses: actions/checkout@v6

    # NOTE(review): RISCV_ROOT_PATH is presumably read by the toolchain file — confirm.
    # NCNN_THREADS is set to the same value as NCNN_OPENMP.
    - name: build
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/Xuantie-900-gcc-linux-6.6.36-glibc-x86_64-V3.3.0
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/${{ matrix.cpu }}-v310.toolchain.cmake -DCMAKE_BUILD_TYPE=release \
            -DNCNN_OPENMP=${{ matrix.OPENMP }} -DNCNN_THREADS=${{ matrix.OPENMP }} \
            -DNCNN_RUNTIME_CPU=OFF \
            -DNCNN_RVV=${{ matrix.RVV }} \
            -DNCNN_XTHEADVECTOR=${{ matrix.XTHEADVECTOR }} \
            -DNCNN_ZFH=${{ matrix.ZFH }} \
            -DNCNN_ZVFH=${{ matrix.ZVFH }} \
            -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8

    # Put the runner's qemu build first on PATH, then run the tests through qemu-riscv32.
    - name: test
      run: |
        export PATH=/data/action/osd/Xuantie-qemu-x86_64-Ubuntu-20.04-V5.2.8-B20250721-0303/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv32 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;${{ matrix.QEMU_CPU }}" ctest --output-on-failure -j 8


================================================
FILE: .github/workflows/linux-riscv64.yml
================================================
# RISC-V 64-bit Linux CI. Runs for pushes to and pull requests against master
# that touch this workflow, any toolchain file used by the jobs below, the CMake
# build files, the generic or RISC-V-specific sources, the tests, or the examples.
name: linux-riscv64
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-riscv64.yml'
    - 'toolchains/riscv64-linux-gnu.toolchain.cmake'
    - 'toolchains/riscv64-unknown-linux-gnu.toolchain.cmake'
    - 'toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake'
    # the xuantie matrix resolves toolchains/<cpu>-v310.toolchain.cmake for c906, c907, c908 and c910
    - 'toolchains/c906-v310.toolchain.cmake'
    - 'toolchains/c907-v310.toolchain.cmake'
    - 'toolchains/c908-v310.toolchain.cmake'
    - 'toolchains/c910-v310.toolchain.cmake'
    - 'toolchains/k1.toolchain.cmake'
    - 'toolchains/k1.llvm.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'
    - 'examples/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-riscv64.yml'
    - 'toolchains/riscv64-linux-gnu.toolchain.cmake'
    - 'toolchains/riscv64-unknown-linux-gnu.toolchain.cmake'
    - 'toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake'
    # the xuantie matrix resolves toolchains/<cpu>-v310.toolchain.cmake for c906, c907, c908 and c910
    - 'toolchains/c906-v310.toolchain.cmake'
    - 'toolchains/c907-v310.toolchain.cmake'
    - 'toolchains/c908-v310.toolchain.cmake'
    - 'toolchains/c910-v310.toolchain.cmake'
    - 'toolchains/k1.toolchain.cmake'
    - 'toolchains/k1.llvm.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/riscv/**'
    - 'tests/**'
    - 'examples/**'
# One run per ref: a newer run cancels the one still in progress.
concurrency:
  group: linux-riscv64-${{ github.ref }}
  cancel-in-progress: true
# The jobs only need to read the repository.
permissions:
  contents: read

jobs:
  # Cross-compile with the distro riscv64 GCC and run the tests under a patched
  # qemu-riscv64 that is built from source and cached in qemu-install.
  gcc-riscv64:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    # Restore a previously built QEMU; the QEMU build steps below are skipped on a cache hit.
    - name: cache-qemu
      id: cache-qemu
      uses: actions/cache@v5
      with:
        path: qemu-install
        key: qemu-riscv64-install-20220502-4
    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: install-qemu-build-deps
      if: steps.cache-qemu.outputs.cache-hit != 'true'
      run: |
        sudo apt-get update
        sudo apt-get install -y autoconf automake autotools-dev ninja-build build-essential pkg-config libglib2.0-dev libpixman-1-dev zlib1g-dev python3
    # QEMU is pinned to a fixed commit.
    - name: checkout-qemu
      if: steps.cache-qemu.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
      with:
        repository: qemu/qemu
        path: qemu
        ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
    # Apply the hwcap patch, then build only the riscv64 user-mode emulator into qemu-install.
    - name: qemu
      if: steps.cache-qemu.outputs.cache-hit != 'true'
      run: |
        cd qemu
        wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
        patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
        ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
        make -j$(nproc)
        make install

    # -y keeps the install non-interactive without relying on the runner image's apt configuration.
    - name: riscv64-gnu-toolchain
      run: |
        sudo apt-get update
        sudo apt-get install -y g++-riscv64-linux-gnu

    - name: configure
      run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
    - name: build
      run: cmake --build build -j $(nproc)

    # Put the cached or freshly built QEMU first on PATH before running the tests.
    - name: test
      run: |
        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/riscv64-linux-gnu" ctest --output-on-failure -j $(nproc)

  # Cross-compile and test on a self-hosted runner, one job per matrix entry.
  # The toolchain and qemu are read from fixed paths under /data/action/osd on
  # the runner rather than installed by the workflow.
  xuantie:
    name: xuantie-${{ matrix.cpu }}
    runs-on: [self-hosted, linux, ubuntu]
    strategy:
      fail-fast: false
      matrix:
        # cpu selects toolchains/<cpu>-v310.toolchain.cmake, QEMU_CPU is passed to
        # qemu's -cpu, and the remaining keys are forwarded as -DNCNN_* options.
        include:
          - { cpu: c906, QEMU_CPU: c906fdv, OPENMP: OFF, RVV: OFF, XTHEADVECTOR: ON,  ZFH: ON, ZVFH: OFF }
          - { cpu: c910, QEMU_CPU: c910v,   OPENMP: ON,  RVV: OFF, XTHEADVECTOR: ON,  ZFH: ON, ZVFH: OFF }
          - { cpu: c908, QEMU_CPU: c908v,   OPENMP: ON,  RVV: ON,  XTHEADVECTOR: OFF, ZFH: ON, ZVFH: ON  }
          - { cpu: c907, QEMU_CPU: c907fdv-rv64,   OPENMP: ON,  RVV: ON,  XTHEADVECTOR: OFF, ZFH: ON, ZVFH: ON  }

    steps:
    - uses: actions/checkout@v6

    # NOTE(review): RISCV_ROOT_PATH is presumably read by the toolchain file — confirm.
    # NCNN_THREADS is set to the same value as NCNN_OPENMP.
    - name: build
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/Xuantie-900-gcc-linux-6.6.36-glibc-x86_64-V3.3.0
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/${{ matrix.cpu }}-v310.toolchain.cmake -DCMAKE_BUILD_TYPE=release \
            -DNCNN_OPENMP=${{ matrix.OPENMP }} -DNCNN_THREADS=${{ matrix.OPENMP }} \
            -DNCNN_RUNTIME_CPU=OFF \
            -DNCNN_RVV=${{ matrix.RVV }} \
            -DNCNN_XTHEADVECTOR=${{ matrix.XTHEADVECTOR }} \
            -DNCNN_ZFH=${{ matrix.ZFH }} \
            -DNCNN_ZVFH=${{ matrix.ZVFH }} \
            -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8

    # Put the runner's qemu build first on PATH, then run the tests through qemu-riscv64.
    - name: test
      run: |
        export PATH=/data/action/osd/Xuantie-qemu-x86_64-Ubuntu-20.04-V5.2.8-B20250721-0303/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;${{ matrix.QEMU_CPU }}" ctest --output-on-failure -j 8

  # Build with both the GCC (k1.toolchain.cmake) and LLVM (k1.llvm.toolchain.cmake)
  # toolchain files on a self-hosted runner, then test each build under qemu-riscv64.
  # The toolchain and qemu are read from fixed paths under /data/action/osd.
  spacemit:
    name: spacemit-${{ matrix.cpu }}
    runs-on: [self-hosted, linux, ubuntu]
    strategy:
      fail-fast: false
      matrix:
        # QEMU_CPU is passed to qemu's -cpu; the remaining keys except cpu are
        # forwarded as -DNCNN_* options. cpu is only used in the job name.
        include:
          - { cpu: x60, QEMU_CPU: "max,vlen=256,elen=64,vext_spec=v1.0", OPENMP: ON, RVV: ON, XTHEADVECTOR: OFF, ZFH: ON, ZVFH: ON }

    steps:
    - uses: actions/checkout@v6

    # https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
    - name: build-gcc
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/spacemit-toolchain-linux-glibc-x86_64-v1.1.2
        mkdir build-gcc && cd build-gcc
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/k1.toolchain.cmake -DCMAKE_BUILD_TYPE=release \
            -DNCNN_OPENMP=${{ matrix.OPENMP }} -DNCNN_THREADS=${{ matrix.OPENMP }} \
            -DNCNN_RUNTIME_CPU=OFF \
            -DNCNN_RVV=${{ matrix.RVV }} \
            -DNCNN_XTHEADVECTOR=${{ matrix.XTHEADVECTOR }} \
            -DNCNN_ZFH=${{ matrix.ZFH }} \
            -DNCNN_ZVFH=${{ matrix.ZVFH }} \
            -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8

    # Same options as build-gcc; only the toolchain file and build directory differ.
    - name: build-llvm
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/spacemit-toolchain-linux-glibc-x86_64-v1.1.2
        mkdir build-llvm && cd build-llvm
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/k1.llvm.toolchain.cmake -DCMAKE_BUILD_TYPE=release \
            -DNCNN_OPENMP=${{ matrix.OPENMP }} -DNCNN_THREADS=${{ matrix.OPENMP }} \
            -DNCNN_RUNTIME_CPU=OFF \
            -DNCNN_RVV=${{ matrix.RVV }} \
            -DNCNN_XTHEADVECTOR=${{ matrix.XTHEADVECTOR }} \
            -DNCNN_ZFH=${{ matrix.ZFH }} \
            -DNCNN_ZVFH=${{ matrix.ZVFH }} \
            -DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8

    # https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
    # qemu's -L points at the sysroot directory inside the toolchain tree.
    - name: test-gcc
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/spacemit-toolchain-linux-glibc-x86_64-v1.1.2
        export PATH=/data/action/osd/jdsk-qemu/bin:$PATH
        cd build-gcc
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;${{ matrix.QEMU_CPU }};-L;${RISCV_ROOT_PATH}/sysroot" ctest --output-on-failure -j 8

    # Same as test-gcc, run against the build-llvm directory.
    - name: test-llvm
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/spacemit-toolchain-linux-glibc-x86_64-v1.1.2
        export PATH=/data/action/osd/jdsk-qemu/bin:$PATH
        cd build-llvm
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;${{ matrix.QEMU_CPU }};-L;${RISCV_ROOT_PATH}/sysroot" ctest --output-on-failure -j 8

  gcc-rvv:
    runs-on: [self-hosted, linux, ubuntu]
    steps:
    - uses: actions/checkout@v6

    #- name: cache-qemu
      #id: cache-qemu
      #uses: actions/cache@v5
      #with:
        #path: qemu-install
        #key: qemu-riscv64-install-20241202
    #- name: install-qemu-build-deps
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #run: |
        #sudo apt-get update
        #sudo apt-get install autoconf automake autotools-dev ninja-build
    #- name: checkout-qemu
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #uses: actions/checkout@v6
      #with:
        #repository: qemu/qemu
        #path: qemu
        #ref: 72b88908d12ee9347d13539c7dd9a252625158d1
    #- name: qemu
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #run: |
        #cd qemu
        #./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
        #make -j4
        #make install

    #- name: cache-riscv
      #id: cache-riscv
      #uses: actions/cache@v5
      #with:
        #path: riscv-install
        #key: riscv-linux-install-20241202

    #- name: install-riscv-build-deps
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #sudo apt-get update
        #sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev device-tree-compiler

    #- name: checkout-riscv-gnu-toolchain
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #uses: actions/checkout@v6
      #with:
        #repository: riscv-collab/riscv-gnu-toolchain
        #path: riscv-gnu-toolchain
        #ref: 20f615317e2ce888dfc11b29ccde4a649494b654
    #- name: checkout-riscv-gnu-toolchain-submodules
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #cd riscv-gnu-toolchain
        #git submodule update --init --recursive --depth 1 glibc
        #git submodule update --init --recursive --depth 1 newlib
        #git submodule update --init --recursive --depth 1 riscv-binutils
        #git submodule update --init --recursive --depth 1 riscv-gcc
        #git submodule update --init --recursive --depth 1 riscv-dejagnu
        #git submodule update --init --recursive --depth 1 riscv-gdb
    #- name: riscv-gnu-toolchain
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #cd riscv-gnu-toolchain
        #./configure --prefix=$GITHUB_WORKSPACE/riscv
        #make linux -j4

    #- name: riscv-strip-install
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: find $GITHUB_WORKSPACE/riscv -type f | xargs -i strip -g {} || true

    # Configure the RISC-V cross build in ./build using the GNU toolchain file.
    # RISCV_ROOT_PATH is exported only for this step's shell.
    - name: configure
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/riscv
        mkdir build
        cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
    # Compile the tree configured by the previous step (./build), 8 parallel jobs.
    # NOTE(review): RISCV_ROOT_PATH is not exported in this step; this presumably
    # relies on the compiler paths cached at configure time - confirm.
    - name: build
      run: cmake --build build -j 8

    # Run the test suite with qemu-riscv64 set as the test loader, using a CPU
    # model with the vector extension enabled and vlen=256.
    # The loader arguments are a ';'-separated list; -L points at the toolchain sysroot.
    - name: test-vlen256
      run: |
        export PATH=/data/action/osd/qemu-install/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,zfh=true,zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;/data/action/osd/riscv/sysroot" ctest --output-on-failure -j 8

    # Same test run as test-vlen256, reusing the same ./build tree, but with the
    # emulated CPU's vlen set to 128.
    - name: test-vlen128
      run: |
        export PATH=/data/action/osd/qemu-install/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,zfh=true,zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;/data/action/osd/riscv/sysroot" ctest --output-on-failure -j 8

  clang-rvv:
    runs-on: [self-hosted, linux, ubuntu]
    steps:
    - uses: actions/checkout@v6

    #- name: cache-qemu
      #id: cache-qemu
      #uses: actions/cache@v5
      #with:
        #path: qemu-install
        #key: qemu-riscv64-install-20241202
    #- name: install-qemu-build-deps
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #run: |
        #sudo apt-get update
        #sudo apt-get install autoconf automake autotools-dev ninja-build
    #- name: checkout-qemu
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #uses: actions/checkout@v6
      #with:
        #repository: qemu/qemu
        #path: qemu
        #ref: 72b88908d12ee9347d13539c7dd9a252625158d1
    #- name: qemu
      #if: steps.cache-qemu.outputs.cache-hit != 'true'
      #run: |
        #cd qemu
        #./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
        #make -j4
        #make install

    #- name: cache-riscv
      #id: cache-riscv
      #uses: actions/cache@v5
      #with:
        #path: riscv-install
        #key: riscv-linux-install-20241202

    #- name: install-riscv-build-deps
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #sudo apt-get update
        #sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev device-tree-compiler

    #- name: checkout-riscv-gnu-toolchain
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #uses: actions/checkout@v6
      #with:
        #repository: riscv-collab/riscv-gnu-toolchain
        #path: riscv-gnu-toolchain
        #ref: 20f615317e2ce888dfc11b29ccde4a649494b654
    #- name: checkout-riscv-gnu-toolchain-submodules
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #cd riscv-gnu-toolchain
        #git submodule update --init --recursive --depth 1 glibc
        #git submodule update --init --recursive --depth 1 newlib
        #git submodule update --init --recursive --depth 1 riscv-binutils
        #git submodule update --init --recursive --depth 1 riscv-gcc
        #git submodule update --init --recursive --depth 1 riscv-dejagnu
        #git submodule update --init --recursive --depth 1 riscv-gdb
    #- name: riscv-gnu-toolchain
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: |
        #cd riscv-gnu-toolchain
        #./configure --prefix=$GITHUB_WORKSPACE/riscv
        #make linux -j4

    #- name: riscv-strip-install
      #if: steps.cache-riscv.outputs.cache-hit != 'true'
      #run: find $GITHUB_WORKSPACE/riscv -type f | xargs -i strip -g {} || true

    # - name: install-clang
    #   run: |
    #     wget https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.4/llvm-project-19.1.4.src.tar.xz
    #     tar -xf llvm-project-19.1.4.src.tar.xz
    #     cd llvm-project-19.1.4.src
    #     mkdir build
    #     cd build
    #     cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/riscv -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLLVM_ENABLE_PROJECTS="clang" -DLLVM_TARGETS_TO_BUILD="RISCV" -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF ../llvm/
    #     make -j16
    #     make install

    # Configure and compile the RISC-V cross build in ./build with the LLVM
    # toolchain file; tests are enabled, tools and examples are not.
    - name: build
      run: |
        export RISCV_ROOT_PATH=/data/action/osd/riscv
        mkdir build
        cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8

    # Run the test suite with qemu-riscv64 set as the test loader, using a CPU
    # model with the vector extension enabled and vlen=256.
    # The loader arguments are a ';'-separated list; -L points at the toolchain sysroot.
    - name: test-vlen256
      run: |
        export PATH=/data/action/osd/qemu-install/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,zfh=true,zvfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;/data/action/osd/riscv/sysroot" ctest --output-on-failure -j 8

    # Same test run as test-vlen256, reusing the same ./build tree, but with the
    # emulated CPU's vlen set to 128.
    - name: test-vlen128
      run: |
        export PATH=/data/action/osd/qemu-install/bin:$PATH
        cd build
        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,zfh=true,zvfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;/data/action/osd/riscv/sysroot" ctest --output-on-failure -j 8


================================================
FILE: .github/workflows/linux-x64-cpu-clang.yml
================================================
name: linux-x64-cpu-clang
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-clang.yml'
    - 'toolchains/host-c.clang.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-clang.yml'
    - 'toolchains/host-c.clang.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
concurrency:
  group: linux-x64-cpu-clang-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds and tests ncnn with clang on the hosted Ubuntu runner in several
  # instruction-set configurations. Every build step selects clang through the
  # CC/CXX environment variables and uses its own build-<variant> directory,
  # so the variants do not share CMake caches.
  linux-clang:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: update
      run: sudo apt-get update
    # Despite the step name, this also installs the OpenCV development package.
    - name: protobuf
      run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev
    # AVX and AVX2 both switched off.
    - name: build-sse2
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-sse2 && cd build-sse2
        cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-sse2
      run: cd build-sse2 && ctest --output-on-failure -j $(nproc)
    # Shared-library build; compile-only, there is no matching test step.
    - name: build-shared
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    # AVX2 explicitly on; NCNN_AVX is left at its default.
    - name: build-avx2
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-avx2 && cd build-avx2
        cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx2
      run: cd build-avx2 && ctest --output-on-failure -j $(nproc)
    # AVX on, AVX2 off.
    - name: build-avx
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-avx && cd build-avx
        cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx
      run: cd build-avx && ctest --output-on-failure -j $(nproc)
    # AVX and AVX2 both explicitly on.
    - name: build-avx1-2
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-avx1-2 && cd build-avx1-2
        cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx1-2
      run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc)
    # Default configuration with NCNN_INT8 switched off.
    - name: build-noint8
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: cd build-noint8 && ctest --output-on-failure -j $(nproc)

  # Builds and tests with NCNN_SIMPLESTL=ON through the host-c clang toolchain
  # file, first alone and then together with NCNN_SIMPLEOMP=ON. Benchmark, tools
  # and examples are disabled in both configurations.
  linux-clang-simplestl:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: build-simplestl
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-simplestl && cd build-simplestl
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.clang.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-simplestl
      run: cd build-simplestl && ctest --output-on-failure -j $(nproc)
    # Same as above plus NCNN_SIMPLEOMP=ON, in a separate build directory.
    - name: build-simplestl-simpleomp
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.clang.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-simplestl-simpleomp
      run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-x64-cpu-gcc-musl.yml
================================================
name: linux-x64-cpu-gcc-musl
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-gcc-musl.yml'
    - 'toolchains/host-c.gcc.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-gcc-musl.yml'
    - 'toolchains/host-c.gcc.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
concurrency:
  group: linux-x64-cpu-gcc-musl-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds and tests ncnn inside an Alpine Linux environment provisioned by
  # jirutka/setup-alpine. Only steps that set `shell: alpine.sh {0}` run inside
  # that environment; a step without it runs in the runner's default shell on
  # the Ubuntu host instead.
  linux-gcc-musl:
    runs-on: ubuntu-latest
    steps:
    # Provision Alpine and install the build toolchain into it.
    - uses: jirutka/setup-alpine@v1
      with:
        packages: >
          cmake
          clang
          clang-dev
          make
          gcc
          g++
          libc-dev
          linux-headers

    - uses: actions/checkout@v6
    - name: build
      shell: alpine.sh {0}
      run: |
        mkdir build && cd build
        cmake -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test
      shell: alpine.sh {0}
      run: cd build && ctest --output-on-failure -j $(nproc)
    # Shared-library build; compile-only. It must also use the Alpine shell,
    # otherwise it would silently build with the host toolchain and never
    # exercise the Alpine environment.
    - name: build-shared
      shell: alpine.sh {0}
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)


================================================
FILE: .github/workflows/linux-x64-cpu-gcc.yml
================================================
name: linux-x64-cpu-gcc
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-gcc.yml'
    - 'toolchains/host-c.gcc.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-cpu-gcc.yml'
    - 'toolchains/host-c.gcc.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
concurrency:
  group: linux-x64-cpu-gcc-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds and tests ncnn with the runner's default compiler (no CC/CXX
  # override) in several instruction-set configurations. Each variant uses its
  # own build-<variant> directory.
  linux-gcc:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: update
      run: sudo apt-get update
    # Despite the step name, this also installs the OpenCV development package.
    - name: protobuf
      run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev
    # AVX and AVX2 both switched off.
    - name: build-sse2
      run: |
        mkdir build-sse2 && cd build-sse2
        cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-sse2
      run: cd build-sse2 && ctest --output-on-failure -j $(nproc)
    # Shared-library build; compile-only, there is no matching test step.
    - name: build-shared
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    # AVX2 explicitly on; NCNN_AVX is left at its default.
    - name: build-avx2
      run: |
        mkdir build-avx2 && cd build-avx2
        cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx2
      run: cd build-avx2 && ctest --output-on-failure -j $(nproc)
    # AVX on, AVX2 off.
    - name: build-avx
      run: |
        mkdir build-avx && cd build-avx
        cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx
      run: cd build-avx && ctest --output-on-failure -j $(nproc)
    # AVX and AVX2 both explicitly on.
    - name: build-avx1-2
      run: |
        mkdir build-avx1-2 && cd build-avx1-2
        cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-avx1-2
      run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc)
    # Default configuration with NCNN_INT8 switched off.
    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: cd build-noint8 && ctest --output-on-failure -j $(nproc)

  # Shared-library RelWithDebInfo build with NCNN_ASAN=ON, followed by the full
  # test suite. Tools and examples are not built.
  asan:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: build
      run: |
        mkdir build
        cd build
        cmake -DCMAKE_BUILD_TYPE=relwithdebinfo -DNCNN_ASAN=ON -DNCNN_BUILD_TESTS=ON -DNCNN_SHARED_LIB=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test
      run: cd build && ctest --output-on-failure -j $(nproc)

  # Reduced-feature builds: a C++03 toolchain build, a build with stdio and
  # string support disabled, and simplestl builds with and without simpleomp.
  # Each variant uses its own build directory.
  linux-gcc-cpp03-nostdio-nostring-simplestl:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    # NOTE(review): despite the step name, no -DNCNN_STDIO=OFF is passed here;
    # only the c++03 toolchain file is selected. Whether that toolchain file
    # disables stdio is not visible from this workflow - confirm.
    - name: build-nostdio
      run: |
        mkdir build-nostdio && cd build-nostdio
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-c++03.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-nostdio
      run: cd build-nostdio && ctest --output-on-failure -j $(nproc)
    # Compile-only (tests are off). NOTE(review): this is the only step in the
    # job configured without a toolchain file - confirm that is intended.
    - name: build-nostdio-nostring
      run: |
        mkdir build-nostdio-nostring && cd build-nostdio-nostring
        cmake -DNCNN_STDIO=OFF -DNCNN_STRING=OFF -DNCNN_BUILD_TESTS=OFF -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    # NCNN_SIMPLESTL=ON through the host-c gcc toolchain file.
    - name: build-simplestl
      run: |
        mkdir build-simplestl && cd build-simplestl
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-simplestl
      run: cd build-simplestl && ctest --output-on-failure -j $(nproc)
    # Same as build-simplestl plus NCNN_SIMPLEOMP=ON.
    - name: build-simplestl-simpleomp
      run: |
        mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-simplestl-simpleomp
      run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc)

  # AVX2 + AVX512 + AVX512VNNI build and test on the self-hosted runner labelled
  # t4. Both steps set LD_LIBRARY_PATH to /data/action/install/lib64.
  # NOTE(review): presumably that directory holds runtime libraries for the
  # runner's compiler - confirm.
  linux-gcc-avx512:
    runs-on: [self-hosted, linux, t4]
    steps:
    - uses: actions/checkout@v6
    - name: build
      env:
        CC: gcc
        CXX: g++
        LD_LIBRARY_PATH: /data/action/install/lib64
      run: |
        mkdir build && cd build
        cmake -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j 4
    - name: test
      env:
        LD_LIBRARY_PATH: /data/action/install/lib64
      run: cd build && ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/linux-x64-gpu-clang.yml
================================================
name: linux-x64-gpu-clang
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-gpu-clang.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-gpu-clang.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
concurrency:
  group: linux-x64-gpu-clang-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Vulkan-enabled clang build and test on a self-hosted runner. Tests run
  # against a SwiftShader build that is cached under swiftshader-install and
  # only rebuilt from the pinned commit on a cache miss.
  linux-clang-gpu:
    runs-on: [self-hosted, linux, ubuntu25]
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # The cache key is a fixed string; change it to force a SwiftShader rebuild.
    - name: cache-swiftshader
      id: cache-swiftshader
      uses: actions/cache@v5
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20250508
    # The next two steps run only when the cache above was not restored.
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: 930d46d31b5d637f313fd5ef55da2bbf053c26c1
    # Builds SwiftShader and copies the contents of its build/Linux output
    # directory into $GITHUB_WORKSPACE/swiftshader-install (the cached path).
    - name: swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
        mkdir -p build; cd build
        cmake -DCMAKE_INSTALL_PREFIX=install -DSWIFTSHADER_BUILD_PVR=FALSE -DSWIFTSHADER_BUILD_TESTS=FALSE -DSWIFTSHADER_ENABLE_ASTC=FALSE -DSWIFTSHADER_WARNINGS_AS_ERRORS=FALSE -DREACTOR_BACKEND=Subzero -DREACTOR_DEFAULT_OPT_LEVEL=Default -DCMAKE_BUILD_TYPE=Release ..
        cmake --build . -j 8
        mkdir $GITHUB_WORKSPACE/swiftshader-install
        cp Linux/* $GITHUB_WORKSPACE/swiftshader-install

    - name: build
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build && cd build
        cmake -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8
    # Writes a SwiftShader.ini with ThreadCount=1 into build/tests and sets
    # VK_ICD_FILENAMES to the SwiftShader ICD manifest before running ctest.
    - name: test
      run: |
        printf "[Processor]\nThreadCount=1\n" > build/tests/SwiftShader.ini
        export VK_ICD_FILENAMES="$GITHUB_WORKSPACE/swiftshader-install/vk_swiftshader_icd.json"
        cd build && ctest --output-on-failure -j 8
    # Shared-library Vulkan build; compile-only.
    - name: build-shared
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j 8


================================================
FILE: .github/workflows/linux-x64-gpu-gcc.yml
================================================
name: linux-x64-gpu-gcc
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-gpu-gcc.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-gpu-gcc.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
concurrency:
  group: linux-x64-gpu-gcc-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Vulkan-enabled build and test with the runner's default compiler on a
  # self-hosted runner. Tests run against a SwiftShader build that is cached
  # under swiftshader-install and only rebuilt from the pinned commit on a
  # cache miss.
  linux-gcc-gpu:
    runs-on: [self-hosted, linux, ubuntu25]
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # The cache key is a fixed string; change it to force a SwiftShader rebuild.
    - name: cache-swiftshader
      id: cache-swiftshader
      uses: actions/cache@v5
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20250508
    # The next two steps run only when the cache above was not restored.
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: 930d46d31b5d637f313fd5ef55da2bbf053c26c1
    # Builds SwiftShader and copies the contents of its build/Linux output
    # directory into $GITHUB_WORKSPACE/swiftshader-install (the cached path).
    - name: swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
        mkdir -p build; cd build
        cmake -DCMAKE_INSTALL_PREFIX=install -DSWIFTSHADER_BUILD_PVR=FALSE -DSWIFTSHADER_BUILD_TESTS=FALSE -DSWIFTSHADER_ENABLE_ASTC=FALSE -DSWIFTSHADER_WARNINGS_AS_ERRORS=FALSE -DREACTOR_BACKEND=Subzero -DREACTOR_DEFAULT_OPT_LEVEL=Default -DCMAKE_BUILD_TYPE=Release ..
        cmake --build . -j 8
        mkdir $GITHUB_WORKSPACE/swiftshader-install
        cp Linux/* $GITHUB_WORKSPACE/swiftshader-install

    - name: build
      run: |
        mkdir build && cd build
        cmake -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 8
    # Writes a SwiftShader.ini with ThreadCount=1 into build/tests and sets
    # VK_ICD_FILENAMES to the SwiftShader ICD manifest before running ctest.
    - name: test
      run: |
        printf "[Processor]\nThreadCount=1\n" > build/tests/SwiftShader.ini
        export VK_ICD_FILENAMES="$GITHUB_WORKSPACE/swiftshader-install/vk_swiftshader_icd.json"
        cd build && ctest --output-on-failure -j 8
    # Shared-library Vulkan build; compile-only.
    - name: build-shared
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j 8

  # Compile-only check of the Vulkan build against the distribution's glslang
  # packages (NCNN_SYSTEM_GLSLANG=ON), static and shared. The checkout does not
  # fetch submodules, and no tests are built or run.
  linux-gcc-gpu-system-glslang:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6

    - name: install-deps
      run: |
        sudo apt-get update
        sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev libvulkan-dev glslang-dev glslang-tools spirv-tools

    # GLSLANG_TARGET_DIR is set to /usr/lib/x86_64-linux-gnu/cmake in both builds.
    - name: build
      run: |
        mkdir build && cd build
        cmake -DNCNN_VULKAN=ON -DNCNN_SYSTEM_GLSLANG=ON -DGLSLANG_TARGET_DIR=/usr/lib/x86_64-linux-gnu/cmake ..
        cmake --build . -j $(nproc)
    - name: build-shared
      run: |
        mkdir build-shared && cd build-shared
        cmake -DNCNN_VULKAN=ON -DNCNN_SYSTEM_GLSLANG=ON -DGLSLANG_TARGET_DIR=/usr/lib/x86_64-linux-gnu/cmake -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)

  # Vulkan build and test on the self-hosted runner labelled t4. No
  # VK_ICD_FILENAMES override is set in this job. Both steps set
  # LD_LIBRARY_PATH to /data/action/install/lib64.
  linux-gcc-gpu-t4:
    runs-on:
    - self-hosted
    - linux
    - t4
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true
    - name: build
      env:
        CC: gcc
        CXX: g++
        LD_LIBRARY_PATH: /data/action/install/lib64
      run: |
        mkdir build
        cd build
        cmake -DNCNN_VULKAN=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j 4
    - name: test
      env:
        LD_LIBRARY_PATH: /data/action/install/lib64
      run: |
        cd build
        ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/linux-x64-sde.yml
================================================
name: linux-x64-sde
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-sde.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x64-sde.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
concurrency:
  group: linux-x64-sde-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds once with gcc-14, then runs the same test suite repeatedly with
  # $SDE_PATH/sde64 set as the test loader. Each test-* step differs only in the
  # first loader argument (-p4p, -snb, ...), which matches the step's name.
  # All test steps reuse the single ./build tree.
  gcc-sde:
    runs-on: ubuntu-24.04
    steps:
    - uses: actions/checkout@v6
    - name: update
      run: sudo apt-get update
    - name: gcc14
      run: sudo apt-get install gcc-14 g++-14
    # NOTE(review): presumably this action is what defines SDE_PATH used below - confirm.
    - name: Setup SDE binaries
      uses: petarpetrovt/setup-sde@v3.0
    - name: build
      env:
        CC: gcc-14
        CXX: g++-14
      run: |
        mkdir build && cd build
        cmake -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j $(nproc)
    # Loader arguments are a ';'-separated list: the CPU selector, then "--".
    - name: test-p4p
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-p4p;--" ctest --output-on-failure -j $(nproc)
    - name: test-snb
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-snb;--" ctest --output-on-failure -j $(nproc)
    - name: test-hsw
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-hsw;--" ctest --output-on-failure -j $(nproc)
    - name: test-adl
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-adl;--" ctest --output-on-failure -j $(nproc)
    - name: test-arl
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-arl;--" ctest --output-on-failure -j $(nproc)
    - name: test-skx
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-skx;--" ctest --output-on-failure -j $(nproc)
    - name: test-spr
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-spr;--" ctest --output-on-failure -j $(nproc)
    - name: test-gnr
      run: |
        cd build
        TESTS_EXECUTABLE_LOADER=$SDE_PATH/sde64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-gnr;--" ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-x86-cpu-clang.yml
================================================
name: linux-x86-cpu-clang
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x86-cpu-clang.yml'
    - 'toolchains/host.clang-m32.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x86-cpu-clang.yml'
    - 'toolchains/host.clang-m32.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
concurrency:
  group: linux-x86-cpu-clang-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds with clang through the host.clang-m32 toolchain file after installing
  # the multilib packages. Covers a default build with tests, a compile-only
  # shared-library build, and a build with NCNN_INT8 off with tests.
  # Tools and examples are disabled in every configuration.
  linux-clang:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: update
      run: sudo apt-get update
    - name: gcc-multilib
      run: sudo apt-get install gcc-multilib g++-multilib
    - name: build
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test
      run: cd build && ctest --output-on-failure -j $(nproc)
    # Shared-library build; compile-only, there is no matching test step.
    - name: build-shared
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-shared && cd build-shared
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    # Same as the default build with NCNN_INT8 switched off.
    - name: build-noint8
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.clang-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: cd build-noint8 && ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/linux-x86-cpu-gcc.yml
================================================
name: linux-x86-cpu-gcc
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/linux-x86-cpu-gcc.yml'
    - 'toolchains/host.gcc-m32.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/linux-x86-cpu-gcc.yml'
    - 'toolchains/host.gcc-m32.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'tests/**'
concurrency:
  group: linux-x86-cpu-gcc-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read

jobs:
  # Builds through the host.gcc-m32 toolchain file after installing the multilib
  # packages. Covers a default build, a build with SSE2 and AVX off, a
  # compile-only shared-library build, and a build with NCNN_INT8 off.
  # Tools and examples are disabled in every configuration.
  linux-gcc:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
    - name: update
      run: sudo apt-get update
    - name: gcc-multilib
      run: sudo apt-get install gcc-multilib g++-multilib
    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test
      run: cd build && ctest --output-on-failure -j $(nproc)
    # SSE2 and AVX off, with NCNN_RUNTIME_CPU also off.
    - name: build-nosse
      run: |
        mkdir build-nosse && cd build-nosse
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    - name: test-nosse
      run: cd build-nosse && ctest --output-on-failure -j $(nproc)
    # Shared-library build; compile-only, there is no matching test step.
    - name: build-shared
      run: |
        mkdir build-shared && cd build-shared
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j $(nproc)
    # Same as the default build with NCNN_INT8 switched off.
    - name: build-noint8
      run: |
        mkdir build-noint8 && cd build-noint8
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-m32.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_INT8=OFF ..
        cmake --build . -j $(nproc)
    - name: test-noint8
      run: cd build-noint8 && ctest --output-on-failure -j $(nproc)


================================================
FILE: .github/workflows/mac-catalyst.yml
================================================
# Workflow: build ncnn for Mac Catalyst (x86_64 and arm64) with the iOS
# toolchain file.
name: mac-catalyst
# Triggered by pushes and pull requests against master that touch this
# workflow, the iOS toolchain file, the CMake files, the listed source
# directories, or the glslang path. Both events use the same path list.
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/mac-catalyst.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/mac-catalyst.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'glslang'
# Runs are grouped per git ref; a newer run cancels the one still in progress.
concurrency:
  group: mac-catalyst-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide environment. DEVELOPER_DIR points at the Xcode installation
# used by the install-openmp step; the remaining values are referenced as
# shell variables inside the job's cmake option strings.
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  MAC_CATALYST_DEPLOYMENT_TARGET: '13.1'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
# The workflow token only gets read access to repository contents.
permissions:
  contents: read

jobs:
  # Builds a fat (x86_64 + arm64) static libomp, copies it into the Xcode
  # MacOSX SDK, then builds ncnn for both Mac Catalyst architectures.
  build:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      # Shared cmake arguments for the libomp builds. Each line ends with a
      # backslash because the value is spliced into a shell command line.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_CATALYST_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \

      # Shared cmake arguments for the ncnn builds: OpenMP is wired to the
      # static libomp.a and NCNN_VULKAN is enabled.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_CATALYST_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Cache of the merged libomp install tree. The four openmp* steps below
    # only run when this cache misses.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-mac-catalyst-install-20251004
    # Download the LLVM cmake and openmp source tarballs, move the cmake
    # modules into the openmp tree, and apply two patches on top.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # Build and install libomp for x86_64 (PLATFORM=MAC_CATALYST).
    - name: openmp-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install
    # Build and install libomp for arm64 (PLATFORM=MAC_CATALYST_ARM64).
    - name: openmp-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install
    # Populate the cached directory: headers come from the x86_64 install and
    # the two per-architecture libomp.a files are merged with lipo.
    - name: openmp-merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/mac-catalyst

        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install/mac-catalyst
        mkdir -p $GITHUB_WORKSPACE/openmp-install/mac-catalyst/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/mac-catalyst/lib/libomp.a

    # Always runs (cache hit or miss): copy the libomp headers and the fat
    # static library into the MacOSX SDK under DEVELOPER_DIR.
    - name: install-openmp
      run: |
        sudo cp $GITHUB_WORKSPACE/openmp-install/mac-catalyst/include/* $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include
        sudo cp $GITHUB_WORKSPACE/openmp-install/mac-catalyst/lib/libomp.a $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/lib

    # Build ncnn for each architecture. These are build-only steps; no tests
    # are enabled or run in this workflow.
    - name: x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST -DARCHS="x86_64" ..
        cmake --build . -j 4
    - name: arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4


================================================
FILE: .github/workflows/macos.yml
================================================
# Workflow: build ncnn for macOS (arm64 and x86_64) with the iOS toolchain
# file and run the x86_64 tests.
name: macos
# Triggered by pushes and pull requests against master that touch the listed
# paths. 'tools/pnnx/**' is explicitly excluded from the 'tools/**' match.
# Both events use the same path list.
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/macos.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/macos.yml'
    - 'toolchains/ios.toolchain.cmake'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/arm/**'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'tests/**'
    - 'tools/**'
    - '!tools/pnnx/**'
    - 'examples/**'
    - 'glslang'
# Runs are grouped per git ref; a newer run cancels the one still in progress.
concurrency:
  group: macos-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide environment. DEVELOPER_DIR points at the Xcode installation
# used by the install-openmp step; the remaining values are referenced as
# shell variables inside the job's cmake option strings.
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  MAC_DEPLOYMENT_TARGET: '11.0'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
# The workflow token only gets read access to repository contents.
permissions:
  contents: read

jobs:
  # Builds a fat static libomp and SwiftShader (both cached), then builds ncnn
  # static and shared for arm64 and x86_64, and finally runs the x86_64 tests.
  build:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      # Shared cmake arguments for the libomp builds. Each line ends with a
      # backslash because the value is spliced into a shell command line.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \

      # Shared cmake arguments for the ncnn builds: OpenMP is wired to the
      # static libomp.a and NCNN_VULKAN is enabled.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VULKAN=ON \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Cache of the merged libomp install tree. The four openmp* steps below
    # only run when this cache misses.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-macos-install-20251004
    # Download the LLVM cmake and openmp source tarballs, move the cmake
    # modules into the openmp tree, and apply two patches on top.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # Build and install libomp for x86_64 (PLATFORM=MAC).
    - name: openmp-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install
    # Build and install libomp for arm64 (PLATFORM=MAC_ARM64).
    - name: openmp-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install
    # Populate the cached directory: headers come from the x86_64 install and
    # the two per-architecture libomp.a files are merged with lipo.
    - name: openmp-merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/lib/libomp.a

    # Always runs (cache hit or miss): copy the libomp headers and the fat
    # static library into the MacOSX SDK under DEVELOPER_DIR.
    - name: install-openmp
      run: |
        sudo cp $GITHUB_WORKSPACE/openmp-install/include/* $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include
        sudo cp $GITHUB_WORKSPACE/openmp-install/lib/libomp.a $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/lib

    # Cache of the SwiftShader build output. The three swiftshader steps below
    # only run when this cache misses.
    - name: cache-swiftshader
      id: cache-swiftshader
      uses: actions/cache@v5
      with:
        path: swiftshader-install
        key: swiftshader-macos-install-20251004
    # Check out google/swiftshader at a pinned commit into ./swiftshader.
    - name: checkout-swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    # Initialise submodules recursively, with the update of the
    # third_party/git-hooks submodule set to none.
    - name: checkout-swiftshader-submodules
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
    # Build SwiftShader with only the Vulkan target enabled and copy the
    # contents of the build's Darwin/ directory into the cached directory.
    - name: swiftshader
      if: steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        mkdir -p build; cd build
        cmake -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_INSTALL_PREFIX=install -DSWIFTSHADER_BUILD_EGL=FALSE -DSWIFTSHADER_BUILD_GLESv2=FALSE -DSWIFTSHADER_BUILD_GLES_CM=FALSE -DSWIFTSHADER_BUILD_VULKAN=TRUE -DSWIFTSHADER_BUILD_PVR=FALSE -DSWIFTSHADER_BUILD_TESTS=FALSE -DSWIFTSHADER_ENABLE_ASTC=FALSE -DSWIFTSHADER_WARNINGS_AS_ERRORS=FALSE -DREACTOR_BACKEND=Subzero -DREACTOR_DEFAULT_OPT_LEVEL=Default -DCMAKE_BUILD_TYPE=Release ..
        cmake --build . -j 4
        mkdir $GITHUB_WORKSPACE/swiftshader-install
        cp Darwin/* $GITHUB_WORKSPACE/swiftshader-install

    # Static builds. Only the x86_64 configuration enables NCNN_BUILD_TESTS;
    # it is the one exercised by the x86_64-test step at the end.
    - name: arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
    - name: x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" -DNCNN_BUILD_TESTS=ON ..
        cmake --build . -j 4

    # Shared-library builds (NCNN_SHARED_LIB=ON); build-only.
    - name: arm64-shared
      run: |
        mkdir build-arm64-shared && cd build-arm64-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j 4
    - name: x86_64-shared
      run: |
        mkdir build-x86_64-shared && cd build-x86_64-shared
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" -DNCNN_SHARED_LIB=ON ..
        cmake --build . -j 4

    # Run the x86_64 tests with SwiftShader as the Vulkan ICD. A
    # SwiftShader.ini with ThreadCount=1 is written into the tests directory
    # and VK_ICD_FILENAMES points at the SwiftShader ICD manifest.
    - name: x86_64-test
      run: |
        printf "[Processor]\nThreadCount=1\n" > build-x86_64/tests/SwiftShader.ini
        export VK_ICD_FILENAMES="$GITHUB_WORKSPACE/swiftshader-install/vk_swiftshader_icd.json"
        cd build-x86_64 && ctest --output-on-failure -j 4


================================================
FILE: .github/workflows/pnnx.yml
================================================
# Workflow: build the pnnx tool and test it against a matrix of PyTorch
# versions.
name: pnnx
# Triggered by pushes and pull requests against master that touch this
# workflow, 'src/layer/*' or 'tools/pnnx/**'; changes to tools/pnnx/README.md
# alone are excluded. Both events use the same path list.
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/pnnx.yml'
    - 'src/layer/*'
    - 'tools/pnnx/**'
    - '!tools/pnnx/README.md'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/pnnx.yml'
    - 'src/layer/*'
    - 'tools/pnnx/**'
    - '!tools/pnnx/README.md'
# Runs are grouped per git ref; a newer run cancels the one still in progress.
concurrency:
  group: pnnx-${{ github.ref }}
  cancel-in-progress: true
# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Versions of the dependencies built from source by the build job. The
# libtorch, torchvision and onnxruntime versions, together with CACHE_DATE,
# form the cache keys, so changing any of them invalidates the matching cache.
# NOTE(review): SEGMENT_DOWNLOAD_TIMEOUT_MINS is not referenced in this file;
# presumably it is read by the cache action -- confirm.
env:
  LIBTORCH_VERSION: 2.10.0
  TORCHVISION_VERSION: 0.25.0
  PROTOBUF_VERSION: 21.12
  ONNXRUNTIME_VERSION: 1.24.3
  CACHE_DATE: 20260309
  SEGMENT_DOWNLOAD_TIMEOUT_MINS: 15

jobs:
  # Smoke build of pnnx on the three hosted OS images against a pip-installed
  # CPU build of torch. fail-fast is off so one OS failing does not cancel the
  # others.
  quick-test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]

    env:
      # pip --user installs land inside the workspace under ./torch.
      PYTHONUSERBASE: ${{ github.workspace }}/torch
      UseMultiToolTask: true
    steps:
    - uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: 3.12

    # Install torch from the PyTorch CPU wheel index, plus numpy and packaging.
    - name: setup-pytorch
      run: |
        python3 -m pip config set global.break-system-packages true
        pip3 install --user torch --index-url https://download.pytorch.org/whl/cpu
        pip3 install --user numpy packaging

    - name: build-pnnx
      run: |
        cd tools/pnnx
        mkdir build && cd build
        cmake -DCMAKE_BUILD_TYPE=Release ..
        cmake --build . --config Release -j 4

    # Run only the tests matching test_nn_Conv; skipped on windows-latest.
    - name: quick-test
      if: matrix.os != 'windows-latest'
      run: |
        cd tools/pnnx
        cd build && ctest -C Release --output-on-failure -R test_nn_Conv

  # Builds libtorch, torchvision and onnxruntime from source (unless cached),
  # then builds pnnx against them and uploads the binary as an artifact.
  build:
    runs-on: [self-hosted, linux, ubuntu25]

    steps:
    - uses: actions/checkout@v6

    # First cache layer: maxnowack/local-cache, one entry per dependency
    # install tree. The keys combine the dependency version and CACHE_DATE.
    - name: local-cache-libtorch
      id: local-cache-libtorch
      uses: maxnowack/local-cache@v2
      with:
        path: libtorch-${{ env.LIBTORCH_VERSION }}-install
        key: libtorch-${{ env.LIBTORCH_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: local-cache-torchvision
      id: local-cache-torchvision
      uses: maxnowack/local-cache@v2
      with:
        path: torchvision-${{ env.TORCHVISION_VERSION }}-install
        key: torchvision-${{ env.TORCHVISION_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: local-cache-onnxruntime
      id: local-cache-onnxruntime
      uses: maxnowack/local-cache@v2
      with:
        path: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install
        key: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    # Second cache layer: actions/cache with the same paths and keys as the
    # local-cache steps. The action version matches the actions/cache/restore
    # steps the test job uses to read these same entries.
    - name: cache-libtorch
      id: cache-libtorch
      uses: actions/cache@v5
      with:
        path: libtorch-${{ env.LIBTORCH_VERSION }}-install
        key: libtorch-${{ env.LIBTORCH_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: cache-torchvision
      id: cache-torchvision
      uses: actions/cache@v5
      with:
        path: torchvision-${{ env.TORCHVISION_VERSION }}-install
        key: torchvision-${{ env.TORCHVISION_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: cache-onnxruntime
      id: cache-onnxruntime
      uses: actions/cache@v5
      with:
        path: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install
        key: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    # Check out the pnnx/pnnx repository into ./pnnx-patches. Only needed when
    # at least one dependency missed both of its caches and must be rebuilt.
    - name: pnnx-patches
      if: (steps.local-cache-libtorch.outputs.cache-hit != 'true' && steps.cache-libtorch.outputs.cache-hit != 'true') || (steps.local-cache-torchvision.outputs.cache-hit != 'true' && steps.cache-torchvision.outputs.cache-hit != 'true') || (steps.local-cache-onnxruntime.outputs.cache-hit != 'true' && steps.cache-onnxruntime.outputs.cache-hit != 'true')
      uses: actions/checkout@v6
      with:
        repository: pnnx/pnnx
        path: pnnx-patches

    # Build a static libtorch from the release tarball when both libtorch
    # caches missed: apply five patches from pnnx-patches, configure with
    # BUILD_SHARED_LIBS=OFF and the listed features disabled, then
    # install/strip into the cached directory.
    - name: libtorch
      if: steps.local-cache-libtorch.outputs.cache-hit != 'true' && steps.cache-libtorch.outputs.cache-hit != 'true'
      run: |
        wget -q https://github.com/pytorch/pytorch/releases/download/v${{ env.LIBTORCH_VERSION }}/pytorch-v${{ env.LIBTORCH_VERSION }}.tar.gz
        tar -xf pytorch-v${{ env.LIBTORCH_VERSION }}.tar.gz
        cd pytorch-v${{ env.LIBTORCH_VERSION }}
        pip3 install -r requirements.txt --break-system-packages
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/pytorch-v${{ env.LIBTORCH_VERSION }}-fix-mobile-build.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/pytorch-v${{ env.LIBTORCH_VERSION }}-no-link-system-lib.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/pytorch-v${{ env.LIBTORCH_VERSION }}-fix-eigen-build.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/pytorch-v${{ env.LIBTORCH_VERSION }}-fix-link-local-sleef.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/pytorch-v${{ env.LIBTORCH_VERSION }}-revert-nativert-api.patch
        mkdir -p build && cd build
        cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/libtorch-${{ env.LIBTORCH_VERSION }}-install \
            -DCMAKE_BUILD_TYPE=MinSizeRel \
            -DBUILD_SHARED_LIBS=OFF \
            -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \
            -DBUILD_CUSTOM_PROTOBUF=OFF \
            -DBUILD_LITE_INTERPRETER=OFF \
            -DBUILD_PYTHON=OFF \
            -DINTERN_BUILD_MOBILE=ON \
            -DINTERN_DISABLE_AUTOGRAD=ON \
            -DINTERN_DISABLE_ONNX=ON \
            -DUSE_CUDA=OFF \
            -DUSE_DISTRIBUTED=OFF \
            -DUSE_ITT=OFF \
            -DUSE_KINETO=OFF \
            -DUSE_LITE_INTERPRETER_PROFILER=OFF \
            -DUSE_MKLDNN=OFF \
            -DUSE_MPS=OFF \
            -DUSE_NUMPY=OFF \
            -DUSE_OPENMP=OFF \
            -DUSE_SOURCE_DEBUG_ON_MOBILE=OFF \
            -DUSE_XNNPACK=OFF \
            -DBUILD_TEST=OFF \
            -DATEN_NO_TEST=ON \
            ..
        cmake --build . -j 8
        cmake --build . -j 8 --target install/strip

    # Build torchvision when both torchvision caches missed: apply two patches
    # and configure against the libtorch install tree via Torch_DIR.
    - name: torchvision
      if: steps.local-cache-torchvision.outputs.cache-hit != 'true' && steps.cache-torchvision.outputs.cache-hit != 'true'
      run: |
        wget -q https://github.com/pytorch/vision/archive/v${{ env.TORCHVISION_VERSION }}.zip -O vision-${{ env.TORCHVISION_VERSION }}.zip
        unzip -q vision-${{ env.TORCHVISION_VERSION }}.zip
        cd vision-${{ env.TORCHVISION_VERSION }}
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/vision-${{ env.TORCHVISION_VERSION }}-ops-only.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/vision-${{ env.TORCHVISION_VERSION }}-no-cuda-version.patch
        mkdir -p build && cd build
        cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/torchvision-${{ env.TORCHVISION_VERSION }}-install \
            -DTorch_DIR=$GITHUB_WORKSPACE/libtorch-${{ env.LIBTORCH_VERSION }}-install/share/cmake/Torch \
            -DCMAKE_BUILD_TYPE=MinSizeRel \
            -DWITH_PNG=OFF \
            -DWITH_JPEG=OFF ..
        cmake --build . -j 8
        cmake --build . -j 8 --target install/strip

    # Build protobuf and then onnxruntime when both onnxruntime caches missed.
    # Both are installed into the same onnxruntime-<version>-install prefix,
    # which is why the pnnx step later looks for protobuf's cmake files there.
    - name: onnxruntime
      if: steps.local-cache-onnxruntime.outputs.cache-hit != 'true' && steps.cache-onnxruntime.outputs.cache-hit != 'true'
      run: |
        wget -q https://github.com/protocolbuffers/protobuf/archive/v${{ env.PROTOBUF_VERSION }}.zip -O protobuf-${{ env.PROTOBUF_VERSION }}.zip
        unzip -q protobuf-${{ env.PROTOBUF_VERSION }}.zip
        cd protobuf-${{ env.PROTOBUF_VERSION }}
        mkdir -p build2 && cd build2
        cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install \
            -Dprotobuf_BUILD_TESTS=OFF \
            -DCMAKE_BUILD_TYPE=MinSizeRel \
            -DCMAKE_POSITION_INDEPENDENT_CODE=ON ..
        cmake --build . -j 8
        cmake --build . -j 8 --target install/strip

        cd ../../
        wget -q https://github.com/microsoft/onnxruntime/archive/v${{ env.ONNXRUNTIME_VERSION }}.zip -O onnxruntime-${{ env.ONNXRUNTIME_VERSION }}.zip
        unzip -q onnxruntime-${{ env.ONNXRUNTIME_VERSION }}.zip
        cd onnxruntime-${{ env.ONNXRUNTIME_VERSION }}
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-less-mlas-features.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-monolithic-static-library.patch
        patch -p1 -i $GITHUB_WORKSPACE/pnnx-patches/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-use-clog.patch
        mkdir -p build2 && cd build2
        cmake -DCMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install \
            -DCMAKE_BUILD_TYPE=MinSizeRel \
            -Donnxruntime_USE_FULL_PROTOBUF=ON \
            -Donnxruntime_BUILD_SHARED_LIB=ON \
            -Donnxruntime_BUILD_UNIT_TESTS=OFF \
            -Donnxruntime_ENABLE_CPUINFO=OFF \
            -Donnxruntime_DISABLE_CONTRIB_OPS=ON \
            -Donnxruntime_DISABLE_ML_OPS=ON \
            -Donnxruntime_DISABLE_SPARSE_TENSORS=ON \
            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
            --compile-no-warning-as-error ../cmake
        cmake --build . -j 8
        cmake --build . -j 8 --target install/strip

    # Build pnnx against the three install trees and strip the binary.
    - name: pnnx
      run: |
        cd tools/pnnx
        mkdir build && cd build
        cmake -DCMAKE_BUILD_TYPE=MinSizeRel \
            -DTorch_INSTALL_DIR=$GITHUB_WORKSPACE/libtorch-${{ env.LIBTORCH_VERSION }}-install \
            -DTorchVision_INSTALL_DIR=$GITHUB_WORKSPACE/torchvision-${{ env.TORCHVISION_VERSION }}-install \
            -Donnxruntime_INSTALL_DIR=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install \
            -Dprotobuf_DIR=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install/lib/cmake/protobuf ..
        cmake --build . -j 8
        strip src/pnnx

    # Publish the stripped binary as the "pnnx" artifact; the test job
    # downloads it instead of rebuilding.
    - name: upload-pnnx
      uses: actions/upload-artifact@v5
      with:
        name: pnnx
        path: tools/pnnx/build/src/pnnx
        compression-level: 9

  # Runs the pnnx test suite once per matrix entry, using the pnnx binary
  # produced by the build job. fail-fast is off so every entry reports its own
  # result.
  test:
    needs: [build]
    runs-on: [self-hosted, linux, ubuntu25]
    strategy:
      fail-fast: false
      matrix:
        # One entry per torch release, each pinned to specific python, numpy,
        # opencv, torchvision, torchaudio and transformers versions.
        include:
          - { python: '3.8',  numpy: '1.24.4', opencv: '4.5.*',  torch: '1.8.1',  torchvision: '0.9.1',  torchaudio: '0.8.1',      transformers: '4.52.1' }
          - { python: '3.8',  numpy: '1.24.4', opencv: '4.5.*',  torch: '1.9.1',  torchvision: '0.10.1', torchaudio: '0.9.1',      transformers: '4.52.1' }
          - { python: '3.8',  numpy: '1.24.4', opencv: '4.6.*',  torch: '1.10.0', torchvision: '0.11.1', torchaudio: '0.10.0+cpu', transformers: '4.52.1' }
          - { python: '3.9',  numpy: '1.26.4', opencv: '4.6.*',  torch: '1.11.0', torchvision: '0.12.0', torchaudio: '0.11.0+cpu', transformers: '4.52.1' }
          - { python: '3.9',  numpy: '1.26.4', opencv: '4.7.*',  torch: '1.12.0', torchvision: '0.13.0', torchaudio: '0.12.0+cpu', transformers: '4.52.1' }
          - { python: '3.10', numpy: '1.26.4', opencv: '4.7.*',  torch: '1.13.0', torchvision: '0.14.0', torchaudio: '0.13.0+cpu', transformers: '4.52.1' }
          - { python: '3.10', numpy: '1.26.4', opencv: '4.8.*',  torch: '2.0.0',  torchvision: '0.15.1', torchaudio: '2.0.0+cpu',  transformers: '4.52.1' }
          - { python: '3.10', numpy: '1.26.4', opencv: '4.8.*',  torch: '2.1.0',  torchvision: '0.16.0', torchaudio: '2.1.0+cpu',  transformers: '4.52.1' }
          - { python: '3.11', numpy: '1.26.4', opencv: '4.9.*',  torch: '2.2.1',  torchvision: '0.17.1', torchaudio: '2.2.1+cpu',  transformers: '4.52.1' }
          - { python: '3.11', numpy: '1.26.4', opencv: '4.9.*',  torch: '2.3.0',  torchvision: '0.18.0', torchaudio: '2.3.0+cpu',  transformers: '4.52.1' }
          - { python: '3.11', numpy: '2.2.5',  opencv: '4.10.*', torch: '2.4.0',  torchvision: '0.19.0', torchaudio: '2.4.0+cpu',  transformers: '4.52.1' }
          - { python: '3.12', numpy: '2.2.5',  opencv: '4.10.*', torch: '2.5.0',  torchvision: '0.20.0', torchaudio: '2.5.0+cpu',  transformers: '4.52.1' }
          - { python: '3.12', numpy: '2.2.5',  opencv: '4.11.*', torch: '2.6.0',  torchvision: '0.21.0', torchaudio: '2.6.0+cpu',  transformers: '4.52.1' }
          - { python: '3.12', numpy: '2.2.5',  opencv: '4.11.*', torch: '2.7.0',  torchvision: '0.22.0', torchaudio: '2.7.0+cpu',  transformers: '4.52.1' }
          - { python: '3.13', numpy: '2.2.5',  opencv: '4.12.*', torch: '2.8.0',  torchvision: '0.23.0', torchaudio: '2.8.0+cpu',  transformers: '4.56.2' }
          - { python: '3.13', numpy: '2.2.5',  opencv: '4.12.*', torch: '2.9.0',  torchvision: '0.24.0', torchaudio: '2.9.0+cpu',  transformers: '4.56.2' }
          - { python: '3.13', numpy: '2.2.5',  opencv: '4.12.*', torch: '2.10.0', torchvision: '0.25.0', torchaudio: '2.10.0+cpu', transformers: '4.56.2' }

    name: test-${{ matrix.torch }}-py${{ matrix.python }}

    env:
      # pip --user installs land in a per-Python-version directory inside the
      # workspace.
      PYTHONUSERBASE: ${{ github.workspace }}/python-${{ matrix.python }}

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Restore the dependency install trees, local cache first. Paths and keys
    # are the same as in the build job.
    - name: local-cache-libtorch
      id: local-cache-libtorch
      uses: maxnowack/local-cache@v2
      with:
        path: libtorch-${{ env.LIBTORCH_VERSION }}-install
        key: libtorch-${{ env.LIBTORCH_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: local-cache-torchvision
      id: local-cache-torchvision
      uses: maxnowack/local-cache@v2
      with:
        path: torchvision-${{ env.TORCHVISION_VERSION }}-install
        key: torchvision-${{ env.TORCHVISION_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    - name: local-cache-onnxruntime
      id: local-cache-onnxruntime
      uses: maxnowack/local-cache@v2
      with:
        path: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install
        key: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-linux-install-${{ env.CACHE_DATE }}

    # Fallback when the local cache missed: restore-only from actions/cache.
    # fail-on-cache-miss makes the job fail here if the entry is absent, since
    # this job never builds the dependencies itself.
    - name: cache-libtorch
      if: steps.local-cache-libtorch.outputs.cache-hit != 'true'
      id: cache-libtorch
      uses: actions/cache/restore@v5
      with:
        path: libtorch-${{ env.LIBTORCH_VERSION }}-install
        key: libtorch-${{ env.LIBTORCH_VERSION }}-linux-install-${{ env.CACHE_DATE }}
        fail-on-cache-miss: true

    - name: cache-torchvision
      if: steps.local-cache-torchvision.outputs.cache-hit != 'true'
      id: cache-torchvision
      uses: actions/cache/restore@v5
      with:
        path: torchvision-${{ env.TORCHVISION_VERSION }}-install
        key: torchvision-${{ env.TORCHVISION_VERSION }}-linux-install-${{ env.CACHE_DATE }}
        fail-on-cache-miss: true

    - name: cache-onnxruntime
      if: steps.local-cache-onnxruntime.outputs.cache-hit != 'true'
      id: cache-onnxruntime
      uses: actions/cache/restore@v5
      with:
        path: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install
        key: onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-linux-install-${{ env.CACHE_DATE }}
        fail-on-cache-miss: true

    - uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python }}

    # Install the Python packages for this matrix entry. torch and torchvision
    # get a "+cpu" suffix appended here; the torchaudio value is used exactly
    # as written in the matrix.
    - name: setup-pytorch
      run: |
        export PATH=${{ env.PYTHONUSERBASE }}/bin:$PATH
        pip3 install --user pytest wheel twine requests einops numpy==${{ matrix.numpy }} opencv-python==${{ matrix.opencv }}
        pip3 install --user torch==${{ matrix.torch }}+cpu torchvision==${{ matrix.torchvision }}+cpu torchaudio==${{ matrix.torchaudio }} --index-url https://download.pytorch.org/whl/cpu
        pip3 install --user onnx onnxscript onnxruntime
        pip3 install --user "transformers<=${{ matrix.transformers }}" diffusers "safetensors<=0.6.2"

    # Clear the executable-stack flag on the pip-installed libtorch_cpu.so,
    # for the Python 3.8 and 3.9 matrix entries only. Both tools are tried and
    # each failure is ignored (|| true), so whichever is available does the job.
    # The condition is a bare expression on purpose: wrapping only the operands
    # in ${{ }} and leaving the operators outside turns the whole value into a
    # non-empty string, which always evaluates as true.
    - name: setup-pytorch-execstack-or-patchelf
      if: matrix.python == '3.8' || matrix.python == '3.9'
      run: |
        execstack -c ${{ env.PYTHONUSERBASE }}/lib/python${{ matrix.python }}/site-packages/torch/lib/libtorch_cpu.so || true
        patchelf --clear-execstack ${{ env.PYTHONUSERBASE }}/lib/python${{ matrix.python }}/site-packages/torch/lib/libtorch_cpu.so || true

    # Install the Python package defined at the repository root with pip,
    # using 8 parallel build jobs.
    - name: python-ncnn
      run: |
        export CMAKE_BUILD_PARALLEL_LEVEL=8
        pip3 install --user . --verbose

    # Configure only (there is no `cmake --build` here): this sets up the
    # build tree for ctest, while the pnnx binary itself comes from the
    # artifact downloaded in the next step.
    - name: pnnx
      run: |
        cd tools/pnnx
        mkdir build && cd build
        cmake -DCMAKE_BUILD_TYPE=Release \
            -DTorch_INSTALL_DIR=$GITHUB_WORKSPACE/libtorch-${{ env.LIBTORCH_VERSION }}-install \
            -DTorchVision_INSTALL_DIR=$GITHUB_WORKSPACE/torchvision-${{ env.TORCHVISION_VERSION }}-install \
            -Donnxruntime_INSTALL_DIR=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install \
            -Dprotobuf_DIR=$GITHUB_WORKSPACE/onnxruntime-${{ env.ONNXRUNTIME_VERSION }}-install/lib/cmake/protobuf ..

    # Place the binary uploaded by the build job at tools/pnnx/build/src/pnnx.
    - name: download-pnnx
      uses: actions/download-artifact@v8
      with:
        name: pnnx
        path: tools/pnnx/build/src

    # Mark the downloaded binary executable, pin OpenMP/MKL to one thread and
    # MKL to SSE4_2, then run the whole ctest suite with 8 parallel jobs.
    - name: test
      run: |
        export PATH=${{ env.PYTHONUSERBASE }}/bin:$PATH
        chmod +x tools/pnnx/build/src/pnnx
        export OMP_THREAD_LIMIT=1
        export OMP_NUM_THREADS=1
        export MKL_NUM_THREADS=1
        export MKL_ENABLE_INSTRUCTIONS=SSE4_2
        cd tools/pnnx/build
        ctest --output-on-failure -j 8

    # Install the pnnx Python package with the downloaded binary copied into
    # it (PNNX_WHEEL_WITHOUT_BUILD=ON is exported first), then run its pytest
    # suite.
    - name: python-pnnx
      run: |
        export PATH=${{ env.PYTHONUSERBASE }}/bin:$PATH
        export PNNX_WHEEL_WITHOUT_BUILD=ON
        cd tools/pnnx/python
        cp ../build/src/pnnx pnnx/
        python3 setup.py install --user
        pytest tests


================================================
FILE: .github/workflows/python.yml
================================================
# Workflow: build the ncnn Python bindings on Linux, macOS and Windows.
name: python
# Triggered by pushes and pull requests against master that touch this
# workflow, the CMake files, the listed source directories, the python
# directory, or the glslang path. Both events use the same path list.
on:
  push:
    branches: [master]
    paths:
    - '.github/workflows/python.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'python/**'
    - 'glslang'
  pull_request:
    branches: [master]
    paths:
    - '.github/workflows/python.yml'
    - 'CMakeLists.txt'
    - 'cmake/**'
    - 'src/*'
    - 'src/layer/*'
    - 'src/layer/x86/**'
    - 'src/layer/vulkan/**'
    - 'python/**'
    - 'glslang'
# Runs are grouped per git ref; a newer run cancels the one still in progress.
concurrency:
  group: python-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide environment shared by every matrix entry.
# NOTE(review): DEVELOPER_DIR and the MAC_*/ENABLE_* values look macOS-specific
# and UseMultiToolTask looks MSBuild-specific -- confirm against the job steps.
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  MAC_DEPLOYMENT_TARGET: '11.0'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
  CMAKE_BUILD_PARALLEL_LEVEL: 4
  UseMultiToolTask: true
# The workflow token only gets read access to repository contents.
permissions:
  contents: read

jobs:
  # Build the ncnn Python binding and run its tests on every combination of
  # runner image and Python version listed in the matrix.
  build:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-15-intel, windows-latest]
        # Versions are quoted so YAML keeps them as strings; an unquoted
        # value such as 3.10 would be parsed as the float 3.1.
        python-version: ['3.9', '3.12']

    runs-on: ${{ matrix.os }}

    steps:
    # Check out the repository together with its git submodules.
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Ubuntu only: restore a previously built SwiftShader from the cache.
    # The three steps below run only when this cache lookup misses.
    - name: cache-swiftshader
      if: matrix.os == 'ubuntu-latest'
      id: cache-swiftshader
      uses: actions/cache@v5
      with:
        path: swiftshader-install
        key: swiftshader-linux-install-20240622
    # Cache miss: fetch google/swiftshader at a pinned commit into ./swiftshader.
    - name: checkout-swiftshader
      if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
      with:
        repository: google/swiftshader
        path: swiftshader
        ref: de870ac7518fe2b6bb651ecc22fc36647cf7b986
    # Cache miss: initialise SwiftShader's submodules recursively, with the
    # third_party/git-hooks submodule explicitly excluded (update=none).
    - name: checkout-swiftshader-submodules
      if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        git -c submodule."third_party/git-hooks".update=none submodule update --init --recursive
    # Cache miss: configure a Release build with only the Vulkan target enabled
    # (EGL/GLES/PVR/tests/ASTC off), build it, and copy the contents of the
    # build's Linux/ directory into swiftshader-install (the cached path).
    - name: swiftshader
      if: matrix.os == 'ubuntu-latest' && steps.cache-swiftshader.outputs.cache-hit != 'true'
      run: |
        cd swiftshader
        mkdir -p build; cd build
        cmake -DCMAKE_INSTALL_PREFIX=install -DSWIFTSHADER_BUILD_EGL=FALSE -DSWIFTSHADER_BUILD_GLESv2=FALSE -DSWIFTSHADER_BUILD_GLES_CM=FALSE -DSWIFTSHADER_BUILD_VULKAN=TRUE -DSWIFTSHADER_BUILD_PVR=FALSE -DSWIFTSHADER_BUILD_TESTS=FALSE -DSWIFTSHADER_ENABLE_ASTC=FALSE -DSWIFTSHADER_WARNINGS_AS_ERRORS=FALSE -DREACTOR_BACKEND=Subzero -DREACTOR_DEFAULT_OPT_LEVEL=Default -DCMAKE_BUILD_TYPE=Release ..
        cmake --build . -j $(nproc)
        mkdir $GITHUB_WORKSPACE/swiftshader-install
        cp Linux/* $GITHUB_WORKSPACE/swiftshader-install

    # Install the Python interpreter selected by the matrix entry.
    - name: setup-python
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
    # Upgrade pip, then install the test and packaging tools used later.
    - name: install-deps
      run: |
        python -m pip install --upgrade pip
        pip install pytest setuptools wheel twine importlib-metadata

    # Exactly one of the three `build` steps runs, selected by matrix.os.
    # All of them enable NCNN_PYTHON and disable tools and examples.

    # Ubuntu: build with clang, Vulkan enabled.
    - name: build
      if: matrix.os == 'ubuntu-latest'
      env:
        CC: clang
        CXX: clang++
      run: |
        mkdir build && cd build
        cmake -DNCNN_VULKAN=ON -DNCNN_PYTHON=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j $(nproc)
    # macOS (Intel): build x86_64 through the bundled ios.toolchain.cmake using
    # the workflow-level deployment/bitcode/ARC/visibility settings; Vulkan off.
    - name: build
      if: matrix.os == 'macos-15-intel'
      run: |
        mkdir build && cd build
        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DPLATFORM=MAC -DARCHS="x86_64" \
            -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET -DENABLE_BITCODE=$ENABLE_BITCODE -DENABLE_ARC=$ENABLE_ARC -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
            -DNCNN_VULKAN=OFF -DNCNN_PYTHON=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . -j 4
    # Windows: x64 build with the v142 toolset, Release configuration; Vulkan off.
    - name: build
      if: matrix.os == 'windows-latest'
      run: |
        mkdir build; cd build
        cmake -T v142,host=x64 -A x64 -DNCNN_VULKAN=OFF -DNCNN_PYTHON=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
        cmake --build . --config Release -j 4
    # Install the Python package from the python/ directory with pip.
    - name: build-python
      run: cd python && pip install .
    # Ubuntu: point the Vulkan loader at the SwiftShader ICD manifest in
    # swiftshader-install before running pytest.
    - name: test
      if: matrix.os == 'ubuntu-latest'
      run: |
        export VK_ICD_FILENAMES="$GITHUB_WORKSPACE/swiftshader-install/vk_swiftshader_icd.json"
        cd python && pytest tests
    # Other runners: run pytest without any Vulkan ICD override.
    - name: test
      if: matrix.os != 'ubuntu-latest'
      run: |
        cd python && pytest tests


================================================
FILE: .github/workflows/release-python.yml
================================================
# Workflow "release-python": builds the sdist and wheels and publishes them.
name: release-python
# Runs on every pushed tag and can also be started manually.
on:
  push:
    tags:
      - '*'
  workflow_dispatch:

# Environment shared by every job in this workflow.
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  MAC_DEPLOYMENT_TARGET: '11.0'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
  # cibuildwheel skip pattern.
  # NOTE(review): "cp3??t-*" presumably targets the free-threaded CPython
  # build identifiers - confirm against the cibuildwheel identifier list.
  CIBW_SKIP: "cp3??t-*"

jobs:
  # Build the source distribution, validate its metadata with twine, and
  # publish the tarball as the `sdist` artifact.
  build_sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    - uses: actions/setup-python@v6
      with:
        python-version: '3.x'

    - name: Install deps
      run: python -m pip install twine build

    # `-s` builds only the sdist (no wheel).
    - name: Build SDist
      run: python -m build -s

    - name: Check metadata
      run: twine check dist/*

    - uses: actions/upload-artifact@v6
      with:
        name: sdist
        path: dist/*.tar.gz

  # Build binary wheels with cibuildwheel. Each matrix entry fixes the runner
  # image (os), the target architecture (arch), the cibuildwheel build selector
  # (build) and a short tag (build_id) used in the uploaded artifact name.
  build_wheels:
    name: ${{ matrix.arch }} ${{ matrix.build_id }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - { os: ubuntu-24.04,     arch: x86_64,     build: 'cp*-manylinux*', build_id: cp-manylinux }
          - { os: ubuntu-24.04,     arch: x86_64,     build: 'cp*-musllinux*', build_id: cp-musllinux }
          - { os: ubuntu-24.04,     arch: x86_64,     build: 'pp*',            build_id: pp           }
          - { os: ubuntu-24.04,     arch: i686,       build: 'cp*-manylinux*', build_id: cp-manylinux }
          - { os: ubuntu-24.04,     arch: i686,       build: 'cp*-musllinux*', build_id: cp-musllinux }
          - { os: ubuntu-24.04,     arch: i686,       build: 'pp*',            build_id: pp           }
          - { os: windows-2025,     arch: x86,        build: 'cp*',            build_id: cp           }
          - { os: windows-2025,     arch: AMD64,      build: 'cp*',            build_id: cp           }
          - { os: windows-2025,     arch: AMD64,      build: 'pp*',            build_id: pp           }
          - { os: windows-11-arm,   arch: ARM64,      build: 'cp*',            build_id: cp           }
          - { os: macos-15-intel,   arch: x86_64,     build: 'cp*',            build_id: cp           }
          - { os: macos-15,         arch: arm64,      build: 'cp*',            build_id: cp           }
          - { os: ubuntu-24.04-arm, arch: armv7l,     build: 'cp*-manylinux*', build_id: cp-manylinux }
          - { os: ubuntu-24.04-arm, arch: armv7l,     build: 'cp*-musllinux*', build_id: cp-musllinux }
          - { os: ubuntu-24.04-arm, arch: aarch64,    build: 'cp*-manylinux*', build_id: cp-manylinux }
          - { os: ubuntu-24.04-arm, arch: aarch64,    build: 'cp*-musllinux*', build_id: cp-musllinux }
          - { os: ubuntu-24.04-arm, arch: aarch64,    build: 'pp*',            build_id: pp           }

    env:
      # LLVM OpenMP release that the macOS steps download and build.
      OPENMP_VERSION: '18.1.2'
      # CMake arguments shared by the per-architecture OpenMP builds. The text
      # is pasted into a shell command, so the $VARS are expanded by the shell
      # and the trailing backslashes continue the command line.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \

    steps:
    # Check out the repository together with its git submodules.
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Build wheels on the x86 Ubuntu runner (matrix arch x86_64 or i686).
    # Architecture and build selector come straight from the matrix entry;
    # wheels are written to ./wheelhouse.
    - name: Build wheels for ubuntu
      if: matrix.os == 'ubuntu-24.04'
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_LINUX: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4
      with:
        output-dir: wheelhouse

    # Build armv7l wheels on the ARM Ubuntu runner. Unlike the other Linux
    # steps, this one also passes -mfpu=neon through CFLAGS/CXXFLAGS.
    - name: Build wheels for ubuntu armv7l
      if: matrix.os == 'ubuntu-24.04-arm' && (matrix.arch == 'armv7l')
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_LINUX: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4
          CFLAGS="-mfpu=neon" CXXFLAGS="-mfpu=neon"
      with:
        output-dir: wheelhouse

    # Build aarch64 wheels on the ARM Ubuntu runner.
    - name: Build wheels for ubuntu aarch64
      if: matrix.os == 'ubuntu-24.04-arm' && (matrix.arch == 'aarch64')
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_LINUX: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4
      with:
        output-dir: wheelhouse

    # Build x86/AMD64 wheels on the windows-2025 runner. delvewheel is
    # installed before each build and used as the wheel repair command.
    - name: Build wheels for windows
      if: matrix.os == 'windows-2025' && (matrix.arch == 'AMD64' || matrix.arch == 'x86')
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_WINDOWS: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT_WINDOWS: CMAKE_BUILD_PARALLEL_LEVEL=4
        CIBW_BEFORE_BUILD: pip install delvewheel
        CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -w {dest_dir} {wheel}
      with:
        output-dir: wheelhouse

    # Build ARM64 wheels on the windows-11-arm runner. Same as above, except
    # the repair command passes --no-dll for msvcp140.dll and vcomp140.dll.
    - name: Build wheels for windows ARM64
      if: matrix.os == 'windows-11-arm' && matrix.arch == 'ARM64'
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_WINDOWS: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT_WINDOWS: CMAKE_BUILD_PARALLEL_LEVEL=4
        CIBW_BEFORE_BUILD: pip install delvewheel
        CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -w {dest_dir} {wheel} --no-dll "msvcp140.dll;vcomp140.dll"
      with:
        output-dir: wheelhouse

    # macOS only: provide a static universal libomp for the wheel builds.
    # Restore a previously built openmp-install from the cache; the download
    # and build steps below run only when this lookup misses.
    - name: cache-openmp for macos
      if: matrix.os == 'macos-15-intel' || matrix.os == 'macos-15'
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-macos-install-20251004

    # Cache miss: download the LLVM cmake and openmp source tarballs for
    # OPENMP_VERSION, move the cmake modules into the openmp tree, and apply
    # two patches fetched from the nihui/llvm-project fork.
    - name: openmp for macos
      if: (matrix.os == 'macos-15-intel' || matrix.os == 'macos-15') && steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch

    # Cache miss: build and install libomp for x86_64 (PLATFORM=MAC).
    - name: openmp-build-x86_64 for macos
      if: (matrix.os == 'macos-15-intel' || matrix.os == 'macos-15') && steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install

    # Cache miss: build and install libomp for arm64 (PLATFORM=MAC_ARM64).
    - name: openmp-build-arm64 for macos
      if: (matrix.os == 'macos-15-intel' || matrix.os == 'macos-15') && steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install

    # Cache miss: copy the headers from the x86_64 install and merge the two
    # per-architecture libomp.a archives into one with lipo, producing the
    # openmp-install directory that the cache step stores.
    - name: openmp-merge-fat-library for macos
      if: (matrix.os == 'macos-15-intel' || matrix.os == 'macos-15') && steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/lib/libomp.a

    # Always (on macOS): copy the OpenMP headers and libomp.a into the
    # MacOSX SDK under $DEVELOPER_DIR.
    - name: install-openmp for macos
      if: matrix.os == 'macos-15-intel' || matrix.os == 'macos-15'
      run: |
        sudo cp $GITHUB_WORKSPACE/openmp-install/include/* $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include
        sudo cp $GITHUB_WORKSPACE/openmp-install/lib/libomp.a $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/lib

    # macOS only: download the LunarG Vulkan SDK 1.4.335.1 installer, unzip it
    # and run it unattended with $GITHUB_WORKSPACE/vulkansdk-macos-1.4.335.1
    # as the install root. The wheel build steps reference libMoltenVK.dylib
    # under that root.
    - name: vulkansdk for macos
      if: matrix.os == 'macos-15-intel' || matrix.os == 'macos-15'
      run: |
        wget -q https://sdk.lunarg.com/sdk/download/1.4.335.1/mac/vulkansdk-macos-1.4.335.1.zip?Human=true -O vulkansdk-macos-1.4.335.1.zip
        unzip vulkansdk-macos-1.4.335.1.zip
        sudo vulkansdk-macOS-1.4.335.1.app/Contents/MacOS/vulkansdk-macOS-1.4.335.1 --root $GITHUB_WORKSPACE/vulkansdk-macos-1.4.335.1 --accept-licenses --default-answer --confirm-command install

    # Build x86_64 macOS wheels on the Intel runner. CIBW_ENVIRONMENT is one
    # folded scalar holding the toolchain file, platform/arch, deployment
    # target, OpenMP flags and library name, and the MoltenVK library path.
    # NOTE(review): these variables are presumably read by the project's
    # setup.py and forwarded to CMake - confirm.
    - name: Build wheels for macos x86_64
      if: matrix.os == 'macos-15-intel' && matrix.arch == 'x86_64'
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_MACOS: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4
          CMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchains/ios.toolchain.cmake PLATFORM=MAC ARCHS="x86_64"
          DEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET ENABLE_BITCODE=OFF ENABLE_ARC=OFF ENABLE_VISIBILITY=OFF
          OpenMP_C_FLAGS="-Xclang -fopenmp" OpenMP_CXX_FLAGS="-Xclang -fopenmp"
          OpenMP_C_LIB_NAMES="libomp" OpenMP_CXX_LIB_NAMES="libomp"
          OpenMP_libomp_LIBRARY="libomp.a"
          Vulkan_LIBRARY=$GITHUB_WORKSPACE/vulkansdk-macos-1.4.335.1/macOS/lib/libMoltenVK.dylib
          MACOSX_DEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET
      with:
        output-dir: wheelhouse

    # Build arm64 macOS wheels on the Apple Silicon runner. Identical to the
    # x86_64 step except for PLATFORM=MAC_ARM64 and ARCHS="arm64".
    - name: Build wheels for macos arm64
      if: matrix.os == 'macos-15' && matrix.arch == 'arm64'
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_MACOS: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build }}
        CIBW_ENABLE: pypy
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4
          CMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/toolchains/ios.toolchain.cmake PLATFORM=MAC_ARM64 ARCHS="arm64"
          DEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET ENABLE_BITCODE=OFF ENABLE_ARC=OFF ENABLE_VISIBILITY=OFF
          OpenMP_C_FLAGS="-Xclang -fopenmp" OpenMP_CXX_FLAGS="-Xclang -fopenmp"
          OpenMP_C_LIB_NAMES="libomp" OpenMP_CXX_LIB_NAMES="libomp"
          OpenMP_libomp_LIBRARY="libomp.a"
          Vulkan_LIBRARY=$GITHUB_WORKSPACE/vulkansdk-macos-1.4.335.1/macOS/lib/libMoltenVK.dylib
          MACOSX_DEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET
      with:
        output-dir: wheelhouse

    # List the produced wheels; bash is forced so the same command is used on
    # every runner OS.
    - name: Show files
      run: ls -lh wheelhouse
      shell: bash

    # Fail the job if the build modified any tracked file in the checkout.
    - name: Verify clean directory
      run: git diff --exit-code
      shell: bash

    # Publish the wheels; the artifact name is unique per matrix entry.
    - name: Upload wheels
      uses: actions/upload-artifact@v6
      with:
        name: wheels-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.build_id }}
        path: wheelhouse/*.whl

  # Build riscv64 wheels under QEMU emulation on an x86 Ubuntu runner, with
  # one job per CPython version and per libc flavour (manylinux/musllinux).
  build_wheels_qemu_cp:
    name: ${{ matrix.arch }} ${{ matrix.build_cp }} ${{ matrix.build_sub }}
    runs-on: ubuntu-24.04

    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        arch: [riscv64]
        build_cp: [cp38, cp39, cp310, cp311, cp312, cp313, cp314]
        build_sub: [manylinux, musllinux]

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true

    # Register QEMU emulation for all platforms supported by the action.
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
      with:
        platforms: all

    # The build selector is composed from the matrix, e.g. "cp312-manylinux*".
    # EXTRA_CMAKE_ARGS turns NCNN_XTHEADVECTOR off for these builds.
    - name: Build wheels with qemu
      uses: pypa/cibuildwheel@v3.3.1
      env:
        CIBW_ARCHS_LINUX: ${{ matrix.arch }}
        CIBW_BUILD: ${{ matrix.build_cp }}-${{ matrix.build_sub }}*
        CIBW_BUILD_VERBOSITY: 1
        CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=4 EXTRA_CMAKE_ARGS="-DNCNN_XTHEADVECTOR=OFF"
      with:
        output-dir: wheelhouse

    - name: Show files
      run: ls -lh wheelhouse
      shell: bash

    # Fail the job if the build modified any tracked file in the checkout.
    - name: Verify clean directory
      run: git diff --exit-code
      shell: bash

    - name: Upload wheels
      uses: actions/upload-artifact@v6
      with:
        name: wheels_qemu_cp-${{ matrix.arch }}-${{ matrix.build_cp }}-${{ matrix.build_sub }}
        path: wheelhouse/*.whl

  # Collect every artifact produced by the build jobs into dist/ and publish
  # the result with the PyPI publish action. Runs only after all three build
  # jobs have succeeded; the job's token gets no repository contents access.
  upload_all:
    permissions:
      contents: none
    name: Upload
    needs: [build_wheels, build_wheels_qemu_cp, build_sdist]
    runs-on: ubuntu-latest

    steps:
    # No artifact name is given, so all artifacts of the run are downloaded;
    # merge-multiple places their files directly into dist/.
    - uses: actions/download-artifact@v8
      with:
        path: dist
        merge-multiple: true

    # Authenticate with an API token stored in the PYPI_API_TOKEN secret.
    - uses: pypa/gh-action-pypi-publish@release/v1
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}


================================================
FILE: .github/workflows/release.yml
================================================
# Workflow "release": builds the release packages for each platform.
name: release
# Runs on every pushed tag.
on:
  push:
    tags:
      - '*'

# Environment shared by every job: Xcode location, per-platform Apple
# deployment targets, toolchain switches and the Emscripten version.
env:
  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer
  IOS_DEPLOYMENT_TARGET: '13.0'
  MAC_DEPLOYMENT_TARGET: '11.0'
  MAC_CATALYST_DEPLOYMENT_TARGET: '13.1'
  WATCHOS_DEPLOYMENT_TARGET: '6.0'
  TVOS_DEPLOYMENT_TARGET: '11.0'
  VISIONOS_DEPLOYMENT_TARGET: '1.0'
  ENABLE_BITCODE: OFF
  ENABLE_ARC: OFF
  ENABLE_VISIBILITY: OFF
  EMSCRIPTEN_VERSION: 3.1.28

# Default token permission for all jobs: read-only repository contents.
permissions:
  contents: read

jobs:

  # Derive the release version from the pushed tag and expose it as the job
  # output VERSION, which the packaging jobs read via needs.setup.outputs.
  setup:
    permissions:
      contents: none
    runs-on: ubuntu-latest
    outputs:
      VERSION: ${{ steps.get_version.outputs.VERSION }}
    steps:
    # Strip the "refs/tags/" prefix from GITHUB_REF using bash substitution.
    - name: get-version
      id: get_version
      run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_OUTPUT

  # Package the complete source tree, submodules included, as a zip artifact
  # named ncnn-<VERSION>-full-source.
  full-source:
    needs: [setup]
    runs-on: ubuntu-latest
    env:
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-full-source
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true
    # Drop the top-level .git directory, then zip the workspace into /tmp
    # (outside the tree being archived). -y stores symlinks as symlinks.
    - name: package
      run: |
        rm -rf .git
        rm -f /tmp/${{ env.PACKAGENAME }}.zip
        zip -9 -y -r /tmp/${{ env.PACKAGENAME }}.zip .
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: /tmp/${{ env.PACKAGENAME }}.zip

  # Build the Ubuntu release packages: static and shared library variants on
  # Ubuntu 22.04 and 24.04, each uploaded as ncnn-<VERSION>-<id>.zip.
  ubuntu:
    needs: [setup]
    strategy:
      matrix:
        opt:
          - { shared-lib: OFF, os: ubuntu-22.04, id: ubuntu-2204        }
          - { shared-lib: OFF, os: ubuntu-24.04, id: ubuntu-2404        }
          - { shared-lib: ON,  os: ubuntu-22.04, id: ubuntu-2204-shared }
          - { shared-lib: ON,  os: ubuntu-24.04, id: ubuntu-2404-shared }
    runs-on: ${{ matrix.opt.os }}
    env:
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-${{ matrix.opt.id }}
    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true
    # Refresh the package index first: the index baked into the runner image
    # can be stale, which makes `apt-get install` fail with 404 errors.
    - name: apt
      run: |
        sudo apt-get update
        sudo apt-get install -y libprotobuf-dev protobuf-compiler
    # Release build with Vulkan and tools enabled, examples and benchmark
    # disabled; installs (stripped) into build/install.
    - name: build
      run: |
        mkdir build && cd build
        cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
            -DNCNN_VULKAN=ON -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TOOLS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_SHARED_LIB=${{ matrix.opt.shared-lib }} ..
        cmake --build . -j $(nproc)
        cmake --build . --target install/strip
    # Copy the install tree into a directory named after the package and zip it.
    - name: package
      run: |
        rm -rf ${{ env.PACKAGENAME }}
        mkdir -p ${{ env.PACKAGENAME }}
        cp -a build/install/* ${{ env.PACKAGENAME }}
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip ${{ env.PACKAGENAME }}
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: ${{ env.PACKAGENAME }}.zip

  # Build (or restore from cache) a static universal libomp for macOS and
  # publish it as the `openmp-macos` artifact.
  openmp-macos:
    runs-on: macos-15-intel
    env:
      # LLVM OpenMP release to download and build.
      OPENMP_VERSION: '18.1.2'
      # CMake arguments shared by the per-architecture builds. The text is
      # pasted into a shell command, so the $VARS are expanded by the shell
      # and the trailing backslashes continue the command line.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \
    steps:
    # Restore a previous build. The key is derived from OPENMP_VERSION so that
    # bumping the version can never restore a cache built from another
    # release; it resolves to the same string as the former hard-coded key.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-macos-release-${{ env.OPENMP_VERSION }}-20251004
    # Every step except `upload` runs only when the cache lookup missed.

    # The checkout provides toolchains/ios.toolchain.cmake, which the OpenMP
    # CMake options reference.
    - name: checkout
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
    # Download the LLVM cmake and openmp source tarballs, move the cmake
    # modules into the openmp tree, and apply two patches fetched from the
    # nihui/llvm-project fork.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # Build and install (stripped) libomp for x86_64.
    - name: build-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Build and install (stripped) libomp for arm64.
    - name: build-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Recreate openmp-install with the headers from the x86_64 install and a
    # libomp.a that lipo merges from the two per-architecture archives.
    - name: merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        rm -rf $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/lib/libomp.a
    # Always runs: publish openmp-install (freshly built or restored from the
    # cache) for the macos job to download.
    - name: upload
      uses: actions/upload-artifact@v6
      with:
        name: openmp-macos
        path: openmp-install

  # Build the macOS release frameworks, once without and once with Vulkan.
  # Needs the tag-derived version (setup) and the prebuilt libomp (openmp-macos).
  macos:
    needs: [setup, openmp-macos]
    strategy:
      matrix:
        opt:
          - { vulkan: OFF, id: macos        }
          - { vulkan: ON,  id: macos-vulkan }
    runs-on: macos-15-intel
    env:
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-${{ matrix.opt.id }}
      # CMake arguments shared by the x86_64 and arm64 builds. The text is
      # pasted into a shell command, so the $VARS are expanded by the shell
      # and the trailing backslashes continue the command line. The OpenMP_*
      # entries name the compiler flags and the libomp.a library explicitly.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
        -DNCNN_BUILD_TOOLS=OFF \
        -DNCNN_BUILD_EXAMPLES=OFF \
        -DNCNN_BUILD_BENCHMARK=OFF \
        -DNCNN_VULKAN=${{ matrix.opt.vulkan }} \

    steps:
    # Check out the repository together with its git submodules.
    - uses: actions/checkout@v6
      with:
        submodules: true
    # Fetch the libomp build published by the openmp-macos job into ./openmp-macos.
    - name: download-openmp-macos
      uses: actions/download-artifact@v8
      with:
        name: openmp-macos
        path: openmp-macos
    # Copy the OpenMP headers and libomp.a into the MacOSX SDK under $DEVELOPER_DIR.
    - name: install-openmp
      run: |
        sudo cp openmp-macos/include/* $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include
        sudo cp openmp-macos/lib/libomp.a $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/lib
    # Build and install (stripped) ncnn for x86_64 into build-x86_64/install.
    - name: build-x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Build and install (stripped) ncnn for arm64 into build-arm64/install.
    - name: build-arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Each packaging step below assembles a versioned framework bundle:
    # content lives in Versions/A, Versions/Current links to A, and the
    # top-level Headers/Resources/<binary> entries are symlinks into
    # Versions/Current. Info.plist in the repository root is used as a
    # template whose __NAME__/__IDENTIFIER__/__VERSION__ placeholders are
    # filled in by sed.

    # openmp.framework: the downloaded libomp.a becomes the framework binary.
    # The plist version 18.1 is hard-coded in the sed expression.
    - name: package-openmp
      run: |
        rm -rf openmp.framework
        mkdir -p openmp.framework/Versions/A/Headers
        mkdir -p openmp.framework/Versions/A/Resources
        ln -s A openmp.framework/Versions/Current
        ln -s Versions/Current/Headers openmp.framework/Headers
        ln -s Versions/Current/Resources openmp.framework/Resources
        ln -s Versions/Current/openmp openmp.framework/openmp
        cp openmp-macos/lib/libomp.a openmp.framework/Versions/A/openmp
        cp -a openmp-macos/include/* openmp.framework/Versions/A/Headers/
        sed -e 's/__NAME__/openmp/g' -e 's/__IDENTIFIER__/org.llvm.openmp/g' -e 's/__VERSION__/18.1/g' Info.plist > openmp.framework/Versions/A/Resources/Info.plist
    # glslang.framework (Vulkan variant only): for each architecture libtool
    # combines libglslang.a and libSPIRV.a into one static archive, then lipo
    # merges the two per-architecture archives into the framework binary.
    - name: package-glslang
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -rf glslang.framework
        mkdir -p glslang.framework/Versions/A/Headers
        mkdir -p glslang.framework/Versions/A/Resources
        ln -s A glslang.framework/Versions/Current
        ln -s Versions/Current/Headers glslang.framework/Headers
        ln -s Versions/Current/Resources glslang.framework/Resources
        ln -s Versions/Current/glslang glslang.framework/glslang
        libtool -static \
            build-x86_64/install/lib/libglslang.a \
            build-x86_64/install/lib/libSPIRV.a \
            -o build-x86_64/install/lib/libglslang_combined.a
        libtool -static \
            build-arm64/install/lib/libglslang.a \
            build-arm64/install/lib/libSPIRV.a \
            -o build-arm64/install/lib/libglslang_combined.a
        lipo -create build-x86_64/install/lib/libglslang_combined.a build-arm64/install/lib/libglslang_combined.a -o glslang.framework/Versions/A/glslang
        cp -a build-x86_64/install/include/glslang glslang.framework/Versions/A/Headers/
        sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist
    # ncnn.framework: lipo merges the x86_64 and arm64 libncnn.a archives;
    # headers are taken from the x86_64 install tree.
    - name: package-ncnn
      run: |
        rm -rf ncnn.framework
        mkdir -p ncnn.framework/Versions/A/Headers
        mkdir -p ncnn.framework/Versions/A/Resources
        ln -s A ncnn.framework/Versions/Current
        ln -s Versions/Current/Headers ncnn.framework/Headers
        ln -s Versions/Current/Resources ncnn.framework/Resources
        ln -s Versions/Current/ncnn ncnn.framework/ncnn
        lipo -create build-x86_64/install/lib/libncnn.a build-arm64/install/lib/libncnn.a -o ncnn.framework/Versions/A/ncnn
        cp -a build-x86_64/install/include/* ncnn.framework/Versions/A/Headers/
        sed -e 's/__NAME__/ncnn/g' -e 's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist
    # Exactly one of the two `package` steps runs, selected by the matrix
    # vulkan flag. -y stores the framework symlinks as symlinks.

    # Non-Vulkan variant: zip openmp.framework and ncnn.framework.
    - name: package
      if: matrix.opt.vulkan == 'OFF'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework ncnn.framework
    # Vulkan variant: additionally include glslang.framework.
    - name: package
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework glslang.framework ncnn.framework
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: ${{ env.PACKAGENAME }}.zip

  # Build (or restore from cache) a static arm64 libomp for iOS and publish
  # it as the `openmp-ios` artifact.
  openmp-ios:
    runs-on: macos-15-intel
    env:
      # LLVM OpenMP release to download and build.
      OPENMP_VERSION: '18.1.2'
      # CMake arguments for the OpenMP build. The text is pasted into a shell
      # command, so the $VARS are expanded by the shell and the trailing
      # backslashes continue the command line.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \
    steps:
    # Restore a previous build. The key is derived from OPENMP_VERSION so that
    # bumping the version can never restore a cache built from another
    # release; it resolves to the same string as the former hard-coded key.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-ios-release-${{ env.OPENMP_VERSION }}-20251004
    # Every step except `upload` runs only when the cache lookup missed.

    # The checkout provides toolchains/ios.toolchain.cmake, which the OpenMP
    # CMake options reference.
    - name: checkout
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
    # Download the LLVM cmake and openmp source tarballs, move the cmake
    # modules into the openmp tree, and apply two patches fetched from the
    # nihui/llvm-project fork.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # Build and install (stripped) libomp for arm64 with PLATFORM=OS64.
    - name: build-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=OS64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Despite the step name, nothing is merged here: only arm64 is built, so
    # the headers and the single libomp.a are copied into openmp-install.
    - name: merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        rm -rf $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        cp openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a $GITHUB_WORKSPACE/openmp-install/lib/libomp.a
    # Always runs: publish openmp-install (freshly built or restored from the
    # cache) for the ios job to download.
    - name: upload
      uses: actions/upload-artifact@v6
      with:
        name: openmp-ios
        path: openmp-install

  # Build the iOS release package, once without and once with Vulkan.
  # Needs the tag-derived version (setup) and the prebuilt libomp (openmp-ios).
  ios:
    needs: [setup, openmp-ios]
    strategy:
      matrix:
        opt:
          - { vulkan: OFF, id: ios        }
          - { vulkan: ON,  id: ios-vulkan }
    runs-on: macos-15-intel
    env:
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-${{ matrix.opt.id }}
      # CMake arguments for the ncnn build. The text is pasted into a shell
      # command, so the $VARS are expanded by the shell and the trailing
      # backslashes continue the command line. The OpenMP_* entries name the
      # compiler flags and the libomp.a library explicitly.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
        -DNCNN_BUILD_BENCHMARK=OFF \
        -DNCNN_VULKAN=${{ matrix.opt.vulkan }} \

    steps:
    - uses: actions/checkout@v6
      with:
        submodules: true
    - name: download-openmp-ios
      uses: actions/download-artifact@v8
      with:
        name: openmp-ios
        path: openmp-ios
    - name: install-openmp
      run: |
        sudo cp openmp-ios/include/* $DEVELOPER_DIR/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/include
        sudo cp openmp-ios/lib/libomp.a $DEVELOPER_DIR/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib
    - name: build-arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=OS64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    - name: package-openmp
      run: |
        rm -rf openmp.framework
        mkdir -p openmp.framework/Versions/A/Headers
        mkdir -p openmp.framework/Versions/A/Resources
        ln -s A openmp.framework/Versions/Current
        ln -s Versions/Current/Headers openmp.framework/Headers
        ln -s Versions/Current/Resources openmp.framework/Resources
        ln -s Versions/Current/openmp openmp.framework/openmp
        cp openmp-ios/lib/libomp.a openmp.framework/Versions/A/openmp
        cp -a openmp-ios/include/* openmp.framework/Versions/A/Headers/
        sed -e 's/__NAME__/openmp/g' -e 's/__IDENTIFIER__/org.llvm.openmp/g' -e 's/__VERSION__/18.1/g' Info.plist > openmp.framework/Versions/A/Resources/Info.plist
    - name: package-glslang
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -rf glslang.framework
        mkdir -p glslang.framework/Versions/A/Headers
        mkdir -p glslang.framework/Versions/A/Resources
        ln -s A glslang.framework/Versions/Current
        ln -s Versions/Current/Headers glslang.framework/Headers
        ln -s Versions/Current/Resources glslang.framework/Resources
        ln -s Versions/Current/glslang glslang.framework/glslang
        libtool -static \
            build-arm64/install/lib/libglslang.a \
            build-arm64/install/lib/libSPIRV.a \
            -o build-arm64/install/lib/libglslang_combined.a
        cp build-arm64/install/lib/libglslang_combined.a glslang.framework/Versions/A/glslang
        cp -a build-arm64/install/include/glslang glslang.framework/Versions/A/Headers/
        sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist
    - name: package-ncnn
      run: |
        rm -rf ncnn.framework
        mkdir -p ncnn.framework/Versions/A/Headers
        mkdir -p ncnn.framework/Versions/A/Resources
        ln -s A ncnn.framework/Versions/Current
        ln -s Versions/Current/Headers ncnn.framework/Headers
        ln -s Versions/Current/Resources ncnn.framework/Resources
        ln -s Versions/Current/ncnn ncnn.framework/ncnn
        cp build-arm64/install/lib/libncnn.a ncnn.framework/Versions/A/ncnn
        cp -a build-arm64/install/include/* ncnn.framework/Versions/A/Headers/
        sed -e 's/__NAME__/ncnn/g' -e 's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist
    - name: package
      if: matrix.opt.vulkan == 'OFF'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework ncnn.framework
    - name: package
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework glslang.framework ncnn.framework
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: ${{ env.PACKAGENAME }}.zip

  # Builds a static libomp for the iOS simulator (x86_64 and arm64), merges the
  # two archives with lipo, and uploads the result as the
  # `openmp-ios-simulator` artifact. Build steps are skipped on a cache hit.
  openmp-ios-simulator:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      # CMake arguments shared by both architecture builds; substituted into
      # the shell command line, hence the trailing backslashes. The toolchain
      # path is relative to openmp-<version>.src/build-<arch>.
      # NOTE(review): $IOS_DEPLOYMENT_TARGET and the $ENABLE_* variables are not
      # defined in this job — presumably workflow-level env; confirm.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \
    steps:
    # Cache of the finished openmp-install directory. The key is a literal
    # string: the 18.1.2 in it is not derived from OPENMP_VERSION, so the two
    # must be kept in sync by hand.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-ios-simulator-release-18.1.2-20251004
    # Every step below that has an `if:` runs only when the cache missed.
    - name: checkout
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
    # Download and unpack the LLVM `cmake` and `openmp` source tarballs for
    # OPENMP_VERSION, move the cmake Modules into the openmp tree, then apply
    # two patches fetched from commits in the nihui/llvm-project repository.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # x86_64 simulator build (PLATFORM=SIMULATOR64).
    - name: build-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=SIMULATOR64 -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # arm64 simulator build (PLATFORM=SIMULATORARM64).
    - name: build-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=SIMULATORARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Recreate $GITHUB_WORKSPACE/openmp-install: headers are taken from the
    # x86_64 install tree, and libomp.a is the lipo merge of both architectures.
    - name: merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        rm -rf $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/lib/libomp.a
    # Not gated on the cache result: uploads openmp-install as an artifact.
    - name: upload
      uses: actions/upload-artifact@v6
      with:
        name: openmp-ios-simulator
        path: openmp-install

  # Builds ncnn for the iOS simulator (x86_64 and arm64) once per matrix entry
  # (Vulkan OFF / ON), merges the per-architecture static libraries with lipo,
  # wraps them in .framework bundles, and uploads one zip artifact.
  ios-simulator:
    # `setup` provides outputs.VERSION; `openmp-ios-simulator` provides libomp.
    needs: [setup, openmp-ios-simulator]
    strategy:
      matrix:
        opt:
          - { vulkan: OFF, id: ios-simulator        }
          - { vulkan: ON,  id: ios-simulator-vulkan }
    runs-on: macos-15-intel
    env:
      # Base name shared by the zip file and the uploaded artifact.
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-${{ matrix.opt.id }}
      # CMake arguments shared by both architecture builds; substituted into
      # the shell command line, hence the trailing backslashes. The toolchain
      # path is relative to the build-<arch> directory.
      # NOTE(review): $IOS_DEPLOYMENT_TARGET and the $ENABLE_* variables are not
      # defined in this job — presumably workflow-level env; confirm.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$IOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
        -DNCNN_BUILD_BENCHMARK=OFF \
        -DNCNN_VULKAN=${{ matrix.opt.vulkan }} \

    steps:
    # Check out the repository including its git submodules.
    - uses: actions/checkout@v6
      with:
        submodules: true
    # Fetch the libomp build uploaded by the openmp-ios-simulator job.
    - name: download-openmp-ios-simulator
      uses: actions/download-artifact@v8
      with:
        name: openmp-ios-simulator
        path: openmp-ios-simulator
    # Copy the libomp headers and libomp.a into the iPhoneSimulator SDK's
    # usr/include and usr/lib (with sudo).
    - name: install-openmp
      run: |
        sudo cp openmp-ios-simulator/include/* $DEVELOPER_DIR/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/include
        sudo cp openmp-ios-simulator/lib/libomp.a $DEVELOPER_DIR/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/lib
    # x86_64 simulator build (PLATFORM=SIMULATOR64) into build-x86_64/install.
    - name: build-x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=SIMULATOR64 -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # arm64 simulator build (PLATFORM=SIMULATORARM64) into build-arm64/install.
    - name: build-arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=SIMULATORARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Assemble openmp.framework with a Versions/A directory plus `Current` and
    # top-level symlinks. The framework binary is the downloaded libomp.a.
    # Info.plist is produced from the Info.plist file in the working directory
    # by replacing the __NAME__, __IDENTIFIER__ and __VERSION__ placeholders.
    - name: package-openmp
      run: |
        rm -rf openmp.framework
        mkdir -p openmp.framework/Versions/A/Headers
        mkdir -p openmp.framework/Versions/A/Resources
        ln -s A openmp.framework/Versions/Current
        ln -s Versions/Current/Headers openmp.framework/Headers
        ln -s Versions/Current/Resources openmp.framework/Resources
        ln -s Versions/Current/openmp openmp.framework/openmp
        cp openmp-ios-simulator/lib/libomp.a openmp.framework/Versions/A/openmp
        cp -a openmp-ios-simulator/include/* openmp.framework/Versions/A/Headers/
        sed -e 's/__NAME__/openmp/g' -e 's/__IDENTIFIER__/org.llvm.openmp/g' -e 's/__VERSION__/18.1/g' Info.plist > openmp.framework/Versions/A/Resources/Info.plist
    # Vulkan matrix entry only: for each architecture, merge libglslang.a and
    # libSPIRV.a into one archive with libtool, then lipo the two combined
    # archives into the framework binary. Headers come from the x86_64 install.
    - name: package-glslang
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -rf glslang.framework
        mkdir -p glslang.framework/Versions/A/Headers
        mkdir -p glslang.framework/Versions/A/Resources
        ln -s A glslang.framework/Versions/Current
        ln -s Versions/Current/Headers glslang.framework/Headers
        ln -s Versions/Current/Resources glslang.framework/Resources
        ln -s Versions/Current/glslang glslang.framework/glslang
        libtool -static \
            build-x86_64/install/lib/libglslang.a \
            build-x86_64/install/lib/libSPIRV.a \
            -o build-x86_64/install/lib/libglslang_combined.a
        libtool -static \
            build-arm64/install/lib/libglslang.a \
            build-arm64/install/lib/libSPIRV.a \
            -o build-arm64/install/lib/libglslang_combined.a
        lipo -create \
            build-x86_64/install/lib/libglslang_combined.a \
            build-arm64/install/lib/libglslang_combined.a \
            -o glslang.framework/Versions/A/glslang
        cp -a build-x86_64/install/include/glslang glslang.framework/Versions/A/Headers/
        sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist
    # Package ncnn.framework: the binary is the lipo merge of both libncnn.a
    # archives; headers are copied from the x86_64 install tree only.
    - name: package-ncnn
      run: |
        rm -rf ncnn.framework
        mkdir -p ncnn.framework/Versions/A/Headers
        mkdir -p ncnn.framework/Versions/A/Resources
        ln -s A ncnn.framework/Versions/Current
        ln -s Versions/Current/Headers ncnn.framework/Headers
        ln -s Versions/Current/Resources ncnn.framework/Resources
        ln -s Versions/Current/ncnn ncnn.framework/ncnn
        lipo -create \
            build-x86_64/install/lib/libncnn.a \
            build-arm64/install/lib/libncnn.a \
            -o ncnn.framework/Versions/A/ncnn
        cp -a build-x86_64/install/include/* ncnn.framework/Versions/A/Headers/
        sed -e 's/__NAME__/ncnn/g' -e 's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist
    # The next two steps share the name `package`; their `if:` conditions select
    # one or the other per matrix entry. This variant zips openmp + ncnn.
    # `zip -y` stores the framework symlinks as links instead of following them.
    - name: package
      if: matrix.opt.vulkan == 'OFF'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework ncnn.framework
    # Vulkan variant: additionally includes glslang.framework in the zip.
    - name: package
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework glslang.framework ncnn.framework
    # Upload the zip as an artifact named after PACKAGENAME.
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: ${{ env.PACKAGENAME }}.zip

  # Builds a static libomp for Mac Catalyst (x86_64 and arm64), merges the two
  # archives with lipo, and uploads the result as the `openmp-mac-catalyst`
  # artifact. Build steps are skipped on a cache hit.
  openmp-mac-catalyst:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      # CMake arguments shared by both architecture builds; substituted into
      # the shell command line, hence the trailing backslashes. The toolchain
      # path is relative to openmp-<version>.src/build-<arch>.
      # NOTE(review): $MAC_CATALYST_DEPLOYMENT_TARGET and the $ENABLE_* variables
      # are not defined in this job — presumably workflow-level env; confirm.
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_CATALYST_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \
    steps:
    # Cache of the finished openmp-install directory. The key is a literal
    # string: the 18.1.2 in it is not derived from OPENMP_VERSION, so the two
    # must be kept in sync by hand.
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-mac-catalyst-release-18.1.2-20251004
    # Every step below that has an `if:` runs only when the cache missed.
    - name: checkout
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      uses: actions/checkout@v6
    # Download and unpack the LLVM `cmake` and `openmp` source tarballs for
    # OPENMP_VERSION, move the cmake Modules into the openmp tree, then apply
    # two patches fetched from commits in the nihui/llvm-project repository.
    - name: openmp
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf cmake-${{ env.OPENMP_VERSION }}.src.tar.xz
        wget https://github.com/llvm/llvm-project/releases/download/llvmorg-${{ env.OPENMP_VERSION }}/openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        tar -xf openmp-${{ env.OPENMP_VERSION }}.src.tar.xz
        mv cmake-${{ env.OPENMP_VERSION }}.src/Modules/* openmp-${{ env.OPENMP_VERSION }}.src/cmake/
        cd openmp-${{ env.OPENMP_VERSION }}.src
        wget https://github.com/nihui/llvm-project/commit/ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        patch -p2 -i ef8c35bcf5d9cfdb0764ffde6a63c04ec715bc37.patch
        wget https://github.com/nihui/llvm-project/commit/5c12711f9a21f41bea70566bf15a4026804d6b20.patch
        patch -p2 -i 5c12711f9a21f41bea70566bf15a4026804d6b20.patch
    # x86_64 Catalyst build (PLATFORM=MAC_CATALYST).
    - name: build-x86_64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-x86_64 && cd build-x86_64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # arm64 Catalyst build (PLATFORM=MAC_CATALYST_ARM64).
    - name: build-arm64
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        cd openmp-${{ env.OPENMP_VERSION }}.src
        mkdir -p build-arm64 && cd build-arm64
        cmake ${{ env.OPENMP_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Recreate $GITHUB_WORKSPACE/openmp-install: headers are taken from the
    # x86_64 install tree, and libomp.a is the lipo merge of both architectures.
    - name: merge-fat-library
      if: steps.cache-openmp.outputs.cache-hit != 'true'
      run: |
        rm -rf $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install
        cp -a openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/include $GITHUB_WORKSPACE/openmp-install
        mkdir -p $GITHUB_WORKSPACE/openmp-install/lib
        lipo -create \
            openmp-${{ env.OPENMP_VERSION }}.src/build-x86_64/install/lib/libomp.a \
            openmp-${{ env.OPENMP_VERSION }}.src/build-arm64/install/lib/libomp.a \
            -o $GITHUB_WORKSPACE/openmp-install/lib/libomp.a
    # Not gated on the cache result: uploads openmp-install as an artifact.
    - name: upload
      uses: actions/upload-artifact@v6
      with:
        name: openmp-mac-catalyst
        path: openmp-install

  # Builds ncnn for Mac Catalyst (x86_64 and arm64) once per matrix entry
  # (Vulkan OFF / ON), merges the per-architecture static libraries with lipo,
  # wraps them in .framework bundles, and uploads one zip artifact.
  mac-catalyst:
    # `setup` provides outputs.VERSION; `openmp-mac-catalyst` provides libomp.
    needs: [setup, openmp-mac-catalyst]
    strategy:
      matrix:
        opt:
          - { vulkan: OFF, id: mac-catalyst        }
          - { vulkan: ON,  id: mac-catalyst-vulkan }
    runs-on: macos-15-intel
    env:
      # Base name shared by the zip file and the uploaded artifact.
      PACKAGENAME: ncnn-${{ needs.setup.outputs.VERSION }}-${{ matrix.opt.id }}
      # CMake arguments shared by both architecture builds; substituted into
      # the shell command line, hence the trailing backslashes. The toolchain
      # path is relative to the build-<arch> directory.
      # NOTE(review): $MAC_CATALYST_DEPLOYMENT_TARGET and the $ENABLE_* variables
      # are not defined in this job — presumably workflow-level env; confirm.
      NCNN_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$MAC_CATALYST_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
        -DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
        -DOpenMP_libomp_LIBRARY="libomp.a" \
        -DNCNN_VERSION_STRING="${{ needs.setup.outputs.VERSION }}" \
        -DNCNN_BUILD_BENCHMARK=OFF \
        -DNCNN_VULKAN=${{ matrix.opt.vulkan }} \

    steps:
    # Check out the repository including its git submodules.
    - uses: actions/checkout@v6
      with:
        submodules: true
    # Fetch the libomp build uploaded by the openmp-mac-catalyst job.
    - name: download-openmp-mac-catalyst
      uses: actions/download-artifact@v8
      with:
        name: openmp-mac-catalyst
        path: openmp-mac-catalyst
    # Copy the libomp headers and libomp.a into the MacOSX SDK's usr/include
    # and usr/lib (with sudo).
    - name: install-openmp
      run: |
        sudo cp openmp-mac-catalyst/include/* $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include
        sudo cp openmp-mac-catalyst/lib/libomp.a $DEVELOPER_DIR/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/lib
    # x86_64 Catalyst build (PLATFORM=MAC_CATALYST) into build-x86_64/install.
    - name: build-x86_64
      run: |
        mkdir build-x86_64 && cd build-x86_64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST -DARCHS="x86_64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # arm64 Catalyst build into build-arm64/install. Uses the arm64-specific
    # platform value MAC_CATALYST_ARM64, the same one the openmp-mac-catalyst
    # job uses for its arm64 libomp build, so both halves of the fat library
    # are configured consistently.
    - name: build-arm64
      run: |
        mkdir build-arm64 && cd build-arm64
        cmake ${{ env.NCNN_CMAKE_OPTIONS }} -DPLATFORM=MAC_CATALYST_ARM64 -DARCHS="arm64" ..
        cmake --build . -j 4
        cmake --build . --target install/strip
    # Assemble openmp.framework with a Versions/A directory plus `Current` and
    # top-level symlinks. The framework binary is the downloaded libomp.a.
    # Info.plist is produced from the Info.plist file in the working directory
    # by replacing the __NAME__, __IDENTIFIER__ and __VERSION__ placeholders.
    - name: package-openmp
      run: |
        rm -rf openmp.framework
        mkdir -p openmp.framework/Versions/A/Headers
        mkdir -p openmp.framework/Versions/A/Resources
        ln -s A openmp.framework/Versions/Current
        ln -s Versions/Current/Headers openmp.framework/Headers
        ln -s Versions/Current/Resources openmp.framework/Resources
        ln -s Versions/Current/openmp openmp.framework/openmp
        cp openmp-mac-catalyst/lib/libomp.a openmp.framework/Versions/A/openmp
        cp -a openmp-mac-catalyst/include/* openmp.framework/Versions/A/Headers/
        sed -e 's/__NAME__/openmp/g' -e 's/__IDENTIFIER__/org.llvm.openmp/g' -e 's/__VERSION__/18.1/g' Info.plist > openmp.framework/Versions/A/Resources/Info.plist
    # Vulkan matrix entry only: for each architecture, merge libglslang.a and
    # libSPIRV.a into one archive with libtool, then lipo the two combined
    # archives into the framework binary. Headers come from the x86_64 install.
    - name: package-glslang
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -rf glslang.framework
        mkdir -p glslang.framework/Versions/A/Headers
        mkdir -p glslang.framework/Versions/A/Resources
        ln -s A glslang.framework/Versions/Current
        ln -s Versions/Current/Headers glslang.framework/Headers
        ln -s Versions/Current/Resources glslang.framework/Resources
        ln -s Versions/Current/glslang glslang.framework/glslang
        libtool -static \
            build-x86_64/install/lib/libglslang.a \
            build-x86_64/install/lib/libSPIRV.a \
            -o build-x86_64/install/lib/libglslang_combined.a
        libtool -static \
            build-arm64/install/lib/libglslang.a \
            build-arm64/install/lib/libSPIRV.a \
            -o build-arm64/install/lib/libglslang_combined.a
        lipo -create \
            build-x86_64/install/lib/libglslang_combined.a \
            build-arm64/install/lib/libglslang_combined.a \
            -o glslang.framework/Versions/A/glslang
        cp -a build-x86_64/install/include/glslang glslang.framework/Versions/A/Headers/
        sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist
    # Package ncnn.framework: the binary is the lipo merge of both libncnn.a
    # archives; headers are copied from the x86_64 install tree only.
    - name: package-ncnn
      run: |
        rm -rf ncnn.framework
        mkdir -p ncnn.framework/Versions/A/Headers
        mkdir -p ncnn.framework/Versions/A/Resources
        ln -s A ncnn.framework/Versions/Current
        ln -s Versions/Current/Headers ncnn.framework/Headers
        ln -s Versions/Current/Resources ncnn.framework/Resources
        ln -s Versions/Current/ncnn ncnn.framework/ncnn
        lipo -create \
            build-x86_64/install/lib/libncnn.a \
            build-arm64/install/lib/libncnn.a \
            -o ncnn.framework/Versions/A/ncnn
        cp -a build-x86_64/install/include/* ncnn.framework/Versions/A/Headers/
        sed -e 's/__NAME__/ncnn/g' -e 's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist
    # The next two steps share the name `package`; their `if:` conditions select
    # one or the other per matrix entry. This variant zips openmp + ncnn.
    # `zip -y` stores the framework symlinks as links instead of following them.
    - name: package
      if: matrix.opt.vulkan == 'OFF'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework ncnn.framework
    # Vulkan variant: additionally includes glslang.framework in the zip.
    - name: package
      if: matrix.opt.vulkan == 'ON'
      run: |
        rm -f ${{ env.PACKAGENAME }}.zip
        zip -9 -y -r ${{ env.PACKAGENAME }}.zip openmp.framework glslang.framework ncnn.framework
    # Upload the zip as an artifact named after PACKAGENAME.
    - name: upload-zip
      uses: actions/upload-artifact@v6
      with:
        name: ${{ env.PACKAGENAME }}
        path: ${{ env.PACKAGENAME }}.zip

  openmp-watchos:
    runs-on: macos-15-intel
    env:
      OPENMP_VERSION: '18.1.2'
      OPENMP_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=../../toolchains/ios.toolchain.cmake \
        -DDEPLOYMENT_TARGET=$WATCHOS_DEPLOYMENT_TARGET \
        -DENABLE_BITCODE=$ENABLE_BITCODE \
        -DENABLE_ARC=$ENABLE_ARC \
        -DENABLE_VISIBILITY=$ENABLE_VISIBILITY \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \
        -DLIBOMP_ENABLE_SHARED=OFF \
        -DLIBOMP_OMPT_SUPPORT=OFF \
        -DLIBOMP_USE_HWLOC=OFF \
    steps:
    - name: cache-openmp
      id: cache-openmp
      uses: actions/cache@v5
      with:
        path: openmp-install
        key: openmp-watchos-release-18.1.2-20251004
    - name: checkout
      if: steps
Download .txt
gitextract_nmtq5ath/

├── .astylerc
├── .clang-format
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug.md
│   │   ├── model-convert.md
│   │   ├── others.md
│   │   └── quantization.md
│   ├── dependabot.yml
│   ├── labeler.yml
│   └── workflows/
│       ├── android.yml
│       ├── code-format-msg.yml
│       ├── code-format.yml
│       ├── codeql-analysis.yml
│       ├── compare-binary-size-pr-comment.yml
│       ├── compare-binary-size.yml
│       ├── elf-riscv32.yml
│       ├── elf-riscv64.yml
│       ├── esp32.yml
│       ├── harmonyos.yml
│       ├── ios.yml
│       ├── labeler.yml
│       ├── linux-aarch64.yml
│       ├── linux-arm.yml
│       ├── linux-loongarch64.yml
│       ├── linux-mips.yml
│       ├── linux-mips64.yml
│       ├── linux-ppc64.yml
│       ├── linux-riscv32.yml
│       ├── linux-riscv64.yml
│       ├── linux-x64-cpu-clang.yml
│       ├── linux-x64-cpu-gcc-musl.yml
│       ├── linux-x64-cpu-gcc.yml
│       ├── linux-x64-gpu-clang.yml
│       ├── linux-x64-gpu-gcc.yml
│       ├── linux-x64-sde.yml
│       ├── linux-x86-cpu-clang.yml
│       ├── linux-x86-cpu-gcc.yml
│       ├── mac-catalyst.yml
│       ├── macos.yml
│       ├── pnnx.yml
│       ├── python.yml
│       ├── release-python.yml
│       ├── release.yml
│       ├── sync-wiki.yml
│       ├── test-coverage.yml
│       ├── tvos.yml
│       ├── visionos.yml
│       ├── watchos.yml
│       ├── web-assembly.yml
│       ├── windows-arm.yml
│       ├── windows-clang.yml
│       ├── windows-mingw.yml
│       ├── windows-xp.yml
│       └── windows.yml
├── .gitignore
├── .gitmodules
├── CITATION.cff
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Info.plist
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── benchmark/
│   ├── CMakeLists.txt
│   ├── FastestDet.param
│   ├── README.md
│   ├── RankCards/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── Rcards.h
│   │   └── main.cpp
│   ├── alexnet.param
│   ├── benchncnn.cpp
│   ├── benchncnn_param_data.h.in
│   ├── blazeface.param
│   ├── efficientnet_b0.param
│   ├── efficientnetv2_b0.param
│   ├── googlenet.param
│   ├── googlenet_int8.param
│   ├── mnasnet.param
│   ├── mobilenet.param
│   ├── mobilenet_int8.param
│   ├── mobilenet_ssd.param
│   ├── mobilenet_ssd_int8.param
│   ├── mobilenet_v2.param
│   ├── mobilenet_v3.param
│   ├── mobilenet_yolo.param
│   ├── mobilenetv2_yolov3.param
│   ├── nanodet_m.param
│   ├── proxylessnasnet.param
│   ├── regnety_400m.param
│   ├── resnet18.param
│   ├── resnet18_int8.param
│   ├── resnet50.param
│   ├── resnet50_int8.param
│   ├── shufflenet.param
│   ├── shufflenet_v2.param
│   ├── squeezenet.param
│   ├── squeezenet_int8.param
│   ├── squeezenet_ssd.param
│   ├── squeezenet_ssd_int8.param
│   ├── vgg16.param
│   ├── vgg16_int8.param
│   ├── vision_transformer.param
│   ├── yolo-fastest-1.1.param
│   ├── yolo-fastestv2.param
│   └── yolov4-tiny.param
├── build-android.cmd
├── build.sh
├── cmake/
│   ├── ncnnConfig.cmake.in
│   ├── ncnn_add_layer.cmake
│   ├── ncnn_add_param.cmake
│   ├── ncnn_add_shader.cmake
│   ├── ncnn_generate_avx512_source.cmake
│   ├── ncnn_generate_avx_source.cmake
│   ├── ncnn_generate_fma_source.cmake
│   ├── ncnn_generate_lasx_source.cmake
│   ├── ncnn_generate_lsx_source.cmake
│   ├── ncnn_generate_msa_source.cmake
│   ├── ncnn_generate_param_header.cmake
│   ├── ncnn_generate_rvv_source.cmake
│   ├── ncnn_generate_shader_comp_header.cmake
│   ├── ncnn_generate_xtheadvector_source.cmake
│   └── run_test.cmake
├── codeformat.sh
├── docs/
│   ├── Home.md
│   ├── application-with-ncnn-inside.md
│   ├── benchmark/
│   │   ├── the-benchmark-of-caffe-android-lib,-mini-caffe,-and-ncnn.md
│   │   └── vulkan-conformance-test.md
│   ├── developer-guide/
│   │   ├── aarch64-mix-assembly-and-intrinsic.md
│   │   ├── add-custom-layer.zh.md
│   │   ├── arm-a53-a55-dual-issue.md
│   │   ├── armv7-mix-assembly-and-intrinsic.md
│   │   ├── binaryop-broadcasting.md
│   │   ├── build-ncnn-on-windows-xp.zh.md
│   │   ├── custom-allocator.md
│   │   ├── element-packing.md
│   │   ├── expression.md
│   │   ├── glsl-extension.md
│   │   ├── glsl-extension.zh.md
│   │   ├── how-to-be-a-contributor.zh.md
│   │   ├── how-to-implement-custom-layer-step-by-step.md
│   │   ├── how-to-write-a-neon-optimized-op-kernel.md
│   │   ├── how-to-write-a-sse-optimized-op-kernel.zh.md
│   │   ├── kvcache.md
│   │   ├── layer-feat-mask.md
│   │   ├── layer-support-behavior.md
│   │   ├── low-level-operation-api.md
│   │   ├── ncnn-tips-and-tricks.zh.md
│   │   ├── new-model-load-api.md
│   │   ├── new-param-load-api.md
│   │   ├── operation-param-weight-table.md
│   │   ├── operators.md
│   │   ├── param-and-model-file-structure.md
│   │   ├── preload-practice.zh.md
│   │   ├── tensorflow-op-combination.md
│   │   └── vulkan-driver-loader.md
│   ├── faq.en.md
│   ├── faq.md
│   ├── how-to-build/
│   │   ├── build-mlir2ncnn.md
│   │   └── how-to-build.md
│   └── how-to-use-and-FAQ/
│       ├── FAQ-ncnn-produce-wrong-result.md
│       ├── FAQ-ncnn-protobuf-problem.zh.md
│       ├── FAQ-ncnn-throw-error.md
│       ├── FAQ-ncnn-vulkan.md
│       ├── build-minimal-library.md
│       ├── efficient-roi-resize-rotate.md
│       ├── ncnn-load-model.md
│       ├── openmp-best-practice.md
│       ├── openmp-best-practice.zh.md
│       ├── quantized-int8-inference.md
│       ├── use-ncnn-with-alexnet.md
│       ├── use-ncnn-with-alexnet.zh.md
│       ├── use-ncnn-with-opencv.md
│       ├── use-ncnn-with-own-project.md
│       ├── use-ncnn-with-pytorch-or-onnx.md
│       ├── use-ncnnoptimize-to-optimize-model.md
│       └── vulkan-notes.md
├── examples/
│   ├── CMakeLists.txt
│   ├── arcface.cpp
│   ├── fasterrcnn.cpp
│   ├── mobilenetssd.cpp
│   ├── mobilenetv2ssdlite.cpp
│   ├── mobilenetv3ssdlite.cpp
│   ├── nanodet.cpp
│   ├── nanodetplus_pnnx.cpp
│   ├── p2pnet.cpp
│   ├── peleenetssd_seg.cpp
│   ├── piper.cpp
│   ├── ppocrv5.cpp
│   ├── ppocrv5_dict.h
│   ├── retinaface.cpp
│   ├── rfcn.cpp
│   ├── rvm.cpp
│   ├── scrfd.cpp
│   ├── scrfd_crowdhuman.cpp
│   ├── shufflenetv2.cpp
│   ├── simplepose.cpp
│   ├── squeezencnn/
│   │   └── README.md
│   ├── squeezenet.cpp
│   ├── squeezenet_c_api.cpp
│   ├── squeezenet_v1.1.caffemodel
│   ├── squeezenet_v1.1.param
│   ├── squeezenet_v1.1.prototxt
│   ├── squeezenetssd.cpp
│   ├── synset_words.txt
│   ├── whisper.cpp
│   ├── yolact.cpp
│   ├── yolo11.cpp
│   ├── yolo11_cls.cpp
│   ├── yolo11_obb.cpp
│   ├── yolo11_pose.cpp
│   ├── yolo11_seg.cpp
│   ├── yolov2.cpp
│   ├── yolov3.cpp
│   ├── yolov4.cpp
│   ├── yolov5.cpp
│   ├── yolov5_pnnx.cpp
│   ├── yolov7.cpp
│   ├── yolov7_pnnx.cpp
│   ├── yolov8.cpp
│   ├── yolov8_cls.cpp
│   ├── yolov8_obb.cpp
│   ├── yolov8_pose.cpp
│   ├── yolov8_seg.cpp
│   ├── yoloworld.cpp
│   └── yolox.cpp
├── package.sh
├── pyproject.toml
├── python/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── examples/
│   │   ├── fasterrcnn.py
│   │   ├── mobilenetssd.py
│   │   ├── mobilenetv2ssdlite.py
│   │   ├── mobilenetv3ssdlite.py
│   │   ├── model_zoo.py
│   │   ├── nanodet.py
│   │   ├── peleenetssd.py
│   │   ├── retinaface.py
│   │   ├── rfcn.py
│   │   ├── shufflenetv2.py
│   │   ├── simplepose.py
│   │   ├── squeezenet.py
│   │   ├── squeezenetssd.py
│   │   ├── yolact.py
│   │   ├── yolov2.py
│   │   ├── yolov3.py
│   │   ├── yolov4.py
│   │   ├── yolov5.py
│   │   └── yolov8.py
│   ├── ncnn/
│   │   ├── __init__.py
│   │   ├── model_zoo/
│   │   │   ├── __init__.py
│   │   │   ├── fasterrcnn.py
│   │   │   ├── mobilenetssd.py
│   │   │   ├── mobilenetv2ssdlite.py
│   │   │   ├── mobilenetv3ssdlite.py
│   │   │   ├── model_store.py
│   │   │   ├── model_zoo.py
│   │   │   ├── nanodet.py
│   │   │   ├── peleenetssd.py
│   │   │   ├── retinaface.py
│   │   │   ├── rfcn.py
│   │   │   ├── shufflenetv2.py
│   │   │   ├── simplepose.py
│   │   │   ├── squeezenet.py
│   │   │   ├── squeezenetssd.py
│   │   │   ├── yolact.py
│   │   │   ├── yolov2.py
│   │   │   ├── yolov3.py
│   │   │   ├── yolov4.py
│   │   │   ├── yolov5.py
│   │   │   ├── yolov7.py
│   │   │   └── yolov8.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── download.py
│   │       ├── functional.py
│   │       ├── objects.py
│   │       └── visual.py
│   ├── requirements.txt
│   ├── setup.py.i
│   ├── src/
│   │   ├── main.cpp
│   │   ├── pybind11_allocator.h
│   │   ├── pybind11_bind.h
│   │   ├── pybind11_datareader.h
│   │   ├── pybind11_layer.h
│   │   ├── pybind11_mat.h
│   │   └── pybind11_modelbin.h
│   └── tests/
│       ├── benchmark.py
│       ├── custom_layer.param
│       ├── test.param
│       ├── test_allocator.py
│       ├── test_blob.py
│       ├── test_extractor.py
│       ├── test_mat.py
│       ├── test_net.py
│       ├── test_option.py
│       ├── test_paramdict.py
│       ├── test_vulkan_allocator.py
│       └── test_vulkan_device.py
├── setup.py
├── src/
│   ├── CMakeLists.txt
│   ├── allocator.cpp
│   ├── allocator.h
│   ├── benchmark.cpp
│   ├── benchmark.h
│   ├── blob.cpp
│   ├── blob.h
│   ├── c_api.cpp
│   ├── c_api.h
│   ├── command.cpp
│   ├── command.h
│   ├── convert_ycbcr.comp
│   ├── cpu.cpp
│   ├── cpu.h
│   ├── datareader.cpp
│   ├── datareader.h
│   ├── expression.cpp
│   ├── expression.h
│   ├── gpu.cpp
│   ├── gpu.h
│   ├── layer/
│   │   ├── absval.cpp
│   │   ├── absval.h
│   │   ├── argmax.cpp
│   │   ├── argmax.h
│   │   ├── arm/
│   │   │   ├── absval_arm.cpp
│   │   │   ├── absval_arm.h
│   │   │   ├── arm_activation.h
│   │   │   ├── arm_usability.h
│   │   │   ├── batchnorm_arm.cpp
│   │   │   ├── batchnorm_arm.h
│   │   │   ├── batchnorm_arm_asimdhp.cpp
│   │   │   ├── bias_arm.cpp
│   │   │   ├── bias_arm.h
│   │   │   ├── binaryop_arm.cpp
│   │   │   ├── binaryop_arm.h
│   │   │   ├── binaryop_arm_asimdhp.cpp
│   │   │   ├── cast_arm.cpp
│   │   │   ├── cast_arm.h
│   │   │   ├── cast_arm_bf16.cpp
│   │   │   ├── cast_arm_vfpv4.cpp
│   │   │   ├── cast_bf16.h
│   │   │   ├── cast_fp16.h
│   │   │   ├── clip_arm.cpp
│   │   │   ├── clip_arm.h
│   │   │   ├── clip_arm_asimdhp.cpp
│   │   │   ├── concat_arm.cpp
│   │   │   ├── concat_arm.h
│   │   │   ├── convolution1d_arm.cpp
│   │   │   ├── convolution1d_arm.h
│   │   │   ├── convolution1d_arm_asimdhp.cpp
│   │   │   ├── convolution1d_packed.h
│   │   │   ├── convolution1d_packed_bf16s.h
│   │   │   ├── convolution1d_packed_fp16s.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_2x2.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack1to4_bf16s.h
│   │   │   ├── convolution_3x3_pack1to4_fp16s.h
│   │   │   ├── convolution_3x3_pack1to8_fp16s.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack4_bf16s.h
│   │   │   ├── convolution_3x3_pack4_fp16s.h
│   │   │   ├── convolution_3x3_pack4to1.h
│   │   │   ├── convolution_3x3_pack8_fp16s.h
│   │   │   ├── convolution_3x3_winograd.h
│   │   │   ├── convolution_3x3_winograd_bf16s.h
│   │   │   ├── convolution_3x3_winograd_fp16s.h
│   │   │   ├── convolution_3x3_winograd_int8.h
│   │   │   ├── convolution_4x4.h
│   │   │   ├── convolution_5x5.h
│   │   │   ├── convolution_5x5_pack4.h
│   │   │   ├── convolution_5x5_pack4_bf16s.h
│   │   │   ├── convolution_5x5_pack8_fp16s.h
│   │   │   ├── convolution_7x7.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_7x7_pack1to4_bf16s.h
│   │   │   ├── convolution_7x7_pack1to8_fp16s.h
│   │   │   ├── convolution_arm.cpp
│   │   │   ├── convolution_arm.h
│   │   │   ├── convolution_arm_asimddp.cpp
│   │   │   ├── convolution_arm_asimdhp.cpp
│   │   │   ├── convolution_arm_i8mm.cpp
│   │   │   ├── convolution_im2col_gemm.h
│   │   │   ├── convolution_im2col_gemm_bf16s.h
│   │   │   ├── convolution_im2col_gemm_bf16s_fp16s.h
│   │   │   ├── convolution_im2col_gemm_fp16s.h
│   │   │   ├── convolution_im2col_gemm_int8.h
│   │   │   ├── convolution_packed.h
│   │   │   ├── convolution_packed_bf16s.h
│   │   │   ├── convolution_packed_fp16s.h
│   │   │   ├── convolution_packed_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3_int8.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_3x3_pack4_bf16s.h
│   │   │   ├── convolutiondepthwise_3x3_pack8_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3_pack8_int8.h
│   │   │   ├── convolutiondepthwise_5x5.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4_bf16s.h
│   │   │   ├── convolutiondepthwise_5x5_pack8_fp16s.h
│   │   │   ├── convolutiondepthwise_arm.cpp
│   │   │   ├── convolutiondepthwise_arm.h
│   │   │   ├── convolutiondepthwise_arm_asimdhp.cpp
│   │   │   ├── crop_arm.cpp
│   │   │   ├── crop_arm.h
│   │   │   ├── deconvolution_3x3.h
│   │   │   ├── deconvolution_4x4.h
│   │   │   ├── deconvolution_4x4_fp16s.h
│   │   │   ├── deconvolution_arm.cpp
│   │   │   ├── deconvolution_arm.h
│   │   │   ├── deconvolution_arm_asimdhp.cpp
│   │   │   ├── deconvolutiondepthwise_arm.cpp
│   │   │   ├── deconvolutiondepthwise_arm.h
│   │   │   ├── deconvolutiondepthwise_arm_asimdhp.cpp
│   │   │   ├── dequantize_arm.cpp
│   │   │   ├── dequantize_arm.h
│   │   │   ├── dequantize_arm_asimdhp.cpp
│   │   │   ├── dropout_arm.cpp
│   │   │   ├── dropout_arm.h
│   │   │   ├── eltwise_arm.cpp
│   │   │   ├── eltwise_arm.h
│   │   │   ├── eltwise_arm_asimdhp.cpp
│   │   │   ├── flatten_arm.cpp
│   │   │   ├── flatten_arm.h
│   │   │   ├── gelu_arm.cpp
│   │   │   ├── gelu_arm.h
│   │   │   ├── gelu_arm_asimdhp.cpp
│   │   │   ├── gemm_arm.cpp
│   │   │   ├── gemm_arm.h
│   │   │   ├── gemm_arm_asimddp.cpp
│   │   │   ├── gemm_arm_asimdfhm.cpp
│   │   │   ├── gemm_arm_asimdhp.cpp
│   │   │   ├── gemm_arm_i8mm.cpp
│   │   │   ├── gemm_arm_vfpv4.cpp
│   │   │   ├── gemm_bf16s.h
│   │   │   ├── gemm_bf16s_fp16s.h
│   │   │   ├── gemm_fp16s.h
│   │   │   ├── gemm_int8.h
│   │   │   ├── gemm_int8_bf16s.h
│   │   │   ├── gemm_int8_fp16s.h
│   │   │   ├── groupnorm_arm.cpp
│   │   │   ├── groupnorm_arm.h
│   │   │   ├── groupnorm_arm_asimdhp.cpp
│   │   │   ├── gru_arm.cpp
│   │   │   ├── gru_arm.h
│   │   │   ├── gru_arm_asimddp.cpp
│   │   │   ├── gru_arm_asimdhp.cpp
│   │   │   ├── gru_arm_vfpv4.cpp
│   │   │   ├── gru_int8.h
│   │   │   ├── hardsigmoid_arm.cpp
│   │   │   ├── hardsigmoid_arm.h
│   │   │   ├── hardsigmoid_arm_asimdhp.cpp
│   │   │   ├── hardswish_arm.cpp
│   │   │   ├── hardswish_arm.h
│   │   │   ├── hardswish_arm_asimdhp.cpp
│   │   │   ├── innerproduct_arm.cpp
│   │   │   ├── innerproduct_arm.h
│   │   │   ├── innerproduct_arm_asimdfhm.cpp
│   │   │   ├── innerproduct_arm_asimdhp.cpp
│   │   │   ├── innerproduct_arm_vfpv4.cpp
│   │   │   ├── innerproduct_fp16s.h
│   │   │   ├── innerproduct_gemm_fp16s.h
│   │   │   ├── instancenorm_arm.cpp
│   │   │   ├── instancenorm_arm.h
│   │   │   ├── instancenorm_arm_asimdhp.cpp
│   │   │   ├── interp_arm.cpp
│   │   │   ├── interp_arm.h
│   │   │   ├── interp_arm_asimdhp.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_bf16s.h
│   │   │   ├── interp_bicubic_fp16s.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bicubic_pack4_bf16s.h
│   │   │   ├── interp_bicubic_pack4_fp16s.h
│   │   │   ├── interp_bicubic_pack8_fp16s.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_bf16s.h
│   │   │   ├── interp_bilinear_fp16s.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_bilinear_pack4_bf16s.h
│   │   │   ├── interp_bilinear_pack4_fp16s.h
│   │   │   ├── interp_bilinear_pack8_fp16s.h
│   │   │   ├── layernorm_arm.cpp
│   │   │   ├── layernorm_arm.h
│   │   │   ├── layernorm_arm_asimdhp.cpp
│   │   │   ├── lrn_arm.cpp
│   │   │   ├── lrn_arm.h
│   │   │   ├── lstm_arm.cpp
│   │   │   ├── lstm_arm.h
│   │   │   ├── lstm_arm_asimddp.cpp
│   │   │   ├── lstm_arm_asimdhp.cpp
│   │   │   ├── lstm_arm_vfpv4.cpp
│   │   │   ├── lstm_int8.h
│   │   │   ├── matmul_arm.cpp
│   │   │   ├── matmul_arm.h
│   │   │   ├── mish_arm.cpp
│   │   │   ├── mish_arm.h
│   │   │   ├── mish_arm_asimdhp.cpp
│   │   │   ├── multiheadattention_arm.cpp
│   │   │   ├── multiheadattention_arm.h
│   │   │   ├── neon_mathfun.h
│   │   │   ├── neon_mathfun_fp16s.h
│   │   │   ├── neon_mathfun_tanh.h
│   │   │   ├── packing_arm.cpp
│   │   │   ├── packing_arm.h
│   │   │   ├── padding_arm.cpp
│   │   │   ├── padding_arm.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack4_bf16s_fp16s.h
│   │   │   ├── padding_pack8_fp16s.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pixelshuffle_arm.cpp
│   │   │   ├── pixelshuffle_arm.h
│   │   │   ├── pooling_2x2.h
│   │   │   ├── pooling_2x2_pack4.h
│   │   │   ├── pooling_2x2_pack4_bf16s.h
│   │   │   ├── pooling_3x3.h
│   │   │   ├── pooling_3x3_pack4.h
│   │   │   ├── pooling_3x3_pack4_bf16s.h
│   │   │   ├── pooling_arm.cpp
│   │   │   ├── pooling_arm.h
│   │   │   ├── pooling_arm_asimdhp.cpp
│   │   │   ├── prelu_arm.cpp
│   │   │   ├── prelu_arm.h
│   │   │   ├── prelu_arm_asimdhp.cpp
│   │   │   ├── quantize_arm.cpp
│   │   │   ├── quantize_arm.h
│   │   │   ├── quantize_arm_asimdhp.cpp
│   │   │   ├── relu_arm.cpp
│   │   │   ├── relu_arm.h
│   │   │   ├── relu_arm_asimdhp.cpp
│   │   │   ├── requantize_arm.cpp
│   │   │   ├── requantize_arm.h
│   │   │   ├── reshape_arm.cpp
│   │   │   ├── reshape_arm.h
│   │   │   ├── rmsnorm_arm.cpp
│   │   │   ├── rmsnorm_arm.h
│   │   │   ├── rmsnorm_arm_asimdhp.cpp
│   │   │   ├── rnn_arm.cpp
│   │   │   ├── rnn_arm.h
│   │   │   ├── rnn_arm_asimddp.cpp
│   │   │   ├── rnn_arm_asimdhp.cpp
│   │   │   ├── rnn_arm_vfpv4.cpp
│   │   │   ├── rnn_int8.h
│   │   │   ├── scale_arm.cpp
│   │   │   ├── scale_arm.h
│   │   │   ├── selu_arm.cpp
│   │   │   ├── selu_arm.h
│   │   │   ├── shufflechannel_arm.cpp
│   │   │   ├── shufflechannel_arm.h
│   │   │   ├── sigmoid_arm.cpp
│   │   │   ├── sigmoid_arm.h
│   │   │   ├── sigmoid_arm_asimdhp.cpp
│   │   │   ├── slice_arm.cpp
│   │   │   ├── slice_arm.h
│   │   │   ├── softmax_arm.cpp
│   │   │   ├── softmax_arm.h
│   │   │   ├── softmax_arm_asimdhp.cpp
│   │   │   ├── swish_arm.cpp
│   │   │   ├── swish_arm.h
│   │   │   ├── swish_arm_asimdhp.cpp
│   │   │   ├── tanh_arm.cpp
│   │   │   ├── tanh_arm.h
│   │   │   ├── tanh_arm_asimdhp.cpp
│   │   │   ├── unaryop_arm.cpp
│   │   │   ├── unaryop_arm.h
│   │   │   └── unaryop_arm_asimdhp.cpp
│   │   ├── batchnorm.cpp
│   │   ├── batchnorm.h
│   │   ├── bias.cpp
│   │   ├── bias.h
│   │   ├── binaryop.cpp
│   │   ├── binaryop.h
│   │   ├── bnll.cpp
│   │   ├── bnll.h
│   │   ├── cast.cpp
│   │   ├── cast.h
│   │   ├── celu.cpp
│   │   ├── celu.h
│   │   ├── clip.cpp
│   │   ├── clip.h
│   │   ├── concat.cpp
│   │   ├── concat.h
│   │   ├── convolution.cpp
│   │   ├── convolution.h
│   │   ├── convolution1d.cpp
│   │   ├── convolution1d.h
│   │   ├── convolution3d.cpp
│   │   ├── convolution3d.h
│   │   ├── convolutiondepthwise.cpp
│   │   ├── convolutiondepthwise.h
│   │   ├── convolutiondepthwise1d.cpp
│   │   ├── convolutiondepthwise1d.h
│   │   ├── convolutiondepthwise3d.cpp
│   │   ├── convolutiondepthwise3d.h
│   │   ├── copyto.cpp
│   │   ├── copyto.h
│   │   ├── crop.cpp
│   │   ├── crop.h
│   │   ├── cumulativesum.cpp
│   │   ├── cumulativesum.h
│   │   ├── deconvolution.cpp
│   │   ├── deconvolution.h
│   │   ├── deconvolution1d.cpp
│   │   ├── deconvolution1d.h
│   │   ├── deconvolution3d.cpp
│   │   ├── deconvolution3d.h
│   │   ├── deconvolutiondepthwise.cpp
│   │   ├── deconvolutiondepthwise.h
│   │   ├── deconvolutiondepthwise1d.cpp
│   │   ├── deconvolutiondepthwise1d.h
│   │   ├── deconvolutiondepthwise3d.cpp
│   │   ├── deconvolutiondepthwise3d.h
│   │   ├── deepcopy.cpp
│   │   ├── deepcopy.h
│   │   ├── deformableconv2d.cpp
│   │   ├── deformableconv2d.h
│   │   ├── dequantize.cpp
│   │   ├── dequantize.h
│   │   ├── detectionoutput.cpp
│   │   ├── detectionoutput.h
│   │   ├── diag.cpp
│   │   ├── diag.h
│   │   ├── dropout.cpp
│   │   ├── dropout.h
│   │   ├── einsum.cpp
│   │   ├── einsum.h
│   │   ├── eltwise.cpp
│   │   ├── eltwise.h
│   │   ├── elu.cpp
│   │   ├── elu.h
│   │   ├── embed.cpp
│   │   ├── embed.h
│   │   ├── erf.cpp
│   │   ├── erf.h
│   │   ├── exp.cpp
│   │   ├── exp.h
│   │   ├── expanddims.cpp
│   │   ├── expanddims.h
│   │   ├── flatten.cpp
│   │   ├── flatten.h
│   │   ├── flip.cpp
│   │   ├── flip.h
│   │   ├── fold.cpp
│   │   ├── fold.h
│   │   ├── fused_activation.h
│   │   ├── gelu.cpp
│   │   ├── gelu.h
│   │   ├── gemm.cpp
│   │   ├── gemm.h
│   │   ├── glu.cpp
│   │   ├── glu.h
│   │   ├── gridsample.cpp
│   │   ├── gridsample.h
│   │   ├── groupnorm.cpp
│   │   ├── groupnorm.h
│   │   ├── gru.cpp
│   │   ├── gru.h
│   │   ├── hardsigmoid.cpp
│   │   ├── hardsigmoid.h
│   │   ├── hardswish.cpp
│   │   ├── hardswish.h
│   │   ├── innerproduct.cpp
│   │   ├── innerproduct.h
│   │   ├── input.cpp
│   │   ├── input.h
│   │   ├── instancenorm.cpp
│   │   ├── instancenorm.h
│   │   ├── interp.cpp
│   │   ├── interp.h
│   │   ├── inversespectrogram.cpp
│   │   ├── inversespectrogram.h
│   │   ├── layernorm.cpp
│   │   ├── layernorm.h
│   │   ├── log.cpp
│   │   ├── log.h
│   │   ├── loongarch/
│   │   │   ├── absval_loongarch.cpp
│   │   │   ├── absval_loongarch.h
│   │   │   ├── batchnorm_loongarch.cpp
│   │   │   ├── batchnorm_loongarch.h
│   │   │   ├── bias_loongarch.cpp
│   │   │   ├── bias_loongarch.h
│   │   │   ├── binaryop_loongarch.cpp
│   │   │   ├── binaryop_loongarch.h
│   │   │   ├── cast_loongarch.cpp
│   │   │   ├── cast_loongarch.h
│   │   │   ├── clip_loongarch.cpp
│   │   │   ├── clip_loongarch.h
│   │   │   ├── concat_loongarch.cpp
│   │   │   ├── concat_loongarch.h
│   │   │   ├── convolution1d_loongarch.cpp
│   │   │   ├── convolution1d_loongarch.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_int8.h
│   │   │   ├── convolution_1x1_pack1to4_int8.h
│   │   │   ├── convolution_1x1_pack4.h
│   │   │   ├── convolution_1x1_pack4to1.h
│   │   │   ├── convolution_1x1_pack8to1_int8.h
│   │   │   ├── convolution_1x1_pack8to4_int8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack8to1_int8.h
│   │   │   ├── convolution_3x3_pack8to4_int8.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_int8.h
│   │   │   ├── convolution_loongarch.cpp
│   │   │   ├── convolution_loongarch.h
│   │   │   ├── convolution_pack1to4.h
│   │   │   ├── convolution_pack1to4_int8.h
│   │   │   ├── convolution_pack4.h
│   │   │   ├── convolution_pack4to1.h
│   │   │   ├── convolution_pack8to1_int8.h
│   │   │   ├── convolution_pack8to4_int8.h
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_int8.h
│   │   │   ├── convolution_sgemm_pack1to4_int8.h
│   │   │   ├── convolution_sgemm_pack4.h
│   │   │   ├── convolution_sgemm_pack4to1.h
│   │   │   ├── convolution_sgemm_pack8to1_int8.h
│   │   │   ├── convolution_sgemm_pack8to4_int8.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_int8.h
│   │   │   ├── convolution_winograd_dot_pack4.h
│   │   │   ├── convolution_winograd_dot_pack8to1_int8.h
│   │   │   ├── convolution_winograd_dot_pack8to4_int8.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_int8.h
│   │   │   ├── convolution_winograd_transform_pack4.h
│   │   │   ├── convolution_winograd_transform_pack4_int8.h
│   │   │   ├── convolution_winograd_transform_pack8_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_loongarch.cpp
│   │   │   ├── convolutiondepthwise_loongarch.h
│   │   │   ├── crop_loongarch.cpp
│   │   │   ├── crop_loongarch.h
│   │   │   ├── deconvolution_loongarch.cpp
│   │   │   ├── deconvolution_loongarch.h
│   │   │   ├── deconvolution_pack1to4.h
│   │   │   ├── deconvolution_pack4.h
│   │   │   ├── deconvolution_pack4to1.h
│   │   │   ├── deconvolutiondepthwise_loongarch.cpp
│   │   │   ├── deconvolutiondepthwise_loongarch.h
│   │   │   ├── dequantize_loongarch.cpp
│   │   │   ├── dequantize_loongarch.h
│   │   │   ├── dropout_loongarch.cpp
│   │   │   ├── dropout_loongarch.h
│   │   │   ├── eltwise_loongarch.cpp
│   │   │   ├── eltwise_loongarch.h
│   │   │   ├── flatten_loongarch.cpp
│   │   │   ├── flatten_loongarch.h
│   │   │   ├── hardsigmoid_loongarch.cpp
│   │   │   ├── hardsigmoid_loongarch.h
│   │   │   ├── hardswish_loongarch.cpp
│   │   │   ├── hardswish_loongarch.h
│   │   │   ├── innerproduct_loongarch.cpp
│   │   │   ├── innerproduct_loongarch.h
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_loongarch.cpp
│   │   │   ├── interp_loongarch.h
│   │   │   ├── lasx_mathfun.h
│   │   │   ├── loongarch_activation.h
│   │   │   ├── loongarch_usability.h
│   │   │   ├── lsx_mathfun.h
│   │   │   ├── mish_loongarch.cpp
│   │   │   ├── mish_loongarch.h
│   │   │   ├── packing_loongarch.cpp
│   │   │   ├── packing_loongarch.h
│   │   │   ├── padding_loongarch.cpp
│   │   │   ├── padding_loongarch.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pooling_loongarch.cpp
│   │   │   ├── pooling_loongarch.h
│   │   │   ├── prelu_loongarch.cpp
│   │   │   ├── prelu_loongarch.h
│   │   │   ├── quantize_loongarch.cpp
│   │   │   ├── quantize_loongarch.h
│   │   │   ├── relu_loongarch.cpp
│   │   │   ├── relu_loongarch.h
│   │   │   ├── requantize_loongarch.cpp
│   │   │   ├── requantize_loongarch.h
│   │   │   ├── sigmoid_loongarch.cpp
│   │   │   ├── sigmoid_loongarch.h
│   │   │   ├── slice_loongarch.cpp
│   │   │   ├── slice_loongarch.h
│   │   │   ├── softmax_loongarch.cpp
│   │   │   ├── softmax_loongarch.h
│   │   │   ├── swish_loongarch.cpp
│   │   │   ├── swish_loongarch.h
│   │   │   ├── tanh_loongarch.cpp
│   │   │   ├── tanh_loongarch.h
│   │   │   ├── unaryop_loongarch.cpp
│   │   │   └── unaryop_loongarch.h
│   │   ├── lrn.cpp
│   │   ├── lrn.h
│   │   ├── lstm.cpp
│   │   ├── lstm.h
│   │   ├── matmul.cpp
│   │   ├── matmul.h
│   │   ├── memorydata.cpp
│   │   ├── memorydata.h
│   │   ├── mips/
│   │   │   ├── absval_mips.cpp
│   │   │   ├── absval_mips.h
│   │   │   ├── batchnorm_mips.cpp
│   │   │   ├── batchnorm_mips.h
│   │   │   ├── bias_mips.cpp
│   │   │   ├── bias_mips.h
│   │   │   ├── binaryop_mips.cpp
│   │   │   ├── binaryop_mips.h
│   │   │   ├── cast_mips.cpp
│   │   │   ├── cast_mips.h
│   │   │   ├── clip_mips.cpp
│   │   │   ├── clip_mips.h
│   │   │   ├── concat_mips.cpp
│   │   │   ├── concat_mips.h
│   │   │   ├── convolution1d_mips.cpp
│   │   │   ├── convolution1d_mips.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_int8.h
│   │   │   ├── convolution_1x1_pack1to4_int8.h
│   │   │   ├── convolution_1x1_pack4.h
│   │   │   ├── convolution_1x1_pack4to1.h
│   │   │   ├── convolution_1x1_pack8to1_int8.h
│   │   │   ├── convolution_1x1_pack8to4_int8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack4.h
│   │   │   ├── convolution_3x3_pack8to1_int8.h
│   │   │   ├── convolution_3x3_pack8to4_int8.h
│   │   │   ├── convolution_7x7_pack1to4.h
│   │   │   ├── convolution_int8.h
│   │   │   ├── convolution_mips.cpp
│   │   │   ├── convolution_mips.h
│   │   │   ├── convolution_mips_mmi.cpp
│   │   │   ├── convolution_pack1to4.h
│   │   │   ├── convolution_pack1to4_int8.h
│   │   │   ├── convolution_pack4.h
│   │   │   ├── convolution_pack4to1.h
│   │   │   ├── convolution_pack8to1_int8.h
│   │   │   ├── convolution_pack8to4_int8.h
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_int8.h
│   │   │   ├── convolution_sgemm_pack1to4_int8.h
│   │   │   ├── convolution_sgemm_pack4.h
│   │   │   ├── convolution_sgemm_pack4to1.h
│   │   │   ├── convolution_sgemm_pack8to1_int8.h
│   │   │   ├── convolution_sgemm_pack8to4_int8.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_int8.h
│   │   │   ├── convolution_winograd_dot_pack4.h
│   │   │   ├── convolution_winograd_dot_pack8to1_int8.h
│   │   │   ├── convolution_winograd_dot_pack8to4_int8.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_int8.h
│   │   │   ├── convolution_winograd_transform_pack4.h
│   │   │   ├── convolution_winograd_transform_pack4_int8.h
│   │   │   ├── convolution_winograd_transform_pack8_int8.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_mips.cpp
│   │   │   ├── convolutiondepthwise_mips.h
│   │   │   ├── crop_mips.cpp
│   │   │   ├── crop_mips.h
│   │   │   ├── deconvolution_mips.cpp
│   │   │   ├── deconvolution_mips.h
│   │   │   ├── deconvolution_pack1to4.h
│   │   │   ├── deconvolution_pack4.h
│   │   │   ├── deconvolution_pack4to1.h
│   │   │   ├── deconvolutiondepthwise_mips.cpp
│   │   │   ├── deconvolutiondepthwise_mips.h
│   │   │   ├── dequantize_mips.cpp
│   │   │   ├── dequantize_mips.h
│   │   │   ├── dropout_mips.cpp
│   │   │   ├── dropout_mips.h
│   │   │   ├── eltwise_mips.cpp
│   │   │   ├── eltwise_mips.h
│   │   │   ├── elu_mips.cpp
│   │   │   ├── elu_mips.h
│   │   │   ├── erf_mips.cpp
│   │   │   ├── erf_mips.h
│   │   │   ├── flatten_mips.cpp
│   │   │   ├── flatten_mips.h
│   │   │   ├── gelu_mips.cpp
│   │   │   ├── gelu_mips.h
│   │   │   ├── hardsigmoid_mips.cpp
│   │   │   ├── hardsigmoid_mips.h
│   │   │   ├── hardswish_mips.cpp
│   │   │   ├── hardswish_mips.h
│   │   │   ├── innerproduct_mips.cpp
│   │   │   ├── innerproduct_mips.h
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_mips.cpp
│   │   │   ├── interp_mips.h
│   │   │   ├── loongson_mmi.h
│   │   │   ├── mips_activation.h
│   │   │   ├── mips_usability.h
│   │   │   ├── mish_mips.cpp
│   │   │   ├── mish_mips.h
│   │   │   ├── msa_mathfun.h
│   │   │   ├── packing_mips.cpp
│   │   │   ├── packing_mips.h
│   │   │   ├── padding_mips.cpp
│   │   │   ├── padding_mips.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── pooling_mips.cpp
│   │   │   ├── pooling_mips.h
│   │   │   ├── prelu_mips.cpp
│   │   │   ├── prelu_mips.h
│   │   │   ├── quantize_mips.cpp
│   │   │   ├── quantize_mips.h
│   │   │   ├── relu_mips.cpp
│   │   │   ├── relu_mips.h
│   │   │   ├── requantize_mips.cpp
│   │   │   ├── requantize_mips.h
│   │   │   ├── selu_mips.cpp
│   │   │   ├── selu_mips.h
│   │   │   ├── sigmoid_mips.cpp
│   │   │   ├── sigmoid_mips.h
│   │   │   ├── slice_mips.cpp
│   │   │   ├── slice_mips.h
│   │   │   ├── softmax_mips.cpp
│   │   │   ├── softmax_mips.h
│   │   │   ├── swish_mips.cpp
│   │   │   ├── swish_mips.h
│   │   │   ├── tanh_mips.cpp
│   │   │   ├── tanh_mips.h
│   │   │   ├── unaryop_mips.cpp
│   │   │   └── unaryop_mips.h
│   │   ├── mish.cpp
│   │   ├── mish.h
│   │   ├── multiheadattention.cpp
│   │   ├── multiheadattention.h
│   │   ├── mvn.cpp
│   │   ├── mvn.h
│   │   ├── noop.cpp
│   │   ├── noop.h
│   │   ├── normalize.cpp
│   │   ├── normalize.h
│   │   ├── packing.cpp
│   │   ├── packing.h
│   │   ├── padding.cpp
│   │   ├── padding.h
│   │   ├── permute.cpp
│   │   ├── permute.h
│   │   ├── pixelshuffle.cpp
│   │   ├── pixelshuffle.h
│   │   ├── pooling.cpp
│   │   ├── pooling.h
│   │   ├── pooling1d.cpp
│   │   ├── pooling1d.h
│   │   ├── pooling3d.cpp
│   │   ├── pooling3d.h
│   │   ├── power.cpp
│   │   ├── power.h
│   │   ├── prelu.cpp
│   │   ├── prelu.h
│   │   ├── priorbox.cpp
│   │   ├── priorbox.h
│   │   ├── proposal.cpp
│   │   ├── proposal.h
│   │   ├── psroipooling.cpp
│   │   ├── psroipooling.h
│   │   ├── quantize.cpp
│   │   ├── quantize.h
│   │   ├── reduction.cpp
│   │   ├── reduction.h
│   │   ├── relu.cpp
│   │   ├── relu.h
│   │   ├── reorg.cpp
│   │   ├── reorg.h
│   │   ├── requantize.cpp
│   │   ├── requantize.h
│   │   ├── reshape.cpp
│   │   ├── reshape.h
│   │   ├── riscv/
│   │   │   ├── absval_riscv.cpp
│   │   │   ├── absval_riscv.h
│   │   │   ├── absval_riscv_zfh.cpp
│   │   │   ├── batchnorm_riscv.cpp
│   │   │   ├── batchnorm_riscv.h
│   │   │   ├── batchnorm_riscv_zfh.cpp
│   │   │   ├── bias_riscv.cpp
│   │   │   ├── bias_riscv.h
│   │   │   ├── bias_riscv_zfh.cpp
│   │   │   ├── binaryop_riscv.cpp
│   │   │   ├── binaryop_riscv.h
│   │   │   ├── binaryop_riscv_zfh.cpp
│   │   │   ├── bnll_riscv.cpp
│   │   │   ├── bnll_riscv.h
│   │   │   ├── bnll_riscv_zfh.cpp
│   │   │   ├── cast_riscv.cpp
│   │   │   ├── cast_riscv.h
│   │   │   ├── cast_riscv_zfh.cpp
│   │   │   ├── celu_riscv.cpp
│   │   │   ├── celu_riscv.h
│   │   │   ├── celu_riscv_zfh.cpp
│   │   │   ├── clip_riscv.cpp
│   │   │   ├── clip_riscv.h
│   │   │   ├── clip_riscv_zfh.cpp
│   │   │   ├── concat_riscv.cpp
│   │   │   ├── concat_riscv.h
│   │   │   ├── convolution1d_riscv.cpp
│   │   │   ├── convolution1d_riscv.h
│   │   │   ├── convolution1d_riscv_zfh.cpp
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_1x1_fp16s.h
│   │   │   ├── convolution_1x1_pack1ton.h
│   │   │   ├── convolution_1x1_pack1ton_fp16s.h
│   │   │   ├── convolution_1x1_packn.h
│   │   │   ├── convolution_1x1_packn_fp16s.h
│   │   │   ├── convolution_1x1_packnto1.h
│   │   │   ├── convolution_1x1_packnto1_fp16s.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_pack1ton.h
│   │   │   ├── convolution_3x3_pack1ton_fp16s.h
│   │   │   ├── convolution_3x3_packn.h
│   │   │   ├── convolution_3x3_packn_fp16s.h
│   │   │   ├── convolution_7x7_pack1ton.h
│   │   │   ├── convolution_7x7_pack1ton_fp16s.h
│   │   │   ├── convolution_fp16s.h
│   │   │   ├── convolution_pack1ton.h
│   │   │   ├── convolution_pack1ton_fp16s.h
│   │   │   ├── convolution_packn.h
│   │   │   ├── convolution_packn_fp16s.h
│   │   │   ├── convolution_packnto1.h
│   │   │   ├── convolution_packnto1_fp16s.h
│   │   │   ├── convolution_riscv.cpp
│   │   │   ├── convolution_riscv.h
│   │   │   ├── convolution_riscv_zfh.cpp
│   │   │   ├── convolution_sgemm.h
│   │   │   ├── convolution_sgemm_fp16s.h
│   │   │   ├── convolution_sgemm_pack1ton.h
│   │   │   ├── convolution_sgemm_pack1ton_fp16s.h
│   │   │   ├── convolution_sgemm_packn.h
│   │   │   ├── convolution_sgemm_packn_fp16s.h
│   │   │   ├── convolution_sgemm_packnto1.h
│   │   │   ├── convolution_sgemm_packnto1_fp16s.h
│   │   │   ├── convolution_winograd_dot.h
│   │   │   ├── convolution_winograd_dot_packn.h
│   │   │   ├── convolution_winograd_dot_packn_fp16s.h
│   │   │   ├── convolution_winograd_transform.h
│   │   │   ├── convolution_winograd_transform_packn.h
│   │   │   ├── convolution_winograd_transform_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_packn.h
│   │   │   ├── convolutiondepthwise_3x3_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_5x5_packn.h
│   │   │   ├── convolutiondepthwise_5x5_packn_fp16s.h
│   │   │   ├── convolutiondepthwise_riscv.cpp
│   │   │   ├── convolutiondepthwise_riscv.h
│   │   │   ├── convolutiondepthwise_riscv_zfh.cpp
│   │   │   ├── crop_riscv.cpp
│   │   │   ├── crop_riscv.h
│   │   │   ├── deconvolution_fp16s.h
│   │   │   ├── deconvolution_pack1ton.h
│   │   │   ├── deconvolution_pack1ton_fp16s.h
│   │   │   ├── deconvolution_packn.h
│   │   │   ├── deconvolution_packn_fp16s.h
│   │   │   ├── deconvolution_packnto1.h
│   │   │   ├── deconvolution_packnto1_fp16s.h
│   │   │   ├── deconvolution_riscv.cpp
│   │   │   ├── deconvolution_riscv.h
│   │   │   ├── deconvolution_riscv_zfh.cpp
│   │   │   ├── deconvolutiondepthwise_riscv.cpp
│   │   │   ├── deconvolutiondepthwise_riscv.h
│   │   │   ├── deconvolutiondepthwise_riscv_zfh.cpp
│   │   │   ├── deformableconv2d_pack1ton.h
│   │   │   ├── deformableconv2d_packn.h
│   │   │   ├── deformableconv2d_packnto1.h
│   │   │   ├── deformableconv2d_riscv.cpp
│   │   │   ├── deformableconv2d_riscv.h
│   │   │   ├── dropout_riscv.cpp
│   │   │   ├── dropout_riscv.h
│   │   │   ├── eltwise_riscv.cpp
│   │   │   ├── eltwise_riscv.h
│   │   │   ├── eltwise_riscv_zfh.cpp
│   │   │   ├── flatten_riscv.cpp
│   │   │   ├── flatten_riscv.h
│   │   │   ├── gelu_riscv.cpp
│   │   │   ├── gelu_riscv.h
│   │   │   ├── gemm_bf16s_fp16s.h
│   │   │   ├── gemm_fp16s.h
│   │   │   ├── gemm_riscv.cpp
│   │   │   ├── gemm_riscv.h
│   │   │   ├── gemm_riscv_zfh.cpp
│   │   │   ├── gru_riscv.cpp
│   │   │   ├── gru_riscv.h
│   │   │   ├── gru_riscv_zfh.cpp
│   │   │   ├── hardsigmoid_riscv.cpp
│   │   │   ├── hardsigmoid_riscv.h
│   │   │   ├── hardsigmoid_riscv_zfh.cpp
│   │   │   ├── hardswish_riscv.cpp
│   │   │   ├── hardswish_riscv.h
│   │   │   ├── hardswish_riscv_zfh.cpp
│   │   │   ├── innerproduct_riscv.cpp
│   │   │   ├── innerproduct_riscv.h
│   │   │   ├── innerproduct_riscv_zfh.cpp
│   │   │   ├── instancenorm_riscv.cpp
│   │   │   ├── instancenorm_riscv.h
│   │   │   ├── instancenorm_riscv_zfh.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_fp16s.h
│   │   │   ├── interp_bicubic_packn.h
│   │   │   ├── interp_bicubic_packn_fp16s.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_fp16s.h
│   │   │   ├── interp_bilinear_packn.h
│   │   │   ├── interp_bilinear_packn_fp16s.h
│   │   │   ├── interp_riscv.cpp
│   │   │   ├── interp_riscv.h
│   │   │   ├── interp_riscv_zfh.cpp
│   │   │   ├── layernorm_riscv.cpp
│   │   │   ├── layernorm_riscv.h
│   │   │   ├── layernorm_riscv_zfh.cpp
│   │   │   ├── mish_riscv.cpp
│   │   │   ├── mish_riscv.h
│   │   │   ├── mish_riscv_zfh.cpp
│   │   │   ├── packing_riscv.cpp
│   │   │   ├── packing_riscv.h
│   │   │   ├── padding_packn.h
│   │   │   ├── padding_riscv.cpp
│   │   │   ├── padding_riscv.h
│   │   │   ├── pooling_riscv.cpp
│   │   │   ├── pooling_riscv.h
│   │   │   ├── pooling_riscv_zfh.cpp
│   │   │   ├── prelu_riscv.cpp
│   │   │   ├── prelu_riscv.h
│   │   │   ├── prelu_riscv_zfh.cpp
│   │   │   ├── relu_riscv.cpp
│   │   │   ├── relu_riscv.h
│   │   │   ├── relu_riscv_zfh.cpp
│   │   │   ├── riscv_activation.h
│   │   │   ├── riscv_usability.h
│   │   │   ├── rvv_mathfun.h
│   │   │   ├── rvv_mathfun_fp16s.h
│   │   │   ├── selu_riscv.cpp
│   │   │   ├── selu_riscv.h
│   │   │   ├── shufflechannel_riscv.cpp
│   │   │   ├── shufflechannel_riscv.h
│   │   │   ├── sigmoid_riscv.cpp
│   │   │   ├── sigmoid_riscv.h
│   │   │   ├── sigmoid_riscv_zfh.cpp
│   │   │   ├── softmax_riscv.cpp
│   │   │   ├── softmax_riscv.h
│   │   │   ├── swish_riscv.cpp
│   │   │   ├── swish_riscv.h
│   │   │   ├── swish_riscv_zfh.cpp
│   │   │   ├── tanh_riscv.cpp
│   │   │   ├── tanh_riscv.h
│   │   │   ├── tanh_riscv_zfh.cpp
│   │   │   ├── unaryop_riscv.cpp
│   │   │   ├── unaryop_riscv.h
│   │   │   └── unaryop_riscv_zfh.cpp
│   │   ├── rmsnorm.cpp
│   │   ├── rmsnorm.h
│   │   ├── rnn.cpp
│   │   ├── rnn.h
│   │   ├── roialign.cpp
│   │   ├── roialign.h
│   │   ├── roipooling.cpp
│   │   ├── roipooling.h
│   │   ├── rotaryembed.cpp
│   │   ├── rotaryembed.h
│   │   ├── scale.cpp
│   │   ├── scale.h
│   │   ├── sdpa.cpp
│   │   ├── sdpa.h
│   │   ├── selu.cpp
│   │   ├── selu.h
│   │   ├── shrink.cpp
│   │   ├── shrink.h
│   │   ├── shufflechannel.cpp
│   │   ├── shufflechannel.h
│   │   ├── sigmoid.cpp
│   │   ├── sigmoid.h
│   │   ├── slice.cpp
│   │   ├── slice.h
│   │   ├── softmax.cpp
│   │   ├── softmax.h
│   │   ├── softplus.cpp
│   │   ├── softplus.h
│   │   ├── spectrogram.cpp
│   │   ├── spectrogram.h
│   │   ├── split.cpp
│   │   ├── split.h
│   │   ├── spp.cpp
│   │   ├── spp.h
│   │   ├── squeeze.cpp
│   │   ├── squeeze.h
│   │   ├── statisticspooling.cpp
│   │   ├── statisticspooling.h
│   │   ├── swish.cpp
│   │   ├── swish.h
│   │   ├── tanh.cpp
│   │   ├── tanh.h
│   │   ├── threshold.cpp
│   │   ├── threshold.h
│   │   ├── tile.cpp
│   │   ├── tile.h
│   │   ├── unaryop.cpp
│   │   ├── unaryop.h
│   │   ├── unfold.cpp
│   │   ├── unfold.h
│   │   ├── vulkan/
│   │   │   ├── absval_vulkan.cpp
│   │   │   ├── absval_vulkan.h
│   │   │   ├── batchnorm_vulkan.cpp
│   │   │   ├── batchnorm_vulkan.h
│   │   │   ├── binaryop_vulkan.cpp
│   │   │   ├── binaryop_vulkan.h
│   │   │   ├── cast_vulkan.cpp
│   │   │   ├── cast_vulkan.h
│   │   │   ├── celu_vulkan.cpp
│   │   │   ├── celu_vulkan.h
│   │   │   ├── clip_vulkan.cpp
│   │   │   ├── clip_vulkan.h
│   │   │   ├── concat_vulkan.cpp
│   │   │   ├── concat_vulkan.h
│   │   │   ├── convolution1d_vulkan.cpp
│   │   │   ├── convolution1d_vulkan.h
│   │   │   ├── convolution_vulkan.cpp
│   │   │   ├── convolution_vulkan.h
│   │   │   ├── convolutiondepthwise_vulkan.cpp
│   │   │   ├── convolutiondepthwise_vulkan.h
│   │   │   ├── crop_vulkan.cpp
│   │   │   ├── crop_vulkan.h
│   │   │   ├── deconvolution_vulkan.cpp
│   │   │   ├── deconvolution_vulkan.h
│   │   │   ├── deconvolutiondepthwise_vulkan.cpp
│   │   │   ├── deconvolutiondepthwise_vulkan.h
│   │   │   ├── deepcopy_vulkan.cpp
│   │   │   ├── deepcopy_vulkan.h
│   │   │   ├── dequantize_vulkan.cpp
│   │   │   ├── dequantize_vulkan.h
│   │   │   ├── dropout_vulkan.cpp
│   │   │   ├── dropout_vulkan.h
│   │   │   ├── eltwise_vulkan.cpp
│   │   │   ├── eltwise_vulkan.h
│   │   │   ├── elu_vulkan.cpp
│   │   │   ├── elu_vulkan.h
│   │   │   ├── erf_vulkan.cpp
│   │   │   ├── erf_vulkan.h
│   │   │   ├── flatten_vulkan.cpp
│   │   │   ├── flatten_vulkan.h
│   │   │   ├── gelu_vulkan.cpp
│   │   │   ├── gelu_vulkan.h
│   │   │   ├── gemm_vulkan.cpp
│   │   │   ├── gemm_vulkan.h
│   │   │   ├── groupnorm_vulkan.cpp
│   │   │   ├── groupnorm_vulkan.h
│   │   │   ├── hardsigmoid_vulkan.cpp
│   │   │   ├── hardsigmoid_vulkan.h
│   │   │   ├── hardswish_vulkan.cpp
│   │   │   ├── hardswish_vulkan.h
│   │   │   ├── innerproduct_vulkan.cpp
│   │   │   ├── innerproduct_vulkan.h
│   │   │   ├── instancenorm_vulkan.cpp
│   │   │   ├── instancenorm_vulkan.h
│   │   │   ├── interp_vulkan.cpp
│   │   │   ├── interp_vulkan.h
│   │   │   ├── layernorm_vulkan.cpp
│   │   │   ├── layernorm_vulkan.h
│   │   │   ├── lrn_vulkan.cpp
│   │   │   ├── lrn_vulkan.h
│   │   │   ├── memorydata_vulkan.cpp
│   │   │   ├── memorydata_vulkan.h
│   │   │   ├── mish_vulkan.cpp
│   │   │   ├── mish_vulkan.h
│   │   │   ├── multiheadattention_vulkan.cpp
│   │   │   ├── multiheadattention_vulkan.h
│   │   │   ├── noop_vulkan.cpp
│   │   │   ├── noop_vulkan.h
│   │   │   ├── normalize_vulkan.cpp
│   │   │   ├── normalize_vulkan.h
│   │   │   ├── packing_vulkan.cpp
│   │   │   ├── packing_vulkan.h
│   │   │   ├── padding_vulkan.cpp
│   │   │   ├── padding_vulkan.h
│   │   │   ├── permute_vulkan.cpp
│   │   │   ├── permute_vulkan.h
│   │   │   ├── pixelshuffle_vulkan.cpp
│   │   │   ├── pixelshuffle_vulkan.h
│   │   │   ├── pooling_vulkan.cpp
│   │   │   ├── pooling_vulkan.h
│   │   │   ├── prelu_vulkan.cpp
│   │   │   ├── prelu_vulkan.h
│   │   │   ├── priorbox_vulkan.cpp
│   │   │   ├── priorbox_vulkan.h
│   │   │   ├── quantize_vulkan.cpp
│   │   │   ├── quantize_vulkan.h
│   │   │   ├── reduction_vulkan.cpp
│   │   │   ├── reduction_vulkan.h
│   │   │   ├── relu_vulkan.cpp
│   │   │   ├── relu_vulkan.h
│   │   │   ├── reorg_vulkan.cpp
│   │   │   ├── reorg_vulkan.h
│   │   │   ├── requantize_vulkan.cpp
│   │   │   ├── requantize_vulkan.h
│   │   │   ├── reshape_vulkan.cpp
│   │   │   ├── reshape_vulkan.h
│   │   │   ├── rmsnorm_vulkan.cpp
│   │   │   ├── rmsnorm_vulkan.h
│   │   │   ├── rotaryembed_vulkan.cpp
│   │   │   ├── rotaryembed_vulkan.h
│   │   │   ├── scale_vulkan.cpp
│   │   │   ├── scale_vulkan.h
│   │   │   ├── sdpa_vulkan.cpp
│   │   │   ├── sdpa_vulkan.h
│   │   │   ├── selu_vulkan.cpp
│   │   │   ├── selu_vulkan.h
│   │   │   ├── shader/
│   │   │   │   ├── .clang-format
│   │   │   │   ├── absval.comp
│   │   │   │   ├── batchnorm.comp
│   │   │   │   ├── batchnorm_pack4.comp
│   │   │   │   ├── binaryop.comp
│   │   │   │   ├── binaryop_broadcast.comp
│   │   │   │   ├── binaryop_broadcast_pack1to4.comp
│   │   │   │   ├── binaryop_broadcast_pack4.comp
│   │   │   │   ├── binaryop_pack4.comp
│   │   │   │   ├── cast_fp16_to_fp32.comp
│   │   │   │   ├── cast_fp16_to_fp32_pack4.comp
│   │   │   │   ├── cast_fp32_to_fp16.comp
│   │   │   │   ├── cast_fp32_to_fp16_pack4.comp
│   │   │   │   ├── celu.comp
│   │   │   │   ├── clip.comp
│   │   │   │   ├── concat.comp
│   │   │   │   ├── concat_pack4.comp
│   │   │   │   ├── concat_pack4to1.comp
│   │   │   │   ├── convolution1d_packed.comp
│   │   │   │   ├── convolution_1x1s1d1_cm.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd23_transform_input.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd23_transform_output.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd43_transform_input.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd43_transform_output.comp
│   │   │   │   ├── convolution_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_gemm_cm.comp
│   │   │   │   ├── convolution_pack1to4_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd23_transform_input.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd23_transform_output.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd43_transform_input.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd43_transform_output.comp
│   │   │   │   ├── convolution_pack4_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_pack4to1_3x3s1d1_winograd_gemm.comp
│   │   │   │   ├── convolution_packed.comp
│   │   │   │   ├── convolution_packed_1x1s1d1.comp
│   │   │   │   ├── convolution_packed_gemm.comp
│   │   │   │   ├── convolution_winograd_gemm_cm.comp
│   │   │   │   ├── convolutiondepthwise.comp
│   │   │   │   ├── convolutiondepthwise_group.comp
│   │   │   │   ├── convolutiondepthwise_group_pack1to4.comp
│   │   │   │   ├── convolutiondepthwise_group_pack4.comp
│   │   │   │   ├── convolutiondepthwise_group_pack4to1.comp
│   │   │   │   ├── convolutiondepthwise_pack4.comp
│   │   │   │   ├── crop.comp
│   │   │   │   ├── crop_pack1to4.comp
│   │   │   │   ├── crop_pack4.comp
│   │   │   │   ├── crop_pack4to1.comp
│   │   │   │   ├── deconvolution_col2im.comp
│   │   │   │   ├── deconvolution_gemm_cm.comp
│   │   │   │   ├── deconvolution_gemm_packed.comp
│   │   │   │   ├── deconvolution_pack4_col2im.comp
│   │   │   │   ├── deconvolution_packed.comp
│   │   │   │   ├── deconvolutiondepthwise.comp
│   │   │   │   ├── deconvolutiondepthwise_group.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack1to4.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack4.comp
│   │   │   │   ├── deconvolutiondepthwise_group_pack4to1.comp
│   │   │   │   ├── deconvolutiondepthwise_pack4.comp
│   │   │   │   ├── deepcopy.comp
│   │   │   │   ├── deepcopy_pack4.comp
│   │   │   │   ├── dequantize.comp
│   │   │   │   ├── dequantize_pack4.comp
│   │   │   │   ├── dropout.comp
│   │   │   │   ├── eltwise.comp
│   │   │   │   ├── elu.comp
│   │   │   │   ├── erf.comp
│   │   │   │   ├── flatten.comp
│   │   │   │   ├── flatten_pack1to4.comp
│   │   │   │   ├── flatten_pack4.comp
│   │   │   │   ├── gelu.comp
│   │   │   │   ├── gemm.comp
│   │   │   │   ├── gemm_cm.comp
│   │   │   │   ├── gemm_sg.comp
│   │   │   │   ├── groupnorm_coeffs.comp
│   │   │   │   ├── groupnorm_coeffs_pack4.comp
│   │   │   │   ├── groupnorm_norm.comp
│   │   │   │   ├── groupnorm_norm_pack4.comp
│   │   │   │   ├── groupnorm_reduce_mean.comp
│   │   │   │   ├── groupnorm_reduce_mean_pack4.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp32.comp
│   │   │   │   ├── groupnorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── groupnorm_sub_mean_square.comp
│   │   │   │   ├── groupnorm_sub_mean_square_pack4.comp
│   │   │   │   ├── hardsigmoid.comp
│   │   │   │   ├── hardswish.comp
│   │   │   │   ├── innerproduct.comp
│   │   │   │   ├── innerproduct_gemm.comp
│   │   │   │   ├── innerproduct_gemm_wp1to4.comp
│   │   │   │   ├── innerproduct_gemm_wp4.comp
│   │   │   │   ├── innerproduct_gemm_wp4to1.comp
│   │   │   │   ├── innerproduct_pack1to4.comp
│   │   │   │   ├── innerproduct_pack4.comp
│   │   │   │   ├── innerproduct_pack4to1.comp
│   │   │   │   ├── innerproduct_reduce_sum8.comp
│   │   │   │   ├── innerproduct_reduce_sum8_pack4.comp
│   │   │   │   ├── innerproduct_sum8.comp
│   │   │   │   ├── innerproduct_sum8_pack1to4.comp
│   │   │   │   ├── innerproduct_sum8_pack4.comp
│   │   │   │   ├── innerproduct_sum8_pack4to1.comp
│   │   │   │   ├── instancenorm_coeffs.comp
│   │   │   │   ├── instancenorm_coeffs_pack4.comp
│   │   │   │   ├── instancenorm_norm.comp
│   │   │   │   ├── instancenorm_norm_pack4.comp
│   │   │   │   ├── instancenorm_reduce_mean.comp
│   │   │   │   ├── instancenorm_reduce_mean_pack4.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp32.comp
│   │   │   │   ├── instancenorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── instancenorm_sub_mean_square.comp
│   │   │   │   ├── instancenorm_sub_mean_square_pack4.comp
│   │   │   │   ├── interp.comp
│   │   │   │   ├── interp_bicubic.comp
│   │   │   │   ├── interp_bicubic_coeffs.comp
│   │   │   │   ├── interp_bicubic_pack4.comp
│   │   │   │   ├── interp_pack4.comp
│   │   │   │   ├── layernorm_coeffs.comp
│   │   │   │   ├── layernorm_coeffs_pack4.comp
│   │   │   │   ├── layernorm_norm.comp
│   │   │   │   ├── layernorm_norm_pack4.comp
│   │   │   │   ├── layernorm_reduce_mean.comp
│   │   │   │   ├── layernorm_reduce_mean_pack4.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp32.comp
│   │   │   │   ├── layernorm_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── layernorm_sub_mean_square.comp
│   │   │   │   ├── layernorm_sub_mean_square_pack4.comp
│   │   │   │   ├── lrn_norm.comp
│   │   │   │   ├── lrn_norm_across_channel_pack4.comp
│   │   │   │   ├── lrn_norm_within_channel_pack4.comp
│   │   │   │   ├── lrn_square_pad.comp
│   │   │   │   ├── lrn_square_pad_across_channel_pack4.comp
│   │   │   │   ├── lrn_square_pad_within_channel_pack4.comp
│   │   │   │   ├── mish.comp
│   │   │   │   ├── multiheadattention_qk_cross.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack1to4.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack4.comp
│   │   │   │   ├── multiheadattention_qk_cross_pack4to1.comp
│   │   │   │   ├── multiheadattention_qkv_cross.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack1to4.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack4.comp
│   │   │   │   ├── multiheadattention_qkv_cross_pack4to1.comp
│   │   │   │   ├── normalize_coeffs.comp
│   │   │   │   ├── normalize_coeffs_pack4.comp
│   │   │   │   ├── normalize_norm.comp
│   │   │   │   ├── normalize_norm_pack4.comp
│   │   │   │   ├── normalize_reduce_sum4_fp16_to_fp32.comp
│   │   │   │   ├── normalize_reduce_sum4_fp16_to_fp32_pack4.comp
│   │   │   │   ├── normalize_reduce_sum4_fp32.comp
│   │   │   │   ├── normalize_reduce_sum4_fp32_pack4.comp
│   │   │   │   ├── packing.comp
│   │   │   │   ├── packing_int8.comp
│   │   │   │   ├── packing_pack1to4.comp
│   │   │   │   ├── packing_pack1to4_int8.comp
│   │   │   │   ├── packing_pack4to1.comp
│   │   │   │   ├── packing_pack4to1_int8.comp
│   │   │   │   ├── padding.comp
│   │   │   │   ├── padding_3d.comp
│   │   │   │   ├── padding_3d_pack4.comp
│   │   │   │   ├── padding_pack1to4.comp
│   │   │   │   ├── padding_pack4.comp
│   │   │   │   ├── padding_pack4to1.comp
│   │   │   │   ├── permute.comp
│   │   │   │   ├── permute_pack1to4.comp
│   │   │   │   ├── permute_pack4.comp
│   │   │   │   ├── permute_pack4to1.comp
│   │   │   │   ├── pixelshuffle.comp
│   │   │   │   ├── pixelshuffle_pack4.comp
│   │   │   │   ├── pixelshuffle_pack4to1.comp
│   │   │   │   ├── pooling.comp
│   │   │   │   ├── pooling_adaptive.comp
│   │   │   │   ├── pooling_adaptive_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max.comp
│   │   │   │   ├── pooling_global_reduce_max_first.comp
│   │   │   │   ├── pooling_global_reduce_max_first_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max_last.comp
│   │   │   │   ├── pooling_global_reduce_max_last_pack4.comp
│   │   │   │   ├── pooling_global_reduce_max_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum.comp
│   │   │   │   ├── pooling_global_reduce_sum_first.comp
│   │   │   │   ├── pooling_global_reduce_sum_first_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum_last.comp
│   │   │   │   ├── pooling_global_reduce_sum_last_pack4.comp
│   │   │   │   ├── pooling_global_reduce_sum_pack4.comp
│   │   │   │   ├── pooling_pack4.comp
│   │   │   │   ├── prelu.comp
│   │   │   │   ├── prelu_pack4.comp
│   │   │   │   ├── priorbox.comp
│   │   │   │   ├── priorbox_mxnet.comp
│   │   │   │   ├── quantize.comp
│   │   │   │   ├── quantize_pack4.comp
│   │   │   │   ├── reduction.comp
│   │   │   │   ├── relu.comp
│   │   │   │   ├── reorg.comp
│   │   │   │   ├── reorg_pack1to4.comp
│   │   │   │   ├── reorg_pack4.comp
│   │   │   │   ├── requantize.comp
│   │   │   │   ├── requantize_pack4.comp
│   │   │   │   ├── reshape.comp
│   │   │   │   ├── reshape_pack1to4.comp
│   │   │   │   ├── reshape_pack4.comp
│   │   │   │   ├── reshape_pack4to1.comp
│   │   │   │   ├── rmsnorm_coeffs.comp
│   │   │   │   ├── rmsnorm_coeffs_pack4.comp
│   │   │   │   ├── rmsnorm_norm.comp
│   │   │   │   ├── rmsnorm_norm_pack4.comp
│   │   │   │   ├── rmsnorm_square.comp
│   │   │   │   ├── rmsnorm_square_pack4.comp
│   │   │   │   ├── rotaryembed.comp
│   │   │   │   ├── rotaryembed_pack4.comp
│   │   │   │   ├── scale.comp
│   │   │   │   ├── scale_pack4.comp
│   │   │   │   ├── sdpa_cross.comp
│   │   │   │   ├── sdpa_cross_cm.comp
│   │   │   │   ├── sdpa_fa.comp
│   │   │   │   ├── sdpa_fa_cm.comp
│   │   │   │   ├── selu.comp
│   │   │   │   ├── shrink.comp
│   │   │   │   ├── shufflechannel.comp
│   │   │   │   ├── shufflechannel_pack4.comp
│   │   │   │   ├── sigmoid.comp
│   │   │   │   ├── slice.comp
│   │   │   │   ├── slice_pack1to4.comp
│   │   │   │   ├── slice_pack4.comp
│   │   │   │   ├── softmax_div_sum.comp
│   │   │   │   ├── softmax_div_sum_pack4.comp
│   │   │   │   ├── softmax_exp_sub_max.comp
│   │   │   │   ├── softmax_exp_sub_max_pack4.comp
│   │   │   │   ├── softmax_reduce_max.comp
│   │   │   │   ├── softmax_reduce_max_pack4.comp
│   │   │   │   ├── softmax_reduce_sum.comp
│   │   │   │   ├── softmax_reduce_sum_pack4.comp
│   │   │   │   ├── softplus.comp
│   │   │   │   ├── swish.comp
│   │   │   │   ├── tanh.comp
│   │   │   │   ├── unaryop.comp
│   │   │   │   ├── unfold_im2col.comp
│   │   │   │   ├── unfold_im2col_pack1to4.comp
│   │   │   │   ├── unfold_im2col_pack4.comp
│   │   │   │   ├── unfold_im2col_pack4to1.comp
│   │   │   │   └── vulkan_activation.comp
│   │   │   ├── shrink_vulkan.cpp
│   │   │   ├── shrink_vulkan.h
│   │   │   ├── shufflechannel_vulkan.cpp
│   │   │   ├── shufflechannel_vulkan.h
│   │   │   ├── sigmoid_vulkan.cpp
│   │   │   ├── sigmoid_vulkan.h
│   │   │   ├── slice_vulkan.cpp
│   │   │   ├── slice_vulkan.h
│   │   │   ├── softmax_vulkan.cpp
│   │   │   ├── softmax_vulkan.h
│   │   │   ├── softplus_vulkan.cpp
│   │   │   ├── softplus_vulkan.h
│   │   │   ├── split_vulkan.cpp
│   │   │   ├── split_vulkan.h
│   │   │   ├── swish_vulkan.cpp
│   │   │   ├── swish_vulkan.h
│   │   │   ├── tanh_vulkan.cpp
│   │   │   ├── tanh_vulkan.h
│   │   │   ├── unaryop_vulkan.cpp
│   │   │   ├── unaryop_vulkan.h
│   │   │   ├── unfold_vulkan.cpp
│   │   │   └── unfold_vulkan.h
│   │   ├── x86/
│   │   │   ├── absval_x86.cpp
│   │   │   ├── absval_x86.h
│   │   │   ├── avx512_mathfun.h
│   │   │   ├── avx_mathfun.h
│   │   │   ├── batchnorm_bf16s.h
│   │   │   ├── batchnorm_x86.cpp
│   │   │   ├── batchnorm_x86.h
│   │   │   ├── batchnorm_x86_avx512bf16.cpp
│   │   │   ├── bias_x86.cpp
│   │   │   ├── bias_x86.h
│   │   │   ├── binaryop_bf16s.h
│   │   │   ├── binaryop_functor.h
│   │   │   ├── binaryop_x86.cpp
│   │   │   ├── binaryop_x86.h
│   │   │   ├── binaryop_x86_avx512bf16.cpp
│   │   │   ├── bnll_x86.cpp
│   │   │   ├── bnll_x86.h
│   │   │   ├── cast_bf16.h
│   │   │   ├── cast_fp16.h
│   │   │   ├── cast_x86.cpp
│   │   │   ├── cast_x86.h
│   │   │   ├── cast_x86_avx2.cpp
│   │   │   ├── cast_x86_avx512bf16.cpp
│   │   │   ├── cast_x86_f16c.cpp
│   │   │   ├── clip_bf16s.h
│   │   │   ├── clip_x86.cpp
│   │   │   ├── clip_x86.h
│   │   │   ├── clip_x86_avx512bf16.cpp
│   │   │   ├── concat_x86.cpp
│   │   │   ├── concat_x86.h
│   │   │   ├── convolution1d_packed.h
│   │   │   ├── convolution1d_x86.cpp
│   │   │   ├── convolution1d_x86.h
│   │   │   ├── convolution_1x1.h
│   │   │   ├── convolution_2x2_pack8.h
│   │   │   ├── convolution_3x3.h
│   │   │   ├── convolution_3x3_int8.h
│   │   │   ├── convolution_3x3_pack16to1.h
│   │   │   ├── convolution_3x3_pack1to4.h
│   │   │   ├── convolution_3x3_pack1to8.h
│   │   │   ├── convolution_3x3_pack8.h
│   │   │   ├── convolution_3x3_pack8to1.h
│   │   │   ├── convolution_3x3_winograd.h
│   │   │   ├── convolution_3x3_winograd_int8.h
│   │   │   ├── convolution_5x5.h
│   │   │   ├── convolution_im2col_gemm.h
│   │   │   ├── convolution_im2col_gemm_int8.h
│   │   │   ├── convolution_packed.h
│   │   │   ├── convolution_packed_int8.h
│   │   │   ├── convolution_x86.cpp
│   │   │   ├── convolution_x86.h
│   │   │   ├── convolution_x86_avx2.cpp
│   │   │   ├── convolution_x86_avx512vnni.cpp
│   │   │   ├── convolution_x86_avxvnni.cpp
│   │   │   ├── convolution_x86_avxvnniint8.cpp
│   │   │   ├── convolution_x86_xop.cpp
│   │   │   ├── convolutiondepthwise_3x3.h
│   │   │   ├── convolutiondepthwise_3x3_int8.h
│   │   │   ├── convolutiondepthwise_3x3_pack16.h
│   │   │   ├── convolutiondepthwise_3x3_pack4.h
│   │   │   ├── convolutiondepthwise_3x3_pack8.h
│   │   │   ├── convolutiondepthwise_5x5_pack16.h
│   │   │   ├── convolutiondepthwise_5x5_pack4.h
│   │   │   ├── convolutiondepthwise_5x5_pack8.h
│   │   │   ├── convolutiondepthwise_x86.cpp
│   │   │   ├── convolutiondepthwise_x86.h
│   │   │   ├── crop_x86.cpp
│   │   │   ├── crop_x86.h
│   │   │   ├── deconvolution_packed.h
│   │   │   ├── deconvolution_x86.cpp
│   │   │   ├── deconvolution_x86.h
│   │   │   ├── deconvolutiondepthwise_x86.cpp
│   │   │   ├── deconvolutiondepthwise_x86.h
│   │   │   ├── deformableconv2d_packed.h
│   │   │   ├── deformableconv2d_x86.cpp
│   │   │   ├── deformableconv2d_x86.h
│   │   │   ├── dequantize_x86.cpp
│   │   │   ├── dequantize_x86.h
│   │   │   ├── dropout_x86.cpp
│   │   │   ├── dropout_x86.h
│   │   │   ├── eltwise_x86.cpp
│   │   │   ├── eltwise_x86.h
│   │   │   ├── elu_x86.cpp
│   │   │   ├── elu_x86.h
│   │   │   ├── erf_x86.cpp
│   │   │   ├── erf_x86.h
│   │   │   ├── flatten_x86.cpp
│   │   │   ├── flatten_x86.h
│   │   │   ├── gelu_x86.cpp
│   │   │   ├── gelu_x86.h
│   │   │   ├── gemm_bf16s.h
│   │   │   ├── gemm_int8.h
│   │   │   ├── gemm_x86.cpp
│   │   │   ├── gemm_x86.h
│   │   │   ├── gemm_x86_avx2.cpp
│   │   │   ├── gemm_x86_avx512vnni.cpp
│   │   │   ├── gemm_x86_avxvnni.cpp
│   │   │   ├── gemm_x86_avxvnniint8.cpp
│   │   │   ├── gemm_x86_xop.cpp
│   │   │   ├── gridsample_bicubic_apply_interpolation.h
│   │   │   ├── gridsample_bicubic_compute_blob.h
│   │   │   ├── gridsample_bilinear_apply_interpolation.h
│   │   │   ├── gridsample_bilinear_compute_blob.h
│   │   │   ├── gridsample_compute_blob.h
│   │   │   ├── gridsample_nearest_apply_interpolation.h
│   │   │   ├── gridsample_nearest_compute_blob.h
│   │   │   ├── gridsample_x86.cpp
│   │   │   ├── gridsample_x86.h
│   │   │   ├── groupnorm_bf16s.h
│   │   │   ├── groupnorm_x86.cpp
│   │   │   ├── groupnorm_x86.h
│   │   │   ├── groupnorm_x86_avx512bf16.cpp
│   │   │   ├── hardsigmoid_x86.cpp
│   │   │   ├── hardsigmoid_x86.h
│   │   │   ├── hardswish_x86.cpp
│   │   │   ├── hardswish_x86.h
│   │   │   ├── innerproduct_fp.h
│   │   │   ├── innerproduct_gemm_fp.h
│   │   │   ├── innerproduct_x86.cpp
│   │   │   ├── innerproduct_x86.h
│   │   │   ├── innerproduct_x86_f16c.cpp
│   │   │   ├── instancenorm_bf16s.h
│   │   │   ├── instancenorm_x86.cpp
│   │   │   ├── instancenorm_x86.h
│   │   │   ├── instancenorm_x86_avx512bf16.cpp
│   │   │   ├── interp_bicubic.h
│   │   │   ├── interp_bicubic_pack16.h
│   │   │   ├── interp_bicubic_pack4.h
│   │   │   ├── interp_bicubic_pack8.h
│   │   │   ├── interp_bilinear.h
│   │   │   ├── interp_bilinear_pack16.h
│   │   │   ├── interp_bilinear_pack4.h
│   │   │   ├── interp_bilinear_pack8.h
│   │   │   ├── interp_x86.cpp
│   │   │   ├── interp_x86.h
│   │   │   ├── interp_x86_avx2.cpp
│   │   │   ├── layernorm_bf16s.h
│   │   │   ├── layernorm_x86.cpp
│   │   │   ├── layernorm_x86.h
│   │   │   ├── layernorm_x86_avx512bf16.cpp
│   │   │   ├── lrn_x86.cpp
│   │   │   ├── lrn_x86.h
│   │   │   ├── lstm_int8.h
│   │   │   ├── lstm_x86.cpp
│   │   │   ├── lstm_x86.h
│   │   │   ├── lstm_x86_avx2.cpp
│   │   │   ├── lstm_x86_avx512vnni.cpp
│   │   │   ├── lstm_x86_avxvnni.cpp
│   │   │   ├── lstm_x86_xop.cpp
│   │   │   ├── matmul_x86.cpp
│   │   │   ├── matmul_x86.h
│   │   │   ├── mish_x86.cpp
│   │   │   ├── mish_x86.h
│   │   │   ├── multiheadattention_x86.cpp
│   │   │   ├── multiheadattention_x86.h
│   │   │   ├── packing_x86.cpp
│   │   │   ├── packing_x86.h
│   │   │   ├── padding_pack16.h
│   │   │   ├── padding_pack16_bf16s_fp16s.h
│   │   │   ├── padding_pack4.h
│   │   │   ├── padding_pack4_bf16s_fp16s.h
│   │   │   ├── padding_pack8.h
│   │   │   ├── padding_pack8_bf16s_fp16s.h
│   │   │   ├── padding_pack8_int8.h
│   │   │   ├── padding_x86.cpp
│   │   │   ├── padding_x86.h
│   │   │   ├── pooling_2x2.h
│   │   │   ├── pooling_2x2_pack16.h
│   │   │   ├── pooling_2x2_pack4.h
│   │   │   ├── pooling_2x2_pack8.h
│   │   │   ├── pooling_3x3_pack16.h
│   │   │   ├── pooling_3x3_pack4.h
│   │   │   ├── pooling_3x3_pack8.h
│   │   │   ├── pooling_x86.cpp
│   │   │   ├── pooling_x86.h
│   │   │   ├── prelu_bf16s.h
│   │   │   ├── prelu_x86.cpp
│   │   │   ├── prelu_x86.h
│   │   │   ├── prelu_x86_avx512bf16.cpp
│   │   │   ├── quantize_x86.cpp
│   │   │   ├── quantize_x86.h
│   │   │   ├── relu_bf16s.h
│   │   │   ├── relu_x86.cpp
│   │   │   ├── relu_x86.h
│   │   │   ├── relu_x86_avx512bf16.cpp
│   │   │   ├── requantize_x86.cpp
│   │   │   ├── requantize_x86.h
│   │   │   ├── reshape_x86.cpp
│   │   │   ├── reshape_x86.h
│   │   │   ├── rmsnorm_bf16s.h
│   │   │   ├── rmsnorm_x86.cpp
│   │   │   ├── rmsnorm_x86.h
│   │   │   ├── rmsnorm_x86_avx512bf16.cpp
│   │   │   ├── roialign_x86.cpp
│   │   │   ├── roialign_x86.h
│   │   │   ├── rotaryembed_x86.cpp
│   │   │   ├── rotaryembed_x86.h
│   │   │   ├── scale_bf16s.h
│   │   │   ├── scale_x86.cpp
│   │   │   ├── scale_x86.h
│   │   │   ├── scale_x86_avx512bf16.cpp
│   │   │   ├── sdpa_x86.cpp
│   │   │   ├── sdpa_x86.h
│   │   │   ├── selu_x86.cpp
│   │   │   ├── selu_x86.h
│   │   │   ├── shufflechannel_x86.cpp
│   │   │   ├── shufflechannel_x86.h
│   │   │   ├── sigmoid_bf16s.h
│   │   │   ├── sigmoid_x86.cpp
│   │   │   ├── sigmoid_x86.h
│   │   │   ├── sigmoid_x86_avx512bf16.cpp
│   │   │   ├── slice_x86.cpp
│   │   │   ├── slice_x86.h
│   │   │   ├── softmax_bf16s.h
│   │   │   ├── softmax_x86.cpp
│   │   │   ├── softmax_x86.h
│   │   │   ├── softmax_x86_avx512bf16.cpp
│   │   │   ├── sse_mathfun.h
│   │   │   ├── swish_bf16s.h
│   │   │   ├── swish_x86.cpp
│   │   │   ├── swish_x86.h
│   │   │   ├── swish_x86_avx512bf16.cpp
│   │   │   ├── tanh_x86.cpp
│   │   │   ├── tanh_x86.h
│   │   │   ├── unaryop_bf16s.h
│   │   │   ├── unaryop_functor.h
│   │   │   ├── unaryop_x86.cpp
│   │   │   ├── unaryop_x86.h
│   │   │   ├── unaryop_x86_avx512bf16.cpp
│   │   │   ├── x86_activation.h
│   │   │   ├── x86_usability.h
│   │   │   ├── yolov3detectionoutput_x86.cpp
│   │   │   └── yolov3detectionoutput_x86.h
│   │   ├── yolodetectionoutput.cpp
│   │   ├── yolodetectionoutput.h
│   │   ├── yolov3detectionoutput.cpp
│   │   └── yolov3detectionoutput.h
│   ├── layer.cpp
│   ├── layer.h
│   ├── layer_declaration.h.in
│   ├── layer_registry.h.in
│   ├── layer_shader_registry.h.in
│   ├── layer_shader_spv_data.h.in
│   ├── layer_shader_type.h
│   ├── layer_shader_type_enum.h.in
│   ├── layer_type.h
│   ├── layer_type_enum.h.in
│   ├── mat.cpp
│   ├── mat.h
│   ├── mat_pixel.cpp
│   ├── mat_pixel_affine.cpp
│   ├── mat_pixel_android.cpp
│   ├── mat_pixel_drawing.cpp
│   ├── mat_pixel_drawing_font.h
│   ├── mat_pixel_resize.cpp
│   ├── mat_pixel_rotate.cpp
│   ├── modelbin.cpp
│   ├── modelbin.h
│   ├── ncnn.pc.in
│   ├── net.cpp
│   ├── net.h
│   ├── option.cpp
│   ├── option.h
│   ├── paramdict.cpp
│   ├── paramdict.h
│   ├── pipeline.cpp
│   ├── pipeline.h
│   ├── pipelinecache.cpp
│   ├── pipelinecache.h
│   ├── platform.h.in
│   ├── ruapu.h
│   ├── simplemath.cpp
│   ├── simplemath.h
│   ├── simpleocv.cpp
│   ├── simpleocv.h
│   ├── simpleomp.cpp
│   ├── simpleomp.h
│   ├── simplestl.cpp
│   ├── simplestl.h
│   ├── simplevk.cpp
│   ├── simplevk.h
│   ├── simplevk.tbd
│   ├── stb_image.h
│   ├── stb_image_write.h
│   └── vulkan_header_fix.h
├── tests/
│   ├── CMakeLists.txt
│   ├── perf/
│   │   ├── CMakeLists.txt
│   │   ├── perf_batchnorm.cpp
│   │   ├── perf_binaryop.cpp
│   │   ├── perf_concat.cpp
│   │   ├── perf_convolution.cpp
│   │   ├── perf_convolutiondepthwise.cpp
│   │   ├── perf_deconvolution.cpp
│   │   ├── perf_innerproduct.cpp
│   │   ├── perf_pooling.cpp
│   │   ├── perf_relu.cpp
│   │   ├── perf_sigmoid.cpp
│   │   ├── perf_softmax.cpp
│   │   ├── perfutil.cpp
│   │   └── perfutil.h
│   ├── prng.h
│   ├── test_absval.cpp
│   ├── test_batchnorm.cpp
│   ├── test_bias.cpp
│   ├── test_binaryop.cpp
│   ├── test_binaryop_1.cpp
│   ├── test_binaryop_2.cpp
│   ├── test_binaryop_3.cpp
│   ├── test_binaryop_4.cpp
│   ├── test_bnll.cpp
│   ├── test_c_api.cpp
│   ├── test_cast.cpp
│   ├── test_celu.cpp
│   ├── test_clip.cpp
│   ├── test_command.cpp
│   ├── test_concat.cpp
│   ├── test_concat_oom.cpp
│   ├── test_convolution.cpp
│   ├── test_convolution1d.cpp
│   ├── test_convolution3d.cpp
│   ├── test_convolution_1.cpp
│   ├── test_convolution_2.cpp
│   ├── test_convolution_3.cpp
│   ├── test_convolution_oom.cpp
│   ├── test_convolutiondepthwise.cpp
│   ├── test_convolutiondepthwise1d.cpp
│   ├── test_convolutiondepthwise3d.cpp
│   ├── test_convolutiondepthwise_1.cpp
│   ├── test_copyto.cpp
│   ├── test_copyto_1.cpp
│   ├── test_cpu.cpp
│   ├── test_crop.cpp
│   ├── test_crop_1.cpp
│   ├── test_crop_2.cpp
│   ├── test_crop_3.cpp
│   ├── test_crop_oom.cpp
│   ├── test_cumulativesum.cpp
│   ├── test_deconvolution.cpp
│   ├── test_deconvolution1d.cpp
│   ├── test_deconvolution3d.cpp
│   ├── test_deconvolutiondepthwise.cpp
│   ├── test_deconvolutiondepthwise1d.cpp
│   ├── test_deconvolutiondepthwise3d.cpp
│   ├── test_deconvolutiondepthwise_1.cpp
│   ├── test_deepcopy.cpp
│   ├── test_deformableconv2d.cpp
│   ├── test_deformableconv2d_1.cpp
│   ├── test_deformableconv2d_2.cpp
│   ├── test_deformableconv2d_3.cpp
│   ├── test_deformableconv2d_4.cpp
│   ├── test_dequantize.cpp
│   ├── test_diag.cpp
│   ├── test_dropout.cpp
│   ├── test_einsum.cpp
│   ├── test_eltwise.cpp
│   ├── test_elu.cpp
│   ├── test_embed.cpp
│   ├── test_erf.cpp
│   ├── test_expanddims.cpp
│   ├── test_expression.cpp
│   ├── test_flatten.cpp
│   ├── test_flip.cpp
│   ├── test_fold.cpp
│   ├── test_gelu.cpp
│   ├── test_gemm_0.h
│   ├── test_gemm_0a.cpp
│   ├── test_gemm_0b.cpp
│   ├── test_gemm_0c.cpp
│   ├── test_gemm_0d.cpp
│   ├── test_gemm_0e.cpp
│   ├── test_gemm_0f.cpp
│   ├── test_gemm_1.h
│   ├── test_gemm_1a.cpp
│   ├── test_gemm_1b.cpp
│   ├── test_gemm_2.h
│   ├── test_gemm_2a.cpp
│   ├── test_gemm_2b.cpp
│   ├── test_gemm_2c.cpp
│   ├── test_gemm_2d.cpp
│   ├── test_gemm_2e.cpp
│   ├── test_gemm_3.cpp
│   ├── test_gemm_4.cpp
│   ├── test_gemm_nt.cpp
│   ├── test_gemm_oom.cpp
│   ├── test_glu.cpp
│   ├── test_gridsample.cpp
│   ├── test_groupnorm.cpp
│   ├── test_gru.cpp
│   ├── test_hardsigmoid.cpp
│   ├── test_hardswish.cpp
│   ├── test_innerproduct.cpp
│   ├── test_instancenorm.cpp
│   ├── test_interp.cpp
│   ├── test_interp_1.cpp
│   ├── test_inversespectrogram.cpp
│   ├── test_layernorm.cpp
│   ├── test_lrn.cpp
│   ├── test_lstm.cpp
│   ├── test_mat_pixel.cpp
│   ├── test_mat_pixel_affine.cpp
│   ├── test_mat_pixel_drawing.cpp
│   ├── test_mat_pixel_resize.cpp
│   ├── test_mat_pixel_rotate.cpp
│   ├── test_matmul.cpp
│   ├── test_memorydata.cpp
│   ├── test_mish.cpp
│   ├── test_multiheadattention.cpp
│   ├── test_multiheadattention_1.cpp
│   ├── test_multiheadattention_kvcache.cpp
│   ├── test_multiheadattention_oom.cpp
│   ├── test_noop.cpp
│   ├── test_normalize.cpp
│   ├── test_packing.cpp
│   ├── test_padding.cpp
│   ├── test_paramdict.cpp
│   ├── test_permute.cpp
│   ├── test_pixelshuffle.cpp
│   ├── test_pooling.cpp
│   ├── test_pooling1d.cpp
│   ├── test_pooling3d.cpp
│   ├── test_power.cpp
│   ├── test_prelu.cpp
│   ├── test_priorbox.cpp
│   ├── test_quantize.cpp
│   ├── test_quantize_oom.cpp
│   ├── test_reduction.cpp
│   ├── test_relu.cpp
│   ├── test_reorg.cpp
│   ├── test_requantize.cpp
│   ├── test_requantize_oom.cpp
│   ├── test_reshape.cpp
│   ├── test_reshape_1.cpp
│   ├── test_reshape_oom.cpp
│   ├── test_rmsnorm.cpp
│   ├── test_rnn.cpp
│   ├── test_roialign.cpp
│   ├── test_roipooling.cpp
│   ├── test_rotaryembed.cpp
│   ├── test_rotaryembed_oom.cpp
│   ├── test_scale.cpp
│   ├── test_sdpa.cpp
│   ├── test_sdpa_kvcache.cpp
│   ├── test_sdpa_oom.cpp
│   ├── test_selu.cpp
│   ├── test_shrink.cpp
│   ├── test_shufflechannel.cpp
│   ├── test_sigmoid.cpp
│   ├── test_slice.cpp
│   ├── test_slice_oom.cpp
│   ├── test_softmax.cpp
│   ├── test_softmax_oom.cpp
│   ├── test_softplus.cpp
│   ├── test_spectrogram.cpp
│   ├── test_squeeze.cpp
│   ├── test_squeezenet.cpp
│   ├── test_swish.cpp
│   ├── test_tanh.cpp
│   ├── test_tile.cpp
│   ├── test_tile_oom.cpp
│   ├── test_unaryop.cpp
│   ├── test_unfold.cpp
│   ├── test_yolov3detectionoutput.cpp
│   ├── testutil.cpp
│   └── testutil.h
├── toolchains/
│   ├── aarch64-linux-gnu-c.toolchain.cmake
│   ├── aarch64-linux-gnu.toolchain.cmake
│   ├── aarch64-qnx.toolchain.cmake
│   ├── anykav500.toolchain.cmake
│   ├── arm-linux-gnueabi-c.toolchain.cmake
│   ├── arm-linux-gnueabi.toolchain.cmake
│   ├── arm-linux-gnueabihf-vfpv3-d16.toolchain.cmake
│   ├── arm-linux-gnueabihf.toolchain.cmake
│   ├── c906-v310.toolchain.cmake
│   ├── c907-rv32-v310.toolchain.cmake
│   ├── c907-v310.toolchain.cmake
│   ├── c908-v310.toolchain.cmake
│   ├── c910-v310.toolchain.cmake
│   ├── esp32.toolchain.cmake
│   ├── himix100.toolchain.cmake
│   ├── himix200.toolchain.cmake
│   ├── himix210.toolchain.cmake
│   ├── hisiv300.toolchain.cmake
│   ├── hisiv500.toolchain.cmake
│   ├── hisiv600.toolchain.cmake
│   ├── host-c.clang.toolchain.cmake
│   ├── host-c.gcc.toolchain.cmake
│   ├── host.clang-m32.toolchain.cmake
│   ├── host.gcc-c++03.toolchain.cmake
│   ├── host.gcc-m32.toolchain.cmake
│   ├── host.gcc.toolchain.cmake
│   ├── ingenic-x2000.toolchain.cmake
│   ├── ios.toolchain.cmake
│   ├── iossimxc-x64.toolchain.cmake
│   ├── iossimxc.toolchain.cmake
│   ├── iosxc-arm64.toolchain.cmake
│   ├── iosxc.toolchain.cmake
│   ├── jetson.toolchain.cmake
│   ├── k1.llvm.toolchain.cmake
│   ├── k1.toolchain.cmake
│   ├── loongarch64-linux-gnu.toolchain.cmake
│   ├── loongarch64-unknown-linux-gnu.toolchain.cmake
│   ├── loongson2f-linux-gnuabi64.toolchain.cmake
│   ├── mips-mti-linux-gnu.toolchain.cmake
│   ├── mips32r2-linux-gnu.toolchain.cmake
│   ├── mips64el-linux-gnuabi64.toolchain.cmake
│   ├── mipsel-linux-gnu.toolchain.cmake
│   ├── mipsisa32r6el-linux-gnu.toolchain.cmake
│   ├── mipsisa64r6el-linux-gnuabi64.toolchain.cmake
│   ├── pi3.toolchain.cmake
│   ├── power8le-linux-gnu-vsx.clang.toolchain.cmake
│   ├── power8le-linux-gnu-vsx.toolchain.cmake
│   ├── power9le-linux-gnu-vsx.clang.toolchain.cmake
│   ├── power9le-linux-gnu-vsx.toolchain.cmake
│   ├── powerpc-linux-gnu.toolchain.cmake
│   ├── powerpc64le-linux-gnu.toolchain.cmake
│   ├── riscv32-unknown-elf.toolchain.cmake
│   ├── riscv64-linux-gnu.toolchain.cmake
│   ├── riscv64-unknown-elf.toolchain.cmake
│   ├── riscv64-unknown-linux-gnu.llvm-toolchain.cmake
│   ├── riscv64-unknown-linux-gnu.toolchain.cmake
│   ├── v831.toolchain.cmake
│   ├── windows-xp-clang.toolchain.cmake
│   ├── windows-xp-mingw.toolchain.cmake
│   └── windows-xp-msvc.toolchain.cmake
└── tools/
    ├── CMakeLists.txt
    ├── caffe/
    │   ├── CMakeLists.txt
    │   ├── caffe.proto
    │   └── caffe2ncnn.cpp
    ├── darknet/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   └── darknet2ncnn.cpp
    ├── keras/
    │   └── readme.md
    ├── mlir/
    │   ├── CMakeLists.txt
    │   ├── fix_td.sh
    │   ├── mlir2ncnn.cpp
    │   ├── ncnn_dialect.cpp
    │   ├── ncnn_dialect.h
    │   ├── ncnn_ops.td
    │   ├── ncnn_rewriter.cpp
    │   ├── ncnn_rewriter.td
    │   ├── tf_attributes.cc
    │   ├── tf_attributes.h
    │   ├── tf_dialect.cpp
    │   ├── tf_dialect.h
    │   ├── tf_generated_ops.td
    │   ├── tf_op_base.td
    │   ├── tf_ops.td
    │   ├── tf_side_effects.h
    │   ├── tf_traits.h
    │   ├── tf_types.cc
    │   ├── tf_types.def
    │   └── tf_types.h
    ├── modelwriter.h
    ├── mxnet/
    │   ├── CMakeLists.txt
    │   └── mxnet2ncnn.cpp
    ├── ncnn2mem.cpp
    ├── ncnnmerge.cpp
    ├── ncnnoptimize.cpp
    ├── onnx/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── onnx.proto
    │   └── onnx2ncnn.cpp
    ├── plugin/
    │   ├── ImageWatchNCNN.natvis
    │   ├── ImageWatchNNIE.natvis
    │   └── README.md
    ├── pnnx/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── cmake/
    │   │   └── PNNXPyTorch.cmake
    │   ├── python/
    │   │   ├── README.md
    │   │   ├── examples/
    │   │   │   ├── convert.py
    │   │   │   └── export.py
    │   │   ├── pnnx/
    │   │   │   ├── __init__.py
    │   │   │   └── utils/
    │   │   │       ├── __init__.py
    │   │   │       ├── convert.py
    │   │   │       ├── export.py
    │   │   │       └── utils.py
    │   │   ├── requirements.txt
    │   │   ├── setup.py
    │   │   └── tests/
    │   │       ├── test_convert.py
    │   │       ├── test_dynamicinput_convert.py
    │   │       ├── test_dynamicinput_export.py
    │   │       ├── test_export.py
    │   │       ├── test_naiveinput_convert.py
    │   │       └── test_naiveinput_export.py
    │   ├── src/
    │   │   ├── CMakeLists.txt
    │   │   ├── ir.cpp
    │   │   ├── ir.h
    │   │   ├── load_onnx.cpp
    │   │   ├── load_onnx.h
    │   │   ├── load_tnn.cpp
    │   │   ├── load_tnn.h
    │   │   ├── load_torchscript.cpp
    │   │   ├── load_torchscript.h
    │   │   ├── main.cpp
    │   │   ├── onnx-data.proto
    │   │   ├── onnx-ml.proto
    │   │   ├── onnx-operators-ml.proto
    │   │   ├── pass_level0/
    │   │   │   ├── constant_unpooling.cpp
    │   │   │   ├── constant_unpooling.h
    │   │   │   ├── convert_half_to_float.cpp
    │   │   │   ├── convert_half_to_float.h
    │   │   │   ├── flatten_input.cpp
    │   │   │   ├── flatten_input.h
    │   │   │   ├── inline_block.cpp
    │   │   │   ├── inline_block.h
    │   │   │   ├── reset_device.cpp
    │   │   │   ├── reset_device.h
    │   │   │   ├── shape_inference.cpp
    │   │   │   └── shape_inference.h
    │   │   ├── pass_level0.cpp
    │   │   ├── pass_level0.h
    │   │   ├── pass_level1/
    │   │   │   ├── fuse_module_pass.cpp
    │   │   │   ├── fuse_module_pass.h
    │   │   │   ├── nn_AdaptiveAvgPool1d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool2d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool3d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool1d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool2d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool3d.cpp
    │   │   │   ├── nn_AlphaDropout.cpp
    │   │   │   ├── nn_AvgPool1d.cpp
    │   │   │   ├── nn_AvgPool2d.cpp
    │   │   │   ├── nn_AvgPool3d.cpp
    │   │   │   ├── nn_BatchNorm1d.cpp
    │   │   │   ├── nn_BatchNorm2d.cpp
    │   │   │   ├── nn_BatchNorm3d.cpp
    │   │   │   ├── nn_CELU.cpp
    │   │   │   ├── nn_ChannelShuffle.cpp
    │   │   │   ├── nn_ConstantPad1d.cpp
    │   │   │   ├── nn_ConstantPad2d.cpp
    │   │   │   ├── nn_ConstantPad3d.cpp
    │   │   │   ├── nn_Conv1d.cpp
    │   │   │   ├── nn_Conv2d.cpp
    │   │   │   ├── nn_Conv3d.cpp
    │   │   │   ├── nn_ConvTranspose1d.cpp
    │   │   │   ├── nn_ConvTranspose2d.cpp
    │   │   │   ├── nn_ConvTranspose3d.cpp
    │   │   │   ├── nn_Dropout.cpp
    │   │   │   ├── nn_Dropout2d.cpp
    │   │   │   ├── nn_Dropout3d.cpp
    │   │   │   ├── nn_ELU.cpp
    │   │   │   ├── nn_Embedding.cpp
    │   │   │   ├── nn_Fold.cpp
    │   │   │   ├── nn_GELU.cpp
    │   │   │   ├── nn_GLU.cpp
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_GroupNorm.cpp
    │   │   │   ├── nn_Hardshrink.cpp
    │   │   │   ├── nn_Hardsigmoid.cpp
    │   │   │   ├── nn_Hardswish.cpp
    │   │   │   ├── nn_Hardtanh.cpp
    │   │   │   ├── nn_InstanceNorm1d.cpp
    │   │   │   ├── nn_InstanceNorm2d.cpp
    │   │   │   ├── nn_InstanceNorm3d.cpp
    │   │   │   ├── nn_LPPool1d.cpp
    │   │   │   ├── nn_LPPool2d.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_LayerNorm.cpp
    │   │   │   ├── nn_LeakyReLU.cpp
    │   │   │   ├── nn_Linear.cpp
    │   │   │   ├── nn_LocalResponseNorm.cpp
    │   │   │   ├── nn_LogSigmoid.cpp
    │   │   │   ├── nn_LogSoftmax.cpp
    │   │   │   ├── nn_MaxPool1d.cpp
    │   │   │   ├── nn_MaxPool2d.cpp
    │   │   │   ├── nn_MaxPool3d.cpp
    │   │   │   ├── nn_Mish.cpp
    │   │   │   ├── nn_MultiheadAttention.cpp
    │   │   │   ├── nn_PReLU.cpp
    │   │   │   ├── nn_PixelShuffle.cpp
    │   │   │   ├── nn_PixelUnshuffle.cpp
    │   │   │   ├── nn_RMSNorm.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_RReLU.cpp
    │   │   │   ├── nn_ReLU.cpp
    │   │   │   ├── nn_ReLU6.cpp
    │   │   │   ├── nn_ReflectionPad1d.cpp
    │   │   │   ├── nn_ReflectionPad2d.cpp
    │   │   │   ├── nn_ReplicationPad1d.cpp
    │   │   │   ├── nn_ReplicationPad2d.cpp
    │   │   │   ├── nn_ReplicationPad3d.cpp
    │   │   │   ├── nn_SELU.cpp
    │   │   │   ├── nn_SiLU.cpp
    │   │   │   ├── nn_Sigmoid.cpp
    │   │   │   ├── nn_Softmax.cpp
    │   │   │   ├── nn_Softmax2d.cpp
    │   │   │   ├── nn_Softmin.cpp
    │   │   │   ├── nn_Softplus.cpp
    │   │   │   ├── nn_Softshrink.cpp
    │   │   │   ├── nn_Softsign.cpp
    │   │   │   ├── nn_Tanh.cpp
    │   │   │   ├── nn_Tanhshrink.cpp
    │   │   │   ├── nn_Threshold.cpp
    │   │   │   ├── nn_Unfold.cpp
    │   │   │   ├── nn_Upsample.cpp
    │   │   │   ├── nn_UpsamplingBilinear2d.cpp
    │   │   │   ├── nn_UpsamplingNearest2d.cpp
    │   │   │   ├── nn_ZeroPad2d.cpp
    │   │   │   ├── nn_maxunpool2d.cpp
    │   │   │   ├── nn_quantized_Conv2d.cpp
    │   │   │   ├── nn_quantized_DeQuantize.cpp
    │   │   │   ├── nn_quantized_Linear.cpp
    │   │   │   ├── nn_quantized_Quantize.cpp
    │   │   │   ├── torchvision_DeformConv2d.cpp
    │   │   │   └── torchvision_RoIAlign.cpp
    │   │   ├── pass_level1.cpp
    │   │   ├── pass_level1.h
    │   │   ├── pass_level2/
    │   │   │   ├── F_adaptive_avg_pool1d.cpp
    │   │   │   ├── F_adaptive_avg_pool2d.cpp
    │   │   │   ├── F_adaptive_avg_pool3d.cpp
    │   │   │   ├── F_adaptive_max_pool1d.cpp
    │   │   │   ├── F_adaptive_max_pool2d.cpp
    │   │   │   ├── F_adaptive_max_pool3d.cpp
    │   │   │   ├── F_affine_grid.cpp
    │   │   │   ├── F_alpha_dropout.cpp
    │   │   │   ├── F_avg_pool1d.cpp
    │   │   │   ├── F_avg_pool2d.cpp
    │   │   │   ├── F_avg_pool3d.cpp
    │   │   │   ├── F_batch_norm.cpp
    │   │   │   ├── F_celu.cpp
    │   │   │   ├── F_conv1d.cpp
    │   │   │   ├── F_conv2d.cpp
    │   │   │   ├── F_conv3d.cpp
    │   │   │   ├── F_conv_transpose1d.cpp
    │   │   │   ├── F_conv_transpose2d.cpp
    │   │   │   ├── F_conv_transpose3d.cpp
    │   │   │   ├── F_dropout.cpp
    │   │   │   ├── F_dropout23d.cpp
    │   │   │   ├── F_elu.cpp
    │   │   │   ├── F_embedding.cpp
    │   │   │   ├── F_feature_alpha_dropout.cpp
    │   │   │   ├── F_fold.cpp
    │   │   │   ├── F_gelu.cpp
    │   │   │   ├── F_glu.cpp
    │   │   │   ├── F_grid_sample.cpp
    │   │   │   ├── F_group_norm.cpp
    │   │   │   ├── F_hardshrink.cpp
    │   │   │   ├── F_hardsigmoid.cpp
    │   │   │   ├── F_hardswish.cpp
    │   │   │   ├── F_hardtanh.cpp
    │   │   │   ├── F_instance_norm.cpp
    │   │   │   ├── F_interpolate.cpp
    │   │   │   ├── F_layer_norm.cpp
    │   │   │   ├── F_leaky_relu.cpp
    │   │   │   ├── F_linear.cpp
    │   │   │   ├── F_local_response_norm.cpp
    │   │   │   ├── F_log_softmax.cpp
    │   │   │   ├── F_logsigmoid.cpp
    │   │   │   ├── F_lp_pool1d.cpp
    │   │   │   ├── F_lp_pool2d.cpp
    │   │   │   ├── F_max_pool1d.cpp
    │   │   │   ├── F_max_pool2d.cpp
    │   │   │   ├── F_max_pool3d.cpp
    │   │   │   ├── F_mish.cpp
    │   │   │   ├── F_normalize.cpp
    │   │   │   ├── F_pad.cpp
    │   │   │   ├── F_pairwise_distance.cpp
    │   │   │   ├── F_pixel_shuffle.cpp
    │   │   │   ├── F_pixel_unshuffle.cpp
    │   │   │   ├── F_prelu.cpp
    │   │   │   ├── F_relu.cpp
    │   │   │   ├── F_relu6.cpp
    │   │   │   ├── F_rms_norm.cpp
    │   │   │   ├── F_rrelu.cpp
    │   │   │   ├── F_scaled_dot_product_attention.cpp
    │   │   │   ├── F_selu.cpp
    │   │   │   ├── F_sigmoid.cpp
    │   │   │   ├── F_silu.cpp
    │   │   │   ├── F_softmax.cpp
    │   │   │   ├── F_softmin.cpp
    │   │   │   ├── F_softplus.cpp
    │   │   │   ├── F_softshrink.cpp
    │   │   │   ├── F_softsign.cpp
    │   │   │   ├── F_tanh.cpp
    │   │   │   ├── F_tanhshrink.cpp
    │   │   │   ├── F_threshold.cpp
    │   │   │   ├── F_unfold.cpp
    │   │   │   ├── F_upsample.cpp
    │   │   │   ├── F_upsample_bilinear.cpp
    │   │   │   ├── F_upsample_nearest.cpp
    │   │   │   ├── README.md
    │   │   │   ├── Tensor_copy.cpp
    │   │   │   ├── Tensor_expand.cpp
    │   │   │   ├── Tensor_expand_as.cpp
    │   │   │   ├── Tensor_fill.cpp
    │   │   │   ├── Tensor_index.cpp
    │   │   │   ├── Tensor_index_put.cpp
    │   │   │   ├── Tensor_masked_fill.cpp
    │   │   │   ├── Tensor_new_empty.cpp
    │   │   │   ├── Tensor_new_ones.cpp
    │   │   │   ├── Tensor_new_zeros.cpp
    │   │   │   ├── Tensor_permute.cpp
    │   │   │   ├── Tensor_repeat.cpp
    │   │   │   ├── Tensor_reshape.cpp
    │   │   │   ├── Tensor_reshape_as.cpp
    │   │   │   ├── Tensor_select.cpp
    │   │   │   ├── Tensor_size.cpp
    │   │   │   ├── Tensor_slice.cpp
    │   │   │   ├── Tensor_to.cpp
    │   │   │   ├── Tensor_type_as.cpp
    │   │   │   ├── Tensor_unflatten.cpp
    │   │   │   ├── eliminate_contiguous.cpp
    │   │   │   ├── eliminate_contiguous.h
    │   │   │   ├── eliminate_size_numtotensor_int.cpp
    │   │   │   ├── eliminate_size_numtotensor_int.h
    │   │   │   ├── functionize.cpp
    │   │   │   ├── functionize.h
    │   │   │   ├── fuse_constantlist.cpp
    │   │   │   ├── fuse_constantlist.h
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_quantized_FloatFunctional.cpp
    │   │   │   ├── torch_addmm.cpp
    │   │   │   ├── torch_amax.cpp
    │   │   │   ├── torch_amin.cpp
    │   │   │   ├── torch_arange.cpp
    │   │   │   ├── torch_argmax.cpp
    │   │   │   ├── torch_argmin.cpp
    │   │   │   ├── torch_as_strided.cpp
    │   │   │   ├── torch_baddbmm.cpp
    │   │   │   ├── torch_bitwise_and.cpp
    │   │   │   ├── torch_bitwise_left_shift.cpp
    │   │   │   ├── torch_bitwise_not.cpp
    │   │   │   ├── torch_bitwise_or.cpp
    │   │   │   ├── torch_bitwise_right_shift.cpp
    │   │   │   ├── torch_bitwise_xor.cpp
    │   │   │   ├── torch_bmm.cpp
    │   │   │   ├── torch_cat.cpp
    │   │   │   ├── torch_chunk.cpp
    │   │   │   ├── torch_clamp.cpp
    │   │   │   ├── torch_clone.cpp
    │   │   │   ├── torch_complex.cpp
    │   │   │   ├── torch_cross.cpp
    │   │   │   ├── torch_cumprod.cpp
    │   │   │   ├── torch_cumsum.cpp
    │   │   │   ├── torch_dequantize.cpp
    │   │   │   ├── torch_diag.cpp
    │   │   │   ├── torch_einsum.cpp
    │   │   │   ├── torch_empty.cpp
    │   │   │   ├── torch_empty_like.cpp
    │   │   │   ├── torch_eq.cpp
    │   │   │   ├── torch_fft_fft.cpp
    │   │   │   ├── torch_fft_fft2.cpp
    │   │   │   ├── torch_fft_fftn.cpp
    │   │   │   ├── torch_fft_hfft.cpp
    │   │   │   ├── torch_fft_hfft2.cpp
    │   │   │   ├── torch_fft_hfftn.cpp
    │   │   │   ├── torch_fft_ifft.cpp
    │   │   │   ├── torch_fft_ifft2.cpp
    │   │   │   ├── torch_fft_ifftn.cpp
    │   │   │   ├── torch_fft_ihfft.cpp
    │   │   │   ├── torch_fft_ihfft2.cpp
    │   │   │   ├── torch_fft_ihfftn.cpp
    │   │   │   ├── torch_fft_irfft.cpp
    │   │   │   ├── torch_fft_irfft2.cpp
    │   │   │   ├── torch_fft_irfftn.cpp
    │   │   │   ├── torch_fft_rfft.cpp
    │   │   │   ├── torch_fft_rfft2.cpp
    │   │   │   ├── torch_fft_rfftn.cpp
    │   │   │   ├── torch_flatten.cpp
    │   │   │   ├── torch_flip.cpp
    │   │   │   ├── torch_full.cpp
    │   │   │   ├── torch_full_like.cpp
    │   │   │   ├── torch_gather.cpp
    │   │   │   ├── torch_ge.cpp
    │   │   │   ├── torch_gt.cpp
    │   │   │   ├── torch_imag.cpp
    │   │   │   ├── torch_index_select.cpp
    │   │   │   ├── torch_istft.cpp
    │   │   │   ├── torch_le.cpp
    │   │   │   ├── torch_lgamma.cpp
    │   │   │   ├── torch_logical_and.cpp
    │   │   │   ├── torch_logical_not.cpp
    │   │   │   ├── torch_logical_or.cpp
    │   │   │   ├── torch_logical_xor.cpp
    │   │   │   ├── torch_logsumexp.cpp
    │   │   │   ├── torch_lt.cpp
    │   │   │   ├── torch_masked_select.cpp
    │   │   │   ├── torch_matmul.cpp
    │   │   │   ├── torch_max.cpp
    │   │   │   ├── torch_mean.cpp
    │   │   │   ├── torch_min.cpp
    │   │   │   ├── torch_mm.cpp
    │   │   │   ├── torch_mv.cpp
    │   │   │   ├── torch_narrow.cpp
    │   │   │   ├── torch_ne.cpp
    │   │   │   ├── torch_norm.cpp
    │   │   │   ├── torch_normal.cpp
    │   │   │   ├── torch_ones.cpp
    │   │   │   ├── torch_ones_like.cpp
    │   │   │   ├── torch_positive.cpp
    │   │   │   ├── torch_prod.cpp
    │   │   │   ├── torch_quantize_per_tensor.cpp
    │   │   │   ├── torch_randn.cpp
    │   │   │   ├── torch_randn_like.cpp
    │   │   │   ├── torch_real.cpp
    │   │   │   ├── torch_repeat_interleave.cpp
    │   │   │   ├── torch_roll.cpp
    │   │   │   ├── torch_scatter_add.cpp
    │   │   │   ├── torch_slice_scatter.cpp
    │   │   │   ├── torch_split.cpp
    │   │   │   ├── torch_squeeze.cpp
    │   │   │   ├── torch_stack.cpp
    │   │   │   ├── torch_std.cpp
    │   │   │   ├── torch_stft.cpp
    │   │   │   ├── torch_sum.cpp
    │   │   │   ├── torch_t.cpp
    │   │   │   ├── torch_tensor_split.cpp
    │   │   │   ├── torch_tile.cpp
    │   │   │   ├── torch_topk.cpp
    │   │   │   ├── torch_transpose.cpp
    │   │   │   ├── torch_unbind.cpp
    │   │   │   ├── torch_unsqueeze.cpp
    │   │   │   ├── torch_var.cpp
    │   │   │   ├── torch_view_as_complex.cpp
    │   │   │   ├── torch_view_as_real.cpp
    │   │   │   ├── torch_where.cpp
    │   │   │   ├── torch_zeros.cpp
    │   │   │   ├── torch_zeros_like.cpp
    │   │   │   ├── torchaudio_F_inverse_spectrogram.cpp
    │   │   │   └── torchaudio_F_spectrogram.cpp
    │   │   ├── pass_level2.cpp
    │   │   ├── pass_level2.h
    │   │   ├── pass_level3/
    │   │   │   ├── assign_unique_name.cpp
    │   │   │   ├── assign_unique_name.h
    │   │   │   ├── eliminate_noop_math.cpp
    │   │   │   ├── eliminate_noop_math.h
    │   │   │   ├── eliminate_squeeze_unsqueeze_pair.cpp
    │   │   │   ├── eliminate_squeeze_unsqueeze_pair.h
    │   │   │   ├── eliminate_tuple_pair.cpp
    │   │   │   ├── eliminate_tuple_pair.h
    │   │   │   ├── expand_quantization_modules.cpp
    │   │   │   ├── expand_quantization_modules.h
    │   │   │   ├── fuse_dynamic_adaptive_pool.cpp
    │   │   │   ├── fuse_dynamic_adaptive_pool.h
    │   │   │   ├── fuse_einsum_operands.cpp
    │   │   │   ├── fuse_einsum_operands.h
    │   │   │   ├── fuse_expression.cpp
    │   │   │   ├── fuse_expression.h
    │   │   │   ├── fuse_index_expression.cpp
    │   │   │   ├── fuse_index_expression.h
    │   │   │   ├── fuse_maxpool_unpack.cpp
    │   │   │   ├── fuse_maxpool_unpack.h
    │   │   │   ├── fuse_multiheadattention_unpack.cpp
    │   │   │   ├── fuse_multiheadattention_unpack.h
    │   │   │   ├── fuse_op1ton_unpack.cpp
    │   │   │   ├── fuse_op1ton_unpack.h
    │   │   │   ├── fuse_opnto1_tensors.cpp
    │   │   │   ├── fuse_opnto1_tensors.h
    │   │   │   ├── fuse_rnn_unpack.cpp
    │   │   │   ├── fuse_rnn_unpack.h
    │   │   │   ├── rename_F_dropoutnd.cpp
    │   │   │   └── rename_F_dropoutnd.h
    │   │   ├── pass_level3.cpp
    │   │   ├── pass_level3.h
    │   │   ├── pass_level4/
    │   │   │   ├── attribute_pooling.cpp
    │   │   │   ├── attribute_pooling.h
    │   │   │   ├── canonicalize.cpp
    │   │   │   ├── canonicalize.h
    │   │   │   ├── dead_code_elimination.cpp
    │   │   │   ├── dead_code_elimination.h
    │   │   │   ├── fuse_custom_op.cpp
    │   │   │   └── fuse_custom_op.h
    │   │   ├── pass_level4.cpp
    │   │   ├── pass_level4.h
    │   │   ├── pass_level5/
    │   │   │   ├── attribute_unpooling.cpp
    │   │   │   ├── attribute_unpooling.h
    │   │   │   ├── eliminate_dropout.cpp
    │   │   │   ├── eliminate_dropout.h
    │   │   │   ├── eliminate_identity_operator.cpp
    │   │   │   ├── eliminate_identity_operator.h
    │   │   │   ├── eliminate_maxpool_indices.cpp
    │   │   │   ├── eliminate_maxpool_indices.h
    │   │   │   ├── eliminate_noop_cat.cpp
    │   │   │   ├── eliminate_noop_cat.h
    │   │   │   ├── eliminate_noop_einsum.cpp
    │   │   │   ├── eliminate_noop_einsum.h
    │   │   │   ├── eliminate_noop_expand.cpp
    │   │   │   ├── eliminate_noop_expand.h
    │   │   │   ├── eliminate_noop_expression.cpp
    │   │   │   ├── eliminate_noop_expression.h
    │   │   │   ├── eliminate_noop_pad.cpp
    │   │   │   ├── eliminate_noop_pad.h
    │   │   │   ├── eliminate_noop_permute.cpp
    │   │   │   ├── eliminate_noop_permute.h
    │   │   │   ├── eliminate_noop_reshape.cpp
    │   │   │   ├── eliminate_noop_reshape.h
    │   │   │   ├── eliminate_noop_slice.cpp
    │   │   │   ├── eliminate_noop_slice.h
    │   │   │   ├── eliminate_noop_upsample.cpp
    │   │   │   ├── eliminate_noop_upsample.h
    │   │   │   ├── eliminate_reshape_shape_expression.cpp
    │   │   │   ├── eliminate_reshape_shape_expression.h
    │   │   │   ├── eliminate_type_as.cpp
    │   │   │   ├── eliminate_type_as.h
    │   │   │   ├── eval_expression.cpp
    │   │   │   ├── eval_expression.h
    │   │   │   ├── fold_constants.cpp
    │   │   │   ├── fold_constants.h
    │   │   │   ├── fuse_adjacent_permute.cpp
    │   │   │   ├── fuse_adjacent_permute.h
    │   │   │   ├── fuse_adjacent_reshape.cpp
    │   │   │   ├── fuse_adjacent_reshape.h
    │   │   │   ├── fuse_channel_shuffle.cpp
    │   │   │   ├── fuse_channel_shuffle.h
    │   │   │   ├── fuse_constant_expression.cpp
    │   │   │   ├── fuse_constant_expression.h
    │   │   │   ├── fuse_conv1d_batchnorm1d.cpp
    │   │   │   ├── fuse_conv1d_batchnorm1d.h
    │   │   │   ├── fuse_conv2d_batchnorm2d.cpp
    │   │   │   ├── fuse_conv2d_batchnorm2d.h
    │   │   │   ├── fuse_conv3d_batchnorm3d.cpp
    │   │   │   ├── fuse_conv3d_batchnorm3d.h
    │   │   │   ├── fuse_convtranspose1d_batchnorm1d.cpp
    │   │   │   ├── fuse_convtranspose1d_batchnorm1d.h
    │   │   │   ├── fuse_convtranspose2d_batchnorm2d.cpp
    │   │   │   ├── fuse_convtranspose2d_batchnorm2d.h
    │   │   │   ├── fuse_convtranspose3d_batchnorm3d.cpp
    │   │   │   ├── fuse_convtranspose3d_batchnorm3d.h
    │   │   │   ├── fuse_layernorm.cpp
    │   │   │   ├── fuse_layernorm.h
    │   │   │   ├── fuse_linear_batchnorm1d.cpp
    │   │   │   ├── fuse_linear_batchnorm1d.h
    │   │   │   ├── fuse_multiheadattention.cpp
    │   │   │   ├── fuse_multiheadattention.h
    │   │   │   ├── fuse_multiheadattention_sameqkv.cpp
    │   │   │   ├── fuse_multiheadattention_sameqkv.h
    │   │   │   ├── fuse_pad_conv1d.cpp
    │   │   │   ├── fuse_pad_conv1d.h
    │   │   │   ├── fuse_pad_conv2d.cpp
    │   │   │   ├── fuse_pad_conv2d.h
    │   │   │   ├── fuse_pixel_shuffle.cpp
    │   │   │   ├── fuse_pixel_shuffle.h
    │   │   │   ├── fuse_pixel_unshuffle.cpp
    │   │   │   ├── fuse_pixel_unshuffle.h
    │   │   │   ├── fuse_rmsnorm.cpp
    │   │   │   ├── fuse_rmsnorm.h
    │   │   │   ├── fuse_scaled_dot_product_attention.cpp
    │   │   │   ├── fuse_scaled_dot_product_attention.h
    │   │   │   ├── fuse_select_to_unbind.cpp
    │   │   │   ├── fuse_select_to_unbind.h
    │   │   │   ├── fuse_silu.cpp
    │   │   │   ├── fuse_silu.h
    │   │   │   ├── fuse_slice_copy.cpp
    │   │   │   ├── fuse_slice_copy.h
    │   │   │   ├── fuse_slice_indices.cpp
    │   │   │   ├── fuse_slice_indices.h
    │   │   │   ├── fuse_slice_squeeze_to_select.cpp
    │   │   │   ├── fuse_slice_squeeze_to_select.h
    │   │   │   ├── fuse_slice_to_tensor_split.cpp
    │   │   │   ├── fuse_slice_to_tensor_split.h
    │   │   │   ├── fuse_static_batchnorm.cpp
    │   │   │   ├── fuse_static_batchnorm.h
    │   │   │   ├── fuse_static_conv.cpp
    │   │   │   ├── fuse_static_conv.h
    │   │   │   ├── fuse_static_convtranspose.cpp
    │   │   │   ├── fuse_static_convtranspose.h
    │   │   │   ├── fuse_static_embedding.cpp
    │   │   │   ├── fuse_static_embedding.h
    │   │   │   ├── fuse_static_groupnorm.cpp
    │   │   │   ├── fuse_static_groupnorm.h
    │   │   │   ├── fuse_static_instancenorm.cpp
    │   │   │   ├── fuse_static_instancenorm.h
    │   │   │   ├── fuse_static_layernorm.cpp
    │   │   │   ├── fuse_static_layernorm.h
    │   │   │   ├── fuse_static_linear.cpp
    │   │   │   ├── fuse_static_linear.h
    │   │   │   ├── fuse_static_prelu.cpp
    │   │   │   ├── fuse_static_prelu.h
    │   │   │   ├── fuse_static_rmsnorm.cpp
    │   │   │   ├── fuse_static_rmsnorm.h
    │   │   │   ├── fuse_transformers_multiheadattention.cpp
    │   │   │   ├── fuse_transformers_multiheadattention.h
    │   │   │   ├── fuse_transformers_scaled_dot_product_attention.cpp
    │   │   │   ├── fuse_transformers_scaled_dot_product_attention.h
    │   │   │   ├── normalize_einsum_equation.cpp
    │   │   │   ├── normalize_einsum_equation.h
    │   │   │   ├── unroll_rnn_op.cpp
    │   │   │   └── unroll_rnn_op.h
    │   │   ├── pass_level5.cpp
    │   │   ├── pass_level5.h
    │   │   ├── pass_ncnn/
    │   │   │   ├── F_adaptive_avg_pool1d.cpp
    │   │   │   ├── F_adaptive_avg_pool2d.cpp
    │   │   │   ├── F_adaptive_avg_pool3d.cpp
    │   │   │   ├── F_adaptive_max_pool1d.cpp
    │   │   │   ├── F_adaptive_max_pool2d.cpp
    │   │   │   ├── F_adaptive_max_pool3d.cpp
    │   │   │   ├── F_avg_pool1d.cpp
    │   │   │   ├── F_avg_pool2d.cpp
    │   │   │   ├── F_avg_pool3d.cpp
    │   │   │   ├── F_batch_norm.cpp
    │   │   │   ├── F_celu.cpp
    │   │   │   ├── F_conv1d.cpp
    │   │   │   ├── F_conv2d.cpp
    │   │   │   ├── F_conv3d.cpp
    │   │   │   ├── F_conv_transpose1d.cpp
    │   │   │   ├── F_conv_transpose2d.cpp
    │   │   │   ├── F_conv_transpose3d.cpp
    │   │   │   ├── F_elu.cpp
    │   │   │   ├── F_embedding.cpp
    │   │   │   ├── F_fold.cpp
    │   │   │   ├── F_gelu.cpp
    │   │   │   ├── F_glu.cpp
    │   │   │   ├── F_grid_sample.cpp
    │   │   │   ├── F_group_norm.cpp
    │   │   │   ├── F_hardshrink.cpp
    │   │   │   ├── F_hardsigmoid.cpp
    │   │   │   ├── F_hardswish.cpp
    │   │   │   ├── F_hardtanh.cpp
    │   │   │   ├── F_instance_norm.cpp
    │   │   │   ├── F_interpolate.cpp
    │   │   │   ├── F_layer_norm.cpp
    │   │   │   ├── F_leaky_relu.cpp
    │   │   │   ├── F_linear.cpp
    │   │   │   ├── F_local_response_norm.cpp
    │   │   │   ├── F_log_softmax.cpp
    │   │   │   ├── F_logsigmoid.cpp
    │   │   │   ├── F_max_pool1d.cpp
    │   │   │   ├── F_max_pool2d.cpp
    │   │   │   ├── F_max_pool3d.cpp
    │   │   │   ├── F_mish.cpp
    │   │   │   ├── F_normalize.cpp
    │   │   │   ├── F_pad.cpp
    │   │   │   ├── F_pixel_shuffle.cpp
    │   │   │   ├── F_pixel_unshuffle.cpp
    │   │   │   ├── F_prelu.cpp
    │   │   │   ├── F_relu.cpp
    │   │   │   ├── F_relu6.cpp
    │   │   │   ├── F_rms_norm.cpp
    │   │   │   ├── F_scaled_dot_product_attention.cpp
    │   │   │   ├── F_selu.cpp
    │   │   │   ├── F_sigmoid.cpp
    │   │   │   ├── F_silu.cpp
    │   │   │   ├── F_softmax.cpp
    │   │   │   ├── F_softplus.cpp
    │   │   │   ├── F_softshrink.cpp
    │   │   │   ├── F_tanh.cpp
    │   │   │   ├── F_unfold.cpp
    │   │   │   ├── F_upsample.cpp
    │   │   │   ├── F_upsample_bilinear.cpp
    │   │   │   ├── F_upsample_nearest.cpp
    │   │   │   ├── Tensor_expand.cpp
    │   │   │   ├── Tensor_permute.cpp
    │   │   │   ├── Tensor_repeat.cpp
    │   │   │   ├── Tensor_reshape.cpp
    │   │   │   ├── Tensor_reshape_as.cpp
    │   │   │   ├── Tensor_unflatten.cpp
    │   │   │   ├── chain_multi_output.cpp
    │   │   │   ├── chain_multi_output.h
    │   │   │   ├── convert_Tensor_select.cpp
    │   │   │   ├── convert_Tensor_select.h
    │   │   │   ├── convert_Tensor_slice.cpp
    │   │   │   ├── convert_Tensor_slice.h
    │   │   │   ├── convert_Tensor_slice_copy.cpp
    │   │   │   ├── convert_Tensor_slice_copy.h
    │   │   │   ├── convert_attribute.cpp
    │   │   │   ├── convert_attribute.h
    │   │   │   ├── convert_custom_op.cpp
    │   │   │   ├── convert_custom_op.h
    │   │   │   ├── convert_half_to_float.cpp
    │   │   │   ├── convert_half_to_float.h
    │   │   │   ├── convert_input.cpp
    │   │   │   ├── convert_input.h
    │   │   │   ├── convert_module_op.cpp
    │   │   │   ├── convert_module_op.h
    │   │   │   ├── convert_reshape_interp_expression.cpp
    │   │   │   ├── convert_reshape_interp_expression.h
    │   │   │   ├── convert_slice_expression.cpp
    │   │   │   ├── convert_slice_expression.h
    │   │   │   ├── convert_torch_cat.cpp
    │   │   │   ├── convert_torch_cat.h
    │   │   │   ├── convert_torch_chunk.cpp
    │   │   │   ├── convert_torch_chunk.h
    │   │   │   ├── convert_torch_einsum.cpp
    │   │   │   ├── convert_torch_einsum.h
    │   │   │   ├── convert_torch_split.cpp
    │   │   │   ├── convert_torch_split.h
    │   │   │   ├── convert_torch_stack.cpp
    │   │   │   ├── convert_torch_stack.h
    │   │   │   ├── convert_torch_tensor_split.cpp
    │   │   │   ├── convert_torch_tensor_split.h
    │   │   │   ├── convert_torch_unbind.cpp
    │   │   │   ├── convert_torch_unbind.h
    │   │   │   ├── eliminate_noop.cpp
    │   │   │   ├── eliminate_noop.h
    │   │   │   ├── eliminate_output.cpp
    │   │   │   ├── eliminate_output.h
    │   │   │   ├── expand_expression.cpp
    │   │   │   ├── expand_expression.h
    │   │   │   ├── fuse_binaryop_eltwise.cpp
    │   │   │   ├── fuse_binaryop_eltwise.h
    │   │   │   ├── fuse_convert_rotaryembed.cpp
    │   │   │   ├── fuse_convert_rotaryembed.h
    │   │   │   ├── fuse_convert_shufflechannel_slice.cpp
    │   │   │   ├── fuse_convert_shufflechannel_slice.h
    │   │   │   ├── fuse_convolution1d_activation.cpp
    │   │   │   ├── fuse_convolution1d_activation.h
    │   │   │   ├── fuse_convolution_activation.cpp
    │   │   │   ├── fuse_convolution_activation.h
    │   │   │   ├── fuse_convolutiondepthwise1d_activation.cpp
    │   │   │   ├── fuse_convolutiondepthwise1d_activation.h
    │   │   │   ├── fuse_convolutiondepthwise_activation.cpp
    │   │   │   ├── fuse_convolutiondepthwise_activation.h
    │   │   │   ├── fuse_deconvolution_activation.cpp
    │   │   │   ├── fuse_deconvolution_activation.h
    │   │   │   ├── fuse_deconvolutiondepthwise_activation.cpp
    │   │   │   ├── fuse_deconvolutiondepthwise_activation.h
    │   │   │   ├── fuse_innerproduct_activation.cpp
    │   │   │   ├── fuse_innerproduct_activation.h
    │   │   │   ├── fuse_padding_convolution.cpp
    │   │   │   ├── fuse_padding_convolution.h
    │   │   │   ├── fuse_padding_convolutiondepthwise.cpp
    │   │   │   ├── fuse_padding_convolutiondepthwise.h
    │   │   │   ├── fuse_transpose_matmul.cpp
    │   │   │   ├── fuse_transpose_matmul.h
    │   │   │   ├── insert_reshape_global_pooling.cpp
    │   │   │   ├── insert_reshape_global_pooling.h
    │   │   │   ├── insert_reshape_linear.cpp
    │   │   │   ├── insert_reshape_linear.h
    │   │   │   ├── insert_reshape_numpy_binaryop_broadcast.cpp
    │   │   │   ├── insert_reshape_numpy_binaryop_broadcast.h
    │   │   │   ├── insert_reshape_pooling.cpp
    │   │   │   ├── insert_reshape_pooling.h
    │   │   │   ├── insert_split.cpp
    │   │   │   ├── insert_split.h
    │   │   │   ├── nn_AdaptiveAvgPool1d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool2d.cpp
    │   │   │   ├── nn_AdaptiveAvgPool3d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool1d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool2d.cpp
    │   │   │   ├── nn_AdaptiveMaxPool3d.cpp
    │   │   │   ├── nn_AvgPool1d.cpp
    │   │   │   ├── nn_AvgPool2d.cpp
    │   │   │   ├── nn_AvgPool3d.cpp
    │   │   │   ├── nn_BatchNorm1d.cpp
    │   │   │   ├── nn_BatchNorm2d.cpp
    │   │   │   ├── nn_BatchNorm3d.cpp
    │   │   │   ├── nn_CELU.cpp
    │   │   │   ├── nn_ChannelShuffle.cpp
    │   │   │   ├── nn_ConstantPad1d.cpp
    │   │   │   ├── nn_ConstantPad2d.cpp
    │   │   │   ├── nn_ConstantPad3d.cpp
    │   │   │   ├── nn_Conv1d.cpp
    │   │   │   ├── nn_Conv2d.cpp
    │   │   │   ├── nn_Conv3d.cpp
    │   │   │   ├── nn_ConvTranspose1d.cpp
    │   │   │   ├── nn_ConvTranspose2d.cpp
    │   │   │   ├── nn_ConvTranspose3d.cpp
    │   │   │   ├── nn_ELU.cpp
    │   │   │   ├── nn_Embedding.cpp
    │   │   │   ├── nn_Fold.cpp
    │   │   │   ├── nn_GELU.cpp
    │   │   │   ├── nn_GLU.cpp
    │   │   │   ├── nn_GRU.cpp
    │   │   │   ├── nn_GroupNorm.cpp
    │   │   │   ├── nn_Hardshrink.cpp
    │   │   │   ├── nn_Hardsigmoid.cpp
    │   │   │   ├── nn_Hardswish.cpp
    │   │   │   ├── nn_Hardtanh.cpp
    │   │   │   ├── nn_InstanceNorm2d.cpp
    │   │   │   ├── nn_LSTM.cpp
    │   │   │   ├── nn_LayerNorm.cpp
    │   │   │   ├── nn_LeakyReLU.cpp
    │   │   │   ├── nn_Linear.cpp
    │   │   │   ├── nn_LocalResponseNorm.cpp
    │   │   │   ├── nn_LogSigmoid.cpp
    │   │   │   ├── nn_LogSoftmax.cpp
    │   │   │   ├── nn_MaxPool1d.cpp
    │   │   │   ├── nn_MaxPool2d.cpp
    │   │   │   ├── nn_MaxPool3d.cpp
    │   │   │   ├── nn_Mish.cpp
    │   │   │   ├── nn_MultiheadAttention.cpp
    │   │   │   ├── nn_PReLU.cpp
    │   │   │   ├── nn_PixelShuffle.cpp
    │   │   │   ├── nn_PixelUnshuffle.cpp
    │   │   │   ├── nn_RMSNorm.cpp
    │   │   │   ├── nn_RNN.cpp
    │   │   │   ├── nn_ReLU.cpp
    │   │   │   ├── nn_ReLU6.cpp
    │   │   │   ├── nn_ReflectionPad1d.cpp
    │   │   │   ├── nn_ReflectionPad2d.cpp
    │   │   │   ├── nn_ReplicationPad1d.cpp
    │   │   │   ├── nn_ReplicationPad2d.cpp
    │   │   │   ├── nn_ReplicationPad3d.cpp
    │   │   │   ├── nn_SELU.cpp
    │   │   │   ├── nn_SiLU.cpp
    │   │   │   ├── nn_Sigmoid.cpp
    │   │   │   ├── nn_Softmax.cpp
    │   │   │   ├── nn_Softmax2d.cpp
    │   │   │   ├── nn_Softplus.cpp
    │   │   │   ├── nn_Softshrink.cpp
    │   │   │   ├── nn_Tanh.cpp
    │   │   │   ├── nn_Unfold.cpp
    │   │   │   ├── nn_Upsample.cpp
    │   │   │   ├── nn_UpsamplingBilinear2d.cpp
    │   │   │   ├── nn_UpsamplingNearest2d.cpp
    │   │   │   ├── nn_ZeroPad2d.cpp
    │   │   │   ├── solve_batch_index.cpp
    │   │   │   ├── solve_batch_index.h
    │   │   │   ├── torch_addmm.cpp
    │   │   │   ├── torch_amax.cpp
    │   │   │   ├── torch_amin.cpp
    │   │   │   ├── torch_bmm.cpp
    │   │   │   ├── torch_clamp.cpp
    │   │   │   ├── torch_clone.cpp
    │   │   │   ├── torch_cumsum.cpp
    │   │   │   ├── torch_diag.cpp
    │   │   │   ├── torch_flatten.cpp
    │   │   │   ├── torch_flip.cpp
    │   │   │   ├── torch_istft.cpp
    │   │   │   ├── torch_logsumexp.cpp
    │   │   │   ├── torch_matmul.cpp
    │   │   │   ├── torch_max.cpp
    │   │   │   ├── torch_mean.cpp
    │   │   │   ├── torch_min.cpp
    │   │   │   ├── torch_mm.cpp
    │   │   │   ├── torch_norm.cpp
    │   │   │   ├── torch_prod.cpp
    │   │   │   ├── torch_roll.cpp
    │   │   │   ├── torch_slice_scatter.cpp
    │   │   │   ├── torch_squeeze.cpp
    │   │   │   ├── torch_stft.cpp
    │   │   │   ├── torch_sum.cpp
    │   │   │   ├── torch_t.cpp
    │   │   │   ├── torch_transpose.cpp
    │   │   │   ├── torch_unsqueeze.cpp
    │   │   │   ├── torchaudio_F_inverse_spectrogram.cpp
    │   │   │   ├── torchaudio_F_spectrogram.cpp
    │   │   │   └── torchvision_DeformConv2d.cpp
    │   │   ├── pass_ncnn.cpp
    │   │   ├── pass_ncnn.h
    │   │   ├── pass_onnx/
    │   │   │   ├── canonicalize.cpp
    │   │   │   ├── canonicalize.h
    │   │   │   ├── dead_code_elimination.cpp
    │   │   │   ├── dead_code_elimination.h
    │   │   │   ├── eliminate_initializer_input.cpp
    │   │   │   ├── eliminate_initializer_input.h
    │   │   │   ├── eliminate_noop.cpp
    │   │   │   ├── eliminate_noop.h
    │   │   │   ├── fold_constants.cpp
    │   │   │   ├── fold_constants.h
    │   │   │   ├── fuse_constant_as_attribute.cpp
    │   │   │   ├── fuse_constant_as_attribute.h
    │   │   │   ├── inline_containers.cpp
    │   │   │   ├── inline_containers.h
    │   │   │   ├── inline_if_graph.cpp
    │   │   │   ├── inline_if_graph.h
    │   │   │   ├── model_stat.cpp
    │   │   │   ├── model_stat.h
    │   │   │   ├── shape_inference.cpp
    │   │   │   └── shape_inference.h
    │   │   ├── pass_onnx.cpp
    │   │   ├── pass_onnx.h
    │   │   ├── pass_tnn/
    │   │   │   ├── fuse_shape_list_construct.cpp
    │   │   │   ├── fuse_shape_list_construct.h
    │   │   │   ├── fuse_shape_size.cpp
    │   │   │   ├── fuse_shape_size.h
    │   │   │   ├── lower_concat.cpp
    │   │   │   ├── lower_concat.h
    │   │   │   ├── lower_convolution_activation.cpp
    │   │   │   ├── lower_convolution_activation.h
    │   │   │   ├── lower_power.cpp
    │   │   │   └── lower_power.h
    │   │   ├── save_ncnn.cpp
    │   │   ├── save_ncnn.h
    │   │   ├── save_onnx.cpp
    │   │   ├── save_onnx.h
    │   │   ├── storezip.cpp
    │   │   ├── storezip.h
    │   │   ├── utils.cpp
    │   │   └── utils.h
    │   └── tests/
    │       ├── CMakeLists.txt
    │       ├── ncnn/
    │       │   ├── CMakeLists.txt
    │       │   ├── test_F_adaptive_avg_pool1d.py
    │       │   ├── test_F_adaptive_avg_pool2d.py
    │       │   ├── test_F_adaptive_avg_pool3d.py
    │       │   ├── test_F_adaptive_max_pool1d.py
    │       │   ├── test_F_adaptive_max_pool2d.py
    │       │   ├── test_F_adaptive_max_pool3d.py
    │       │   ├── test_F_alpha_dropout.py
    │       │   ├── test_F_avg_pool1d.py
    │       │   ├── test_F_avg_pool2d.py
    │       │   ├── test_F_avg_pool3d.py
    │       │   ├── test_F_batch_norm.py
    │       │   ├── test_F_celu.py
    │       │   ├── test_F_conv1d.py
    │       │   ├── test_F_conv2d.py
    │       │   ├── test_F_conv3d.py
    │       │   ├── test_F_conv_transpose1d.py
    │       │   ├── test_F_conv_transpose2d.py
    │       │   ├── test_F_conv_transpose3d.py
    │       │   ├── test_F_dropout.py
    │       │   ├── test_F_dropout2d.py
    │       │   ├── test_F_dropout3d.py
    │       │   ├── test_F_elu.py
    │       │   ├── test_F_embedding.py
    │       │   ├── test_F_feature_alpha_dropout.py
    │       │   ├── test_F_fold.py
    │       │   ├── test_F_gelu.py
    │       │   ├── test_F_glu.py
    │       │   ├── test_F_grid_sample.py
    │       │   ├── test_F_group_norm.py
    │       │   ├── test_F_hardshrink.py
    │       │   ├── test_F_hardsigmoid.py
    │       │   ├── test_F_hardswish.py
    │       │   ├── test_F_hardtanh.py
    │       │   ├── test_F_interpolate.py
    │       │   ├── test_F_layer_norm.py
    │       │   ├── test_F_leaky_relu.py
    │       │   ├── test_F_local_response_norm.py
    │       │   ├── test_F_log_softmax.py
    │       │   ├── test_F_logsigmoid.py
    │       │   ├── test_F_max_pool1d.py
    │       │   ├── test_F_max_pool2d.py
    │       │   ├── test_F_max_pool3d.py
    │       │   ├── test_F_mish.py
    │       │   ├── test_F_normalize.py
    │       │   ├── test_F_pad.py
    │       │   ├── test_F_pixel_shuffle.py
    │       │   ├── test_F_pixel_unshuffle.py
    │       │   ├── test_F_prelu.py
    │       │   ├── test_F_relu.py
    │       │   ├── test_F_relu6.py
    │       │   ├── test_F_rms_norm.py
    │       │   ├── test_F_scaled_dot_product_attention.py
    │       │   ├── test_F_selu.py
    │       │   ├── test_F_sigmoid.py
    │       │   ├── test_F_silu.py
    │       │   ├── test_F_softmax.py
    │       │   ├── test_F_softshrink.py
    │       │   ├── test_F_tanh.py
    │       │   ├── test_F_unfold.py
    │       │   ├── test_F_upsample.py
    │       │   ├── test_F_upsample_bilinear.py
    │       │   ├── test_F_upsample_nearest.py
    │       │   ├── test_Tensor_expand.py
    │       │   ├── test_Tensor_permute.py
    │       │   ├── test_Tensor_repeat.py
    │       │   ├── test_Tensor_reshape.py
    │       │   ├── test_Tensor_reshape_as.py
    │       │   ├── test_Tensor_slice.py
    │       │   ├── test_Tensor_slice_copy.py
    │       │   ├── test_Tensor_unflatten.py
    │       │   ├── test_Tensor_view.py
    │       │   ├── test_convnext_tiny.py
    │       │   ├── test_mobilenet_v2.py
    │       │   ├── test_mobilenet_v3_small.py
    │       │   ├── test_ncnn_fuse_binaryop_eltwise.py
    │       │   ├── test_ncnn_fuse_pad_conv.py
    │       │   ├── test_ncnn_fuse_shufflechannel_slice.py
    │       │   ├── test_ncnn_fuse_transpose_matmul.py
    │       │   ├── test_ncnn_interp_expr.py
    │       │   ├── test_ncnn_numpy_binaryop_broadcast.py
    │       │   ├── test_ncnn_reshape_expr.py
    │       │   ├── test_ncnn_slice_expr.py
    │       │   ├── test_ncnn_solve_batch_index.py
    │       │   ├── test_nn_AdaptiveAvgPool1d.py
    │       │   ├── test_nn_AdaptiveAvgPool2d.py
    │       │   ├── test_nn_AdaptiveAvgPool3d.py
    │       │   ├── test_nn_AdaptiveMaxPool1d.py
    │       │   ├── test_nn_AdaptiveMaxPool2d.py
    │       │   ├── test_nn_AdaptiveMaxPool3d.py
    │       │   ├── test_nn_AlphaDropout.py
    │       │   ├── test_nn_AvgPool1d.py
    │       │   ├── test_nn_AvgPool2d.py
    │       │   ├── test_nn_AvgPool3d.py
    │       │   ├── test_nn_BatchNorm1d.py
    │       │   ├── test_nn_BatchNorm2d.py
    │       │   ├── test_nn_BatchNorm3d.py
    │       │   ├── test_nn_CELU.py
    │       │   ├── test_nn_ChannelShuffle.py
    │       │   ├── test_nn_ConstantPad1d.py
    │       │   ├── test_nn_ConstantPad2d.py
    │       │   ├── test_nn_ConstantPad3d.py
    │       │   ├── test_nn_Conv1d.py
    │       │   ├── test_nn_Conv2d.py
    │       │   ├── test_nn_Conv3d.py
    │       │   ├── test_nn_ConvTranspose1d.py
    │       │   ├── test_nn_ConvTranspose2d.py
    │       │   ├── test_nn_ConvTranspose3d.py
    │       │   ├── test_nn_Dropout.py
    │       │   ├── test_nn_Dropout2d.py
    │       │   ├── test_nn_Dropout3d.py
    │       │   ├── test_nn_ELU.py
    │       │   ├── test_nn_Embedding.py
    │       │   ├── test_nn_Fold.py
    │       │   ├── test_nn_GELU.py
    │       │   ├── test_nn_GLU.py
    │       │   ├── test_nn_GRU.py
    │       │   ├── test_nn_GroupNorm.py
    │       │   ├── test_nn_Hardshrink.py
    │       │   ├── test_nn_Hardsigmoid.py
    │       │   ├── test_nn_Hardswish.py
    │       │   ├── test_nn_Hardtanh.py
    │       │   ├── test_nn_Identity.py
    │       │   ├── test_nn_InstanceNorm2d.py
    │       │   ├── test_nn_LSTM.py
    │       │   ├── test_nn_LayerNorm.py
    │       │   ├── test_nn_LeakyReLU.py
    │       │   ├── test_nn_Linear.py
    │       │   ├── test_nn_LocalResponseNorm.py
    │       │   ├── test_nn_LogSigmoid.py
    │       │   ├── test_nn_LogSoftmax.py
    │       │   ├── test_nn_MaxPool1d.py
    │       │   ├── test_nn_MaxPool2d.py
    │       │   ├── test_nn_MaxPool3d.py
    │       │   ├── test_nn_Mish.py
    │       │   ├── test_nn_MultiheadAttention.py
    │       │   ├── test_nn_PReLU.py
    │       │   ├── test_nn_PixelShuffle.py
    │       │   ├── test_nn_PixelUnshuffle.py
    │       │   ├── test_nn_RMSNorm.py
    │       │   ├── test_nn_RNN.py
    │       │   ├── test_nn_ReLU.py
    │       │   ├── test_nn_ReLU6.py
    │       │   ├── test_nn_ReflectionPad1d.py
    │       │   ├── test_nn_ReflectionPad2d.py
    │       │   ├── test_nn_ReplicationPad1d.py
    │       │   ├── test_nn_ReplicationPad2d.py
    │       │   ├── test_nn_ReplicationPad3d.py
    │       │   ├── test_nn_SELU.py
    │       │   ├── test_nn_SiLU.py
    │       │   ├── test_nn_Sigmoid.py
    │       │   ├── test_nn_Softmax.py
    │       │   ├── test_nn_Softmax2d.py
    │       │   ├── test_nn_Softshrink.py
    │       │   ├── test_nn_Tanh.py
    │       │   ├── test_nn_Unfold.py
    │       │   ├── test_nn_Upsample.py
    │       │   ├── test_nn_UpsamplingBilinear2d.py
    │       │   ├── test_nn_UpsamplingNearest2d.py
    │       │   ├── test_nn_ZeroPad2d.py
    │       │   ├── test_resnet18.py
    │       │   ├── test_shufflenet_v2_x1_0.py
    │       │   ├── test_squeezenet1_1.py
    │       │   ├── test_torch_abs.py
    │       │   ├── test_torch_acos.py
    │       │   ├── test_torch_addmm.py
    │       │   ├── test_torch_amax.py
    │       │   ├── test_torch_amin.py
    │       │   ├── test_torch_asin.py
    │       │   ├── test_torch_atan.py
    │       │   ├── test_torch_atan2.py
    │       │   ├── test_torch_bmm.py
    │       │   ├── test_torch_cat.py
    │       │   ├── test_torch_ceil.py
    │       │   ├── test_torch_chunk.py
    │       │   ├── test_torch_clamp.py
    │       │   ├── test_torch_clone.py
    │       │   ├── test_torch_cos.py
    │       │   ├── test_torch_cumsum.py
    │       │   ├── test_torch_diag.py
    │       │   ├── test_torch_einsum.py
    │       │   ├── test_torch_exp.py
    │       │   ├── test_torch_flatten.py
    │       │   ├── test_torch_flip.py
    │       │   ├── test_torch_floor.py
    │       │   ├── test_torch_istft.py
    │       │   ├── test_torch_log.py
    │       │   ├── test_torch_log10.py
    │       │   ├── test_torch_logsumexp.py
    │       │   ├── test_torch_matmul.py
    │       │   ├── test_torch_max.py
    │       │   ├── test_torch_maximum.py
    │       │   ├── test_torch_mean.py
    │       │   ├── test_torch_min.py
    │       │   ├── test_torch_minimum.py
    │       │   ├── test_torch_mm.py
    │       │   ├── test_torch_neg.py
    │       │   ├── test_torch_norm.py
    │       │   ├── test_torch_pow.py
    │       │   ├── test_torch_prod.py
    │       │   ├── test_torch_reciprocal.py
    │       │   ├── test_torch_roll.py
    │       │   ├── test_torch_round.py
    │       │   ├── test_torch_rsqrt.py
    │       │   ├── test_torch_sin.py
    │       │   ├── test_torch_slice_scatter.py
    │       │   ├── test_torch_sqrt.py
    │       │   ├── test_torch_square.py
    │       │   ├── test_torch_squeeze.py
    │       │   ├── test_torch_stack.py
    │       │   ├── test_torch_stft.py
    │       │   ├── test_torch_sum.py
    │       │   ├── test_torch_t.py
    │       │   ├── test_torch_tan.py
    │       │   ├── test_torch_tanh.py
    │       │   ├── test_torch_tensor_split.py
    │       │   ├── test_torch_transpose.py
    │       │   ├── test_torch_trunc.py
    │       │   ├── test_torch_unbind.py
    │       │   ├── test_torch_unsqueeze.py
    │       │   ├── test_torchaudio_F_inverse_spectrogram.py
    │       │   ├── test_torchaudio_F_spectrogram.py
    │       │   ├── test_torchaudio_InverseSpectrogram.py
    │       │   ├── test_torchaudio_Spectrogram.py
    │       │   ├── test_torchvision_DeformConv2d.py
    │       │   ├── test_transformers_deepseek_v3_attention.py
    │       │   ├── test_transformers_qwen2_attention.py
    │       │   ├── test_transformers_qwen3_attention.py
    │       │   └── test_vit_b_32.py
    │       ├── onnx/
    │       │   ├── CMakeLists.txt
    │       │   ├── test_F_adaptive_avg_pool1d.py
    │       │   ├── test_F_adaptive_avg_pool2d.py
    │       │   ├── test_F_adaptive_avg_pool3d.py
    │       │   ├── test_F_adaptive_max_pool1d.py
    │       │   ├── test_F_adaptive_max_pool2d.py
    │       │   ├── test_F_adaptive_max_pool3d.py
    │       │   ├── test_F_avg_pool1d.py
    │       │   ├── test_F_avg_pool2d.py
    │       │   ├── test_F_avg_pool3d.py
    │       │   ├── test_F_batch_norm.py
    │       │   ├── test_F_celu.py
    │       │   ├── test_F_conv1d.py
    │       │   ├── test_F_conv2d.py
    │       │   ├── test_F_conv3d.py
    │       │   ├── test_F_conv_transpose1d.py
    │       │   ├── test_F_conv_transpose2d.py
    │       │   ├── test_F_conv_transpose3d.py
    │       │   ├── test_F_elu.py
    │       │   ├── test_F_gelu.py
    │       │   ├── test_F_group_norm.py
    │       │   ├── test_F_hardshrink.py
    │       │   ├── test_F_hardsigmoid.py
    │       │   ├── test_F_hardswish.py
    │       │   ├── test_F_hardtanh.py
    │       │   ├── test_F_interpolate.py
    │       │   ├── test_F_layer_norm.py
    │       │   ├── test_F_leaky_relu.py
    │       │   ├── test_F_linear.py
    │       │   ├── test_F_local_response_norm.py
    │       │   ├── test_F_log_softmax.py
    │       │   ├── test_F_logsigmoid.py
    │       │   ├── test_F_max_pool1d.py
    │       │   ├── test_F_max_pool2d.py
    │       │   ├── test_F_max_pool3d.py
    │       │   ├── test_F_mish.py
    │       │   ├── test_F_normalize.py
    │       │   ├── test_F_pad.py
    │       │   ├── test_F_pixel_shuffle.py
    │       │   ├── test_F_pixel_unshuffle.py
    │       │   ├── test_F_prelu.py
    │       │   ├── test_F_relu.py
    │       │   ├── test_F_relu6.py
    │       │   ├── test_F_scaled_dot_product_attention.py
    │       │   ├── test_F_selu.py
    │       │   ├── test_F_sigmoid.py
    │       │   ├── test_F_silu.py
    │       │   ├── test_F_softmax.py
    │       │   ├── test_F_softmin.py
    │       │   ├── test_F_softplus.py
    │       │   ├── test_F_softshrink.py
    │       │   ├── test_F_softsign.py
    │       │   ├── test_F_tanh.py
    │       │   ├── test_F_tanhshrink.py
    │       │   ├── test_F_upsample.py
    │       │   ├── test_F_upsample_bilinear.py
    │       │   ├── test_F_upsample_nearest.py
    │       │   ├── test_Tensor_expand.py
    │       │   ├── test_Tensor_permute.py
    │       │   ├── test_Tensor_repeat.py
    │       │   ├── test_Tensor_reshape.py
    │       │   ├── test_Tensor_reshape_as.py
    │       │   ├── test_Tensor_select.py
    │       │   ├── test_Tensor_slice.py
    │       │   ├── test_Tensor_unflatten.py
    │       │   ├── test_Tensor_view.py
    │       │   ├── test_convnext_tiny.py
    │       │   ├── test_mobilenet_v2.py
    │       │   ├── test_mobilenet_v3_small.py
    │       │   ├── test_nn_AdaptiveAvgPool1d.py
    │       │   ├── test_nn_AdaptiveAvgPool2d.py
    │       │   ├── test_nn_AdaptiveAvgPool3d.py
    │       │   ├── test_nn_AdaptiveMaxPool1d.py
    │       │   ├── test_nn_AdaptiveMaxPool2d.py
    │       │   ├── test_nn_AdaptiveMaxPool3d.py
    │       │   ├── test_nn_AvgPool1d.py
    │       │   ├── test_nn_AvgPool2d.py
    │       │   ├── test_nn_AvgPool3d.py
    │       │   ├── test_nn_BatchNorm1d.py
    │       │   ├── test_nn_BatchNorm2d.py
    │       │   ├── test_nn_BatchNorm3d.py
    │       │   ├── test_nn_CELU.py
    │       │   ├── test_nn_ConstantPad1d.py
    │       │   ├── test_nn_ConstantPad2d.py
    │       │   ├── test_nn_ConstantPad3d.py
    │       │   ├── test_nn_Conv1d.py
    │       │   ├── test_nn_Conv2d.py
    │       │   ├── test_nn_Conv3d.py
    │       │   ├── test_nn_ConvTranspose1d.py
    │       │   ├── test_nn_ConvTranspose2d.py
    │       │   ├── test_nn_ConvTranspose3d.py
    │       │   ├── test_nn_ELU.py
    │       │   ├── test_nn_GELU.py
    │       │   ├── test_nn_GRU.py
    │       │   ├── test_nn_GroupNorm.py
    │       │   ├── test_nn_Hardshrink.py
    │       │   ├── test_nn_Hardsigmoid.py
    │       │   ├── test_nn_Hardswish.py
    │       │   ├── test_nn_Hardtanh.py
    │       │   ├── test_nn_InstanceNorm1d.py
    │       │   ├── test_nn_InstanceNorm2d.py
    │       │   ├── test_nn_InstanceNorm3d.py
    │       │   ├── test_nn_LSTM.py
    │       │   ├── test_nn_LayerNorm.py
    │       │   ├── test_nn_LeakyReLU.py
    │       │   ├── test_nn_Linear.py
    │       │   ├── test_nn_LocalResponseNorm.py
    │       │   ├── test_nn_LogSigmoid.py
    │       │   ├── test_nn_LogSoftmax.py
    │       │   ├── test_nn_MaxPool1d.py
    │       │   ├── test_nn_MaxPool2d.py
    │       │   ├── test_nn_MaxPool3d.py
    │       │   ├── test_nn_Mish.py
    │       │   ├── test_nn_MultiheadAttention.py
    │       │   ├── test_nn_PReLU.py
    │       │   ├── test_nn_PixelShuffle.py
    │       │   ├── test_nn_PixelUnshuffle.py
    │       │   ├── test_nn_RNN.py
    │       │   ├── test_nn_ReLU.py
    │       │   ├── test_nn_ReLU6.py
    │       │   ├── test_nn_ReflectionPad1d.py
    │       │   ├── test_nn_ReflectionPad2d.py
    │       │   ├── test_nn_ReplicationPad1d.py
    │       │   ├── test_nn_ReplicationPad2d.py
    │       │   ├── test_nn_ReplicationPad3d.py
    │       │   ├── test_nn_SELU.py
    │       │   ├── test_nn_SiLU.py
    │       │   ├── test_nn_Sigmoid.py
    │       │   ├── test_nn_Softmax.py
    │       │   ├── test_nn_Softmin.py
    │       │   ├── test_nn_Softplus.py
    │       │   ├── test_nn_Softshrink.py
    │       │   ├── test_nn_Softsign.py
    │       │   ├── test_nn_Tanh.py
    │       │   ├── test_nn_Tanhshrink.py
    │       │   ├── test_nn_Upsample.py
    │       │   ├── test_nn_UpsamplingBilinear2d.py
    │       │   ├── test_nn_UpsamplingNearest2d.py
    │       │   ├── test_nn_ZeroPad2d.py
    │       │   ├── test_onnx_activation_ops.py
    │       │   ├── test_onnx_conv_ops.py
    │       │   ├── test_onnx_dense_ops.py
    │       │   ├── test_onnx_fuse_channel_shuffle.py
    │       │   ├── test_onnx_fuse_pixel_shuffle.py
    │       │   ├── test_onnx_fuse_pixel_unshuffle.py
    │       │   ├── test_onnx_layout_ops.py
    │       │   ├── test_onnx_math_ops.py
    │       │   ├── test_onnx_normalize_ops.py
    │       │   ├── test_onnx_opset21_ops.py
    │       │   ├── test_onnx_pool_ops.py
    │       │   ├── test_onnx_reduce_ops.py
    │       │   ├── test_onnx_rnn_ops.py
    │       │   ├── test_resnet18.py
    │       │   ├── test_shufflenet_v2_x1_0.py
    │       │   ├── test_squeezenet1_1.py
    │       │   ├── test_swin_t.py
    │       │   ├── test_torch_cat.py
    │       │   ├── test_torch_ceil.py
    │       │   ├── test_torch_chunk.py
    │       │   ├── test_torch_clamp.py
    │       │   ├── test_torch_flatten.py
    │       │   ├── test_torch_flip.py
    │       │   ├── test_torch_floor.py
    │       │   ├── test_torch_logical_and.py
    │       │   ├── test_torch_logical_not.py
    │       │   ├── test_torch_logical_or.py
    │       │   ├── test_torch_logical_xor.py
    │       │   ├── test_torch_max.py
    │       │   ├── test_torch_maximum.py
    │       │   ├── test_torch_mean.py
    │       │   ├── test_torch_min.py
    │       │   ├── test_torch_minimum.py
    │       │   ├── test_torch_norm.py
    │       │   ├── test_torch_prod.py
    │       │   ├── test_torch_roll.py
    │       │   ├── test_torch_split.py
    │       │   ├── test_torch_squeeze.py
    │       │   ├── test_torch_stack.py
    │       │   ├── test_torch_sum.py
    │       │   ├── test_torch_transpose.py
    │       │   ├── test_torch_unbind.py
    │       │   ├── test_torch_unsqueeze.py
    │       │   ├── test_transformers_albert_attention.py
    │       │   ├── test_transformers_bart_attention.py
    │       │   ├── test_transformers_bert_attention.py
    │       │   ├── test_transformers_bert_generation_attention.py
    │       │   ├── test_transformers_blenderbot_attention.py
    │       │   ├── test_transformers_camembert_attention.py
    │       │   ├── test_transformers_chinese_clip_attention.py
    │       │   ├── test_transformers_clip_attention.py
    │       │   ├── test_transformers_ctrl_attention.py
    │       │   ├── test_transformers_deberta_attention.py
    │       │   ├── test_transformers_distilbert_attention.py
    │       │   ├── test_transformers_electra_attention.py
    │       │   ├── test_transformers_flaubert_attention.py
    │       │   ├── test_transformers_fsmt_attention.py
    │       │   ├── test_transformers_funnel_attention.py
    │       │   ├── test_transformers_gpt2_attention.py
    │       │   ├── test_transformers_layoutlm_attention.py
    │       │   ├── test_transformers_lxmert_attention.py
    │       │   ├── test_transformers_m2m_100_attention.py
    │       │   ├── test_transformers_marian_attention.py
    │       │   ├── test_transformers_mbart_attention.py
    │       │   ├── test_transformers_mobilebert_attention.py
    │       │   ├── test_transformers_mt5_attention.py
    │       │   ├── test_transformers_openai_attention.py
    │       │   ├── test_transformers_pegasus_attention.py
    │       │   ├── test_transformers_prophetnet_attention.py
    │       │   ├── test_transformers_reformer_attention.py
    │       │   ├── test_transformers_roberta_attention.py
    │       │   ├── test_transformers_squeezebert_attention.py
    │       │   ├── test_transformers_t5_attention.py
    │       │   ├── test_transformers_xlm_attention.py
    │       │   ├── test_transformers_xlm_roberta_attention.py
    │       │   └── test_vit_b_32.py
    │       ├── run_test.cmake
    │       ├── test_F_adaptive_avg_pool1d.py
    │       ├── test_F_adaptive_avg_pool2d.py
    │       ├── test_F_adaptive_avg_pool3d.py
    │       ├── test_F_adaptive_max_pool1d.py
    │       ├── test_F_adaptive_max_pool2d.py
    │       ├── test_F_adaptive_max_pool3d.py
    │       ├── test_F_affine_grid.py
    │       ├── test_F_alpha_dropout.py
    │       ├── test_F_avg_pool1d.py
    │       ├── test_F_avg_pool2d.py
    │       ├── test_F_avg_pool3d.py
    │       ├── test_F_batch_norm.py
    │       ├── test_F_celu.py
    │       ├── test_F_conv1d.py
    │       ├── test_F_conv2d.py
    │       ├── test_F_conv3d.py
    │       ├── test_F_conv_transpose1d.py
    │       ├── test_F_conv_transpose2d.py
    │       ├── test_F_conv_transpose3d.py
    │       ├── test_F_dropout.py
    │       ├── test_F_dropout2d.py
    │       ├── test_F_dropout3d.py
    │       ├── test_F_elu.py
    │       ├── test_F_embedding.py
    │       ├── test_F_feature_alpha_dropout.py
    │       ├── test_F_fold.py
    │       ├── test_F_gelu.py
    │       ├── test_F_glu.py
    │       ├── test_F_grid_sample.py
    │       ├── test_F_group_norm.py
    │       ├── test_F_hardshrink.py
    │       ├── test_F_hardsigmoid.py
    │       ├── test_F_hardswish.py
    │       ├── test_F_hardtanh.py
    │       ├── test_F_instance_norm.py
    │       ├── test_F_interpolate.py
    │       ├── test_F_layer_norm.py
    │       ├── test_F_leaky_relu.py
    │       ├── test_F_linear.py
    │       ├── test_F_local_response_norm.py
    │       ├── test_F_log_softmax.py
    │       ├── test_F_logsigmoid.py
    │       ├── test_F_lp_pool1d.py
    │       ├── test_F_lp_pool2d.py
    │       ├── test_F_max_pool1d.py
    │       ├── test_F_max_pool2d.py
    │       ├── test_F_max_pool3d.py
    │       ├── test_F_mish.py
    │       ├── test_F_normalize.py
    │       ├── test_F_pad.py
    │       ├── test_F_pairwise_distance.py
    │       ├── test_F_pixel_shuffle.py
    │       ├── test_F_pixel_unshuffle.py
    │       ├── test_F_prelu.py
    │       ├── test_F_relu.py
    │       ├── test_F_relu6.py
    │       ├── test_F_rms_norm.py
    │       ├── test_F_rrelu.py
    │       ├── test_F_scaled_dot_product_attention.py
    │       ├── test_F_selu.py
    │       ├── test_F_sigmoid.py
    │       ├── test_F_silu.py
    │       ├── test_F_softmax.py
    │       ├── test_F_softmin.py
    │       ├── test_F_softplus.py
    │       ├── test_F_softshrink.py
    │       ├── test_F_softsign.py
    │       ├── test_F_tanh.py
    │       ├── test_F_tanhshrink.py
    │       ├── test_F_threshold.py
    │       ├── test_F_unfold.py
    │       ├── test_F_upsample.py
    │       ├── test_F_upsample_bilinear.py
    │       ├── test_F_upsample_nearest.py
    │       ├── test_Tensor_expand.py
    │       ├── test_Tensor_fill.py
    │       ├── test_Tensor_index.py
    │       ├── test_Tensor_index_put.py
    │       ├── test_Tensor_masked_fill.py
    │       ├── test_Tensor_new_empty.py
    │       ├── test_Tensor_new_full.py
    │       ├── test_Tensor_new_ones.py
    │       ├── test_Tensor_new_zeros.py
    │       ├── test_Tensor_permute.py
    │       ├── test_Tensor_repeat.py
    │       ├── test_Tensor_reshape.py
    │       ├── test_Tensor_reshape_as.py
    │       ├── test_Tensor_select.py
    │       ├── test_Tensor_slice.py
    │       ├── test_Tensor_slice_copy.py
    │       ├── test_Tensor_to.py
    │       ├── test_Tensor_type_as.py
    │       ├── test_Tensor_unflatten.py
    │       ├── test_Tensor_view.py
    │       ├── test_convnext_tiny.py
    │       ├── test_ir_complex.py
    │       ├── test_mobilenet_v2.py
    │       ├── test_mobilenet_v3_small.py
    │       ├── test_nn_AdaptiveAvgPool1d.py
    │       ├── test_nn_AdaptiveAvgPool2d.py
    │       ├── test_nn_AdaptiveAvgPool3d.py
    │       ├── test_nn_AdaptiveMaxPool1d.py
    │       ├── test_nn_AdaptiveMaxPool2d.py
    │       ├── test_nn_AdaptiveMaxPool3d.py
    │       ├── test_nn_AlphaDropout.py
    │       ├── test_nn_AvgPool1d.py
    │       ├── test_nn_AvgPool2d.py
    │       ├── test_nn_AvgPool3d.py
    │       ├── test_nn_BatchNorm1d.py
    │       ├── test_nn_BatchNorm2d.py
    │       ├── test_nn_BatchNorm3d.py
    │       ├── test_nn_CELU.py
    │       ├── test_nn_ChannelShuffle.py
    │       ├── test_nn_ConstantPad1d.py
    │       ├── test_nn_ConstantPad2d.py
    │       ├── test_nn_ConstantPad3d.py
    │       ├── test_nn_Conv1d.py
    │       ├── test_nn_Conv2d.py
    │       ├── test_nn_Conv3d.py
    │       ├── test_nn_ConvTranspose1d.py
    │       ├── test_nn_ConvTranspose2d.py
    │       ├── test_nn_ConvTranspose3d.py
    │       ├── test_nn_Dropout.py
    │       ├── test_nn_Dropout2d.py
    │       ├── test_nn_Dropout3d.py
    │       ├── test_nn_ELU.py
    │       ├── test_nn_Embedding.py
    │       ├── test_nn_Fold.py
    │       ├── test_nn_GELU.py
    │       ├── test_nn_GLU.py
    │       ├── test_nn_GRU.py
    │       ├── test_nn_GroupNorm.py
    │       ├── test_nn_Hardshrink.py
    │       ├── test_nn_Hardsigmoid.py
    │       ├── test_nn_Hardswish.py
    │       ├── test_nn_Hardtanh.py
    │       ├── test_nn_Identity.py
    │       ├── test_nn_InstanceNorm1d.py
    │       ├── test_nn_InstanceNorm2d.py
    │       ├── test_nn_InstanceNorm3d.py
    │       ├── test_nn_LPPool1d.py
    │       ├── test_nn_LPPool2d.py
    │       ├── test_nn_LSTM.py
    │       ├── test_nn_LayerNorm.py
    │       ├── test_nn_LeakyReLU.py
    │       ├── test_nn_Linear.py
    │       ├── test_nn_LocalResponseNorm.py
    │       ├── test_nn_LogSigmoid.py
    │       ├── test_nn_LogSoftmax.py
    │       ├── test_nn_MaxPool1d.py
    │       ├── test_nn_MaxPool2d.py
    │       ├── test_nn_MaxPool3d.py
    │       ├── test_nn_Mish.py
    │       ├── test_nn_MultiheadAttention.py
    │       ├── test_nn_PReLU.py
    │       ├── test_nn_PixelShuffle.py
    │       ├── test_nn_PixelUnshuffle.py
    │       ├── test_nn_RMSNorm.py
    │       ├── test_nn_RNN.py
    │       ├── test_nn_RReLU.py
    │       ├── test_nn_ReLU.py
    │       ├── test_nn_ReLU6.py
    │       ├── test_nn_ReflectionPad1d.py
    │       ├── test_nn_ReflectionPad2d.py
    │       ├── test_nn_ReplicationPad1d.py
    │       ├── test_nn_ReplicationPad2d.py
    │       ├── test_nn_ReplicationPad3d.py
    │       ├── test_nn_SELU.py
    │       ├── test_nn_SiLU.py
    │       ├── test_nn_Sigmoid.py
    │       ├── test_nn_Softmax.py
    │       ├── test_nn_Softmax2d.py
    │       ├── test_nn_Softmin.py
    │       ├── test_nn_Softplus.py
    │       ├── test_nn_Softshrink.py
    │       ├── test_nn_Softsign.py
    │       ├── test_nn_Tanh.py
    │       ├── test_nn_Tanhshrink.py
    │       ├── test_nn_Threshold.py
    │       ├── test_nn_Unfold.py
    │       ├── test_nn_Upsample.py
    │       ├── test_nn_UpsamplingBilinear2d.py
    │       ├── test_nn_UpsamplingNearest2d.py
    │       ├── test_nn_ZeroPad2d.py
    │       ├── test_pnnx_eliminate_noop_cat.py
    │       ├── test_pnnx_eliminate_noop_expand.py
    │       ├── test_pnnx_eliminate_noop_math.py
    │       ├── test_pnnx_eliminate_noop_upsample.py
    │       ├── test_pnnx_expression.py
    │       ├── test_pnnx_fold_constant.py
    │       ├── test_pnnx_fuse_adjacent_permute.py
    │       ├── test_pnnx_fuse_adjacent_reshape.py
    │       ├── test_pnnx_fuse_channel_shuffle.py
    │       ├── test_pnnx_fuse_conv1d_batchnorm1d.py
    │       ├── test_pnnx_fuse_conv2d_batchnorm2d.py
    │       ├── test_pnnx_fuse_conv3d_batchnorm3d.py
    │       ├── test_pnnx_fuse_convtranspose1d_batchnorm1d.py
    │       ├── test_pnnx_fuse_convtranspose2d_batchnorm2d.py
    │       ├── test_pnnx_fuse_convtranspose3d_batchnorm3d.py
    │       ├── test_pnnx_fuse_input_unpack.py
    │       ├── test_pnnx_fuse_layernorm.py
    │       ├── test_pnnx_fuse_linear_batchnorm1d.py
    │       ├── test_pnnx_fuse_multiheadattention.py
    │       ├── test_pnnx_fuse_pad_conv1d.py
    │       ├── test_pnnx_fuse_pad_conv2d.py
    │       ├── test_pnnx_fuse_pixel_shuffle.py
    │       ├── test_pnnx_fuse_pixel_unshuffle.py
    │       ├── test_pnnx_fuse_rmsnorm.py
    │       ├── test_pnnx_fuse_scaled_dot_product_attention.py
    │       ├── test_pnnx_fuse_select_to_unbind.py
    │       ├── test_pnnx_fuse_slice_to_tensor_split.py
    │       ├── test_quantization_shufflenet_v2_x1_0.py
    │       ├── test_resnet18.py
    │       ├── test_shufflenet_v2_x1_0.py
    │       ├── test_squeezenet1_1.py
    │       ├── test_swin_t.py
    │       ├── test_torch_abs.py
    │       ├── test_torch_acos.py
    │       ├── test_torch_acosh.py
    │       ├── test_torch_addmm.py
    │       ├── test_torch_amax.py
    │       ├── test_torch_amin.py
    │       ├── test_torch_arange.py
    │       ├── test_torch_argmax.py
    │       ├── test_torch_argmin.py
    │       ├── test_torch_asin.py
    │       ├── test_torch_asinh.py
    │       ├── test_torch_atan.py
    │       ├── test_torch_atan2.py
    │       ├── test_torch_atanh.py
    │       ├── test_torch_bitwise_and.py
    │       ├── test_torch_bitwise_left_shift.py
    │       ├── test_torch_bitwise_not.py
    │       ├── test_torch_bitwise_or.py
    │       ├── test_torch_bitwise_right_shift.py
    │       ├── test_torch_bitwise_xor.py
    │       ├── test_torch_bmm.py
    │       ├── test_torch_cat.py
    │       ├── test_torch_ceil.py
    │       ├── test_torch_chunk.py
    │       ├── test_torch_clamp.py
    │       ├── test_torch_clone.py
    │       ├── test_torch_complex.py
    │       ├── test_torch_cos.py
    │       ├── test_torch_cosh.py
    │       ├── test_torch_cross.py
    │       ├── test_torch_cumprod.py
    │       ├── test_torch_cumsum.py
    │       ├── test_torch_diag.py
    │       ├── test_torch_einsum.py
    │       ├── test_torch_eq.py
    │       ├── test_torch_exp.py
    │       ├── test_torch_fft_fft.py
    │       ├── test_torch_fft_fft2.py
    │       ├── test_torch_fft_fftn.py
    │       ├── test_torch_fft_hfft.py
    │       ├── test_torch_fft_hfft2.py
    │       ├── test_torch_fft_hfftn.py
    │       ├── test_torch_fft_ifft.py
    │       ├── test_torch_fft_ifft2.py
    │       ├── test_torch_fft_ifftn.py
    │       ├── test_torch_fft_ihfft.py
    │       ├── test_torch_fft_ihfft2.py
    │       ├── test_torch_fft_ihfftn.py
    │       ├── test_torch_fft_irfft.py
    │       ├── test_torch_fft_irfft2.py
    │       ├── test_torch_fft_irfftn.py
    │       ├── test_torch_fft_rfft.py
    │       ├── test_torch_fft_rfft2.py
    │       ├── test_torch_fft_rfftn.py
    │       ├── test_torch_flatten.py
    │       ├── test_torch_flip.py
    │       ├── test_torch_floor.py
    │       ├── test_torch_full.py
    │       ├── test_torch_full_like.py
    │       ├── test_torch_gather.py
    │       ├── test_torch_ge.py
    │       ├── test_torch_gt.py
    │       ├── test_torch_imag.py
    │       ├── test_torch_index_select.py
    │       ├── test_torch_istft.py
    │       ├── test_torch_le.py
    │       ├── test_torch_lgamma.py
    │       ├── test_torch_log.py
    │       ├── test_torch_log10.py
    │       ├── test_torch_logaddexp.py
    │       ├── test_torch_logical_and.py
    │       ├── test_torch_logical_not.py
    │       ├── test_torch_logical_or.py
    │       ├── test_torch_logical_xor.py
    │       ├── test_torch_logsumexp.py
    │       ├── test_torch_lt.py
    │       ├── test_torch_masked_select.py
    │       ├── test_torch_matmul.py
    │       ├── test_torch_max.py
    │       ├── test_torch_maximum.py
    │       ├── test_torch_mean.py
    │       ├── test_torch_min.py
    │       ├── test_torch_minimum.py
    │       ├── test_torch_mm.py
    │       ├── test_torch_mv.py
    │       ├── test_torch_narrow.py
    │       ├── test_torch_ne.py
    │       ├── test_torch_neg.py
    │       ├── test_torch_norm.py
    │       ├── test_torch_ones.py
    │       ├── test_torch_ones_like.py
    │       ├── test_torch_positive.py
    │       ├── test_torch_pow.py
    │       ├── test_torch_prod.py
    │       ├── test_torch_real.py
    │       ├── test_torch_reciprocal.py
    │       ├── test_torch_repeat_interleave.py
    │       ├── test_torch_roll.py
    │       ├── test_torch_round.py
    │       ├── test_torch_rsqrt.py
    │       ├── test_torch_scatter_add.py
    │       ├── test_torch_sign.py
    │       ├── test_torch_sin.py
    │       ├── test_torch_sinh.py
    │       ├── test_torch_slice_scatter.py
    │       ├── test_torch_split.py
    │       ├── test_torch_sqrt.py
    │       ├── test_torch_square.py
    │       ├── test_torch_squeeze.py
    │       ├── test_torch_stack.py
    │       ├── test_torch_std.py
    │       ├── test_torch_stft.py
    │       ├── test_torch_sum.py
    │       ├── test_torch_t.py
    │       ├── test_torch_tan.py
    │       ├── test_torch_tanh.py
    │       ├── test_torch_tensor_split.py
    │       ├── test_torch_tile.py
    │       ├── test_torch_topk.py
    │       ├── test_torch_transpose.py
    │       ├── test_torch_trunc.py
    │       ├── test_torch_unbind.py
    │       ├── test_torch_unsqueeze.py
    │       ├── test_torch_view_as_complex.py
    │       ├── test_torch_view_as_real.py
    │       ├── test_torch_where.py
    │       ├── test_torch_zeros.py
    │       ├── test_torch_zeros_like.py
    │       ├── test_torchaudio_F_inverse_spectrogram.py
    │       ├── test_torchaudio_F_spectrogram.py
    │       ├── test_torchaudio_InverseSpectrogram.py
    │       ├── test_torchaudio_Spectrogram.py
    │       ├── test_torchvision_DeformConv2d.py
    │       ├── test_torchvision_RoIAlign.py
    │       ├── test_transformers_albert_attention.py
    │       ├── test_transformers_bart_attention.py
    │       ├── test_transformers_bert_attention.py
    │       ├── test_transformers_bert_generation_attention.py
    │       ├── test_transformers_blenderbot_attention.py
    │       ├── test_transformers_camembert_attention.py
    │       ├── test_transformers_chinese_clip_attention.py
    │       ├── test_transformers_clip_attention.py
    │       ├── test_transformers_ctrl_attention.py
    │       ├── test_transformers_deberta_attention.py
    │       ├── test_transformers_deepseek_v3_attention.py
    │       ├── test_transformers_distilbert_attention.py
    │       ├── test_transformers_electra_attention.py
    │       ├── test_transformers_flaubert_attention.py
    │       ├── test_transformers_fsmt_attention.py
    │       ├── test_transformers_funnel_attention.py
    │       ├── test_transformers_gpt2_attention.py
    │       ├── test_transformers_layoutlm_attention.py
    │       ├── test_transformers_longformer_attention.py
    │       ├── test_transformers_lxmert_attention.py
    │       ├── test_transformers_m2m_100_attention.py
    │       ├── test_transformers_marian_attention.py
    │       ├── test_transformers_mbart_attention.py
    │       ├── test_transformers_mobilebert_attention.py
    │       ├── test_transformers_mt5_attention.py
    │       ├── test_transformers_openai_attention.py
    │       ├── test_transformers_pegasus_attention.py
    │       ├── test_transformers_prophetnet_attention.py
    │       ├── test_transformers_qwen2_attention.py
    │       ├── test_transformers_qwen3_attention.py
    │       ├── test_transformers_reformer_attention.py
    │       ├── test_transformers_roberta_attention.py
    │       ├── test_transformers_squeezebert_attention.py
    │       ├── test_transformers_t5_attention.py
    │       ├── test_transformers_xlm_attention.py
    │       ├── test_transformers_xlm_roberta_attention.py
    │       ├── test_transformers_xlnet_attention.py
    │       └── test_vit_b_32.py
    ├── pytorch/
    │   └── README.md
    ├── quantize/
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── imreadwrite.cpp
    │   ├── imreadwrite.h
    │   ├── ncnn2int8.cpp
    │   ├── ncnn2table.cpp
    │   └── npy.hpp
    └── tensorflow/
        └── readme.txt
Download .txt
Showing preview only (267K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2590 symbols across 629 files)

FILE: benchmark/RankCards/Rcards.h
  type TModel (line 37) | struct TModel
  function Store (line 123) | void Store(const TModel& model)
  function Sum (line 135) | float Sum(void)
  function Ratio (line 144) | float Ratio(const TModelSet& Rset)
  type TBoard (line 173) | struct TBoard
  function FileExists (line 183) | inline bool FileExists(const std::string& name)
  function FileCopy (line 189) | inline void FileCopy(const std::string& Src, const std::string& Dst)
  function lcase (line 198) | static inline void lcase(std::string& s)
  function std (line 207) | static inline std::string lcase_copy(std::string s)
  function ltrim (line 214) | static inline void ltrim(std::string& s)
  function rtrim (line 222) | static inline void rtrim(std::string& s)
  function trim (line 231) | static inline void trim(std::string& s)
  function std (line 238) | static inline std::string ltrim_copy(std::string s)
  function std (line 245) | static inline std::string rtrim_copy(std::string s)
  function std (line 252) | static inline std::string trim_copy(std::string s)
  function GetNameAver (line 258) | static inline void GetNameAver(std::string line, TModel& model)

FILE: benchmark/RankCards/main.cpp
  function compareByRatio (line 17) | bool compareByRatio(const TBoard& a, const TBoard& b)
  function main (line 22) | int main(int argc, char** argv)

FILE: benchmark/benchncnn.cpp
  class DataReaderFromEmpty (line 24) | class DataReaderFromEmpty : public ncnn::DataReader
    method scan (line 27) | virtual int scan(const char* format, void* p) const
    method read (line 31) | virtual size_t read(void* buf, size_t size) const
  function benchmark (line 51) | void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, c...
  function benchmark (line 170) | void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Op...
  function show_usage (line 177) | void show_usage()
  function parse_shape_list (line 184) | static std::vector<ncnn::Mat> parse_shape_list(char* s)
  function main (line 246) | int main(int argc, char** argv)

FILE: examples/arcface.cpp
  type Bbox (line 39) | struct Bbox
    method Bbox (line 43) | Bbox()
    method Bbox (line 47) | Bbox(float x1,
    method Bbox (line 57) | Bbox apply_image_scale(const cv::Mat& original_image,
    method get_label_name (line 77) | std::string get_label_name(const std::vector<std::string>& classes)
    method area (line 83) | float area() const
    method crop_bbox (line 89) | cv::Mat crop_bbox(const cv::Mat& originalImage) const
    method get_rect (line 118) | cv::Rect_<float> get_rect() const
  function print_bbox (line 135) | static void print_bbox(Bbox& bbox)
  function qsort_descent_inplace (line 141) | static void qsort_descent_inplace(std::vector<Bbox>& faceobjects, int le...
  function qsort_descent_inplace (line 178) | static void qsort_descent_inplace(std::vector<Bbox>& faceobjects)
  function calculate_iou (line 185) | float calculate_iou(const Bbox& box1, const Bbox& box2)
  function non_maximum_supression (line 204) | static std::vector<int>
  function scale_wh (line 252) | static std::vector<float> scale_wh(float w0, float h0, float w1, float h1)
  type ImagePreProcessResults (line 262) | struct ImagePreProcessResults
    method ImagePreProcessResults (line 267) | ImagePreProcessResults(ncnn::Mat result, float img_scale, float pad_w,...
  type DetectionResult (line 273) | struct DetectionResult
  function ImagePreProcessResults (line 279) | static ImagePreProcessResults preprocess_yolo_kpts(cv::Mat& input_image,...
    method ImagePreProcessResults (line 267) | ImagePreProcessResults(ncnn::Mat result, float img_scale, float pad_w,...
  function DetectionResult (line 309) | static DetectionResult parse_yolo_keypoints_results(ncnn::Mat& result,
  function get_similarity (line 401) | static inline float get_similarity(std::vector<float> f1, std::vector<fl...
  function estimate_norm (line 413) | static int estimate_norm(float* transform_matrix, const float* lmk, int ...
  function norm_crop (line 458) | static int norm_crop(cv::Mat& output, const cv::Mat& input, const float*...
  function normalize_arcface (line 474) | void normalize_arcface(std::vector<float>& feature)
  function get_face (line 488) | static int get_face(const cv::Mat& rgb, DetectionResult& result)
  function get_embedding (line 525) | static int get_embedding(const cv::Mat& rgb, std::vector<float>& result)
  function main (line 575) | int main(int argc, char** argv)

FILE: examples/fasterrcnn.cpp
  type Object (line 16) | struct Object
  function intersection_area (line 23) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 29) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 66) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 74) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function detect_fasterrcnn (line 111) | static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 284) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 329) | int main(int argc, char** argv)

FILE: examples/mobilenetssd.cpp
  type Object (line 16) | struct Object
  function detect_mobilenet (line 23) | static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& obj...
  function draw_objects (line 75) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 120) | int main(int argc, char** argv)

FILE: examples/mobilenetv2ssdlite.cpp
  class Noop (line 16) | class Noop : public ncnn::Layer
  type Object (line 21) | struct Object
  function detect_mobilenetv2 (line 28) | static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& o...
  function draw_objects (line 82) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 127) | int main(int argc, char** argv)

FILE: examples/mobilenetv3ssdlite.cpp
  function T (line 21) | const T& clamp(const T& v, const T& lo, const T& hi)
  type Object (line 27) | struct Object
  function detect_mobilenetv3 (line 34) | static int detect_mobilenetv3(const cv::Mat& bgr, std::vector<Object>& o...
  function draw_objects (line 93) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 141) | int main(int argc, char** argv)

FILE: examples/nanodet.cpp
  type Object (line 18) | struct Object
  function intersection_area (line 25) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 31) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  function nms_sorted_bboxes (line 76) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function generate_proposals (line 113) | static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma...
  function detect_nanodet (line 212) | static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 342) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 391) | int main(int argc, char** argv)

FILE: examples/nanodetplus_pnnx.cpp
  type Object (line 18) | struct Object
  function intersection_area (line 25) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 31) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  function nms_sorted_bboxes (line 76) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function sigmoid (line 113) | static inline float sigmoid(float x)
  function generate_proposals (line 118) | static void generate_proposals(const ncnn::Mat& pred, int stride, const ...
  function detect_nanodet (line 210) | static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 348) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 397) | int main(int argc, char** argv)

FILE: examples/p2pnet.cpp
  type CrowdPoint (line 17) | struct CrowdPoint
  function shift (line 23) | static void shift(int w, int h, int stride, std::vector<float> anchor_po...
  function generate_anchor_points (line 72) | static void generate_anchor_points(int stride, int row, int line, std::v...
  function generate_anchor_points (line 112) | static void generate_anchor_points(int img_w, int img_h, std::vector<int...
  function detect_crowd (line 135) | static int detect_crowd(const cv::Mat& bgr, std::vector<CrowdPoint>& cro...
  function draw_result (line 195) | static void draw_result(const cv::Mat& bgr, const std::vector<CrowdPoint...
  function main (line 209) | int main(int argc, char** argv)

FILE: examples/peleenetssd_seg.cpp
  type Object (line 16) | struct Object
  function detect_peleenet (line 23) | static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& obje...
  function draw_objects (line 78) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 163) | int main(int argc, char** argv)

FILE: examples/piper.cpp
  class relative_embeddings_k_module (line 33) | class relative_embeddings_k_module : public ncnn::Layer
    method relative_embeddings_k_module (line 36) | relative_embeddings_k_module()
    method forward (line 41) | virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,...
  class relative_embeddings_v_module (line 77) | class relative_embeddings_v_module : public ncnn::Layer
    method relative_embeddings_v_module (line 80) | relative_embeddings_v_module()
    method forward (line 85) | virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,...
  class piecewise_rational_quadratic_transform_module (line 121) | class piecewise_rational_quadratic_transform_module : public ncnn::Layer
    method piecewise_rational_quadratic_transform_module (line 124) | piecewise_rational_quadratic_transform_module()
    method forward (line 129) | virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::v...
  function is_word_eos (line 297) | static bool is_word_eos(const char* word)
  function find_word_id (line 303) | static void find_word_id(const std::map<unsigned int, std::vector<const ...
  function simple_phonemize (line 323) | static void simple_phonemize(const char* text, std::vector<int>& sequenc...
  function path_attention (line 480) | static void path_attention(const ncnn::Mat& logw, const ncnn::Mat& m_p, ...
  function tts_piper (line 518) | static int tts_piper(const char* text, int speaker_id, std::vector<short...
  function save_pcm_to_wav (line 677) | static void save_pcm_to_wav(const char* path, const short* pcm, int num_...
  function main (line 718) | int main(int argc, char** argv)

FILE: examples/ppocrv5.cpp
  type Character (line 28) | struct Character
  type Object (line 34) | struct Object
  function contour_score (line 42) | static double contour_score(const cv::Mat& binary, const std::vector<cv:...
  function get_rotate_crop_image (line 71) | static cv::Mat get_rotate_crop_image(const cv::Mat& bgr, const Object& o...
  class PPOCRv5 (line 142) | class PPOCRv5
  function detect_ppocrv5 (line 382) | static int detect_ppocrv5(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 398) | static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& o...
  function main (line 517) | int main(int argc, char** argv)

FILE: examples/retinaface.cpp
  type FaceObject (line 16) | struct FaceObject
  function intersection_area (line 23) | static inline float intersection_area(const FaceObject& a, const FaceObj...
  function qsort_descent_inplace (line 29) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, ...
  function qsort_descent_inplace (line 66) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
  function nms_sorted_bboxes (line 74) | static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects...
  function generate_anchors (line 109) | static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios...
  function generate_proposals (line 146) | static void generate_proposals(const ncnn::Mat& anchors, int feat_stride...
  function detect_retinaface (line 228) | static int detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>...
  function draw_faceobjects (line 359) | static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceO...
  function main (line 402) | int main(int argc, char** argv)

FILE: examples/rfcn.cpp
  type Object (line 16) | struct Object
  function intersection_area (line 23) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 29) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 66) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 74) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function detect_rfcn (line 111) | static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 283) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 328) | int main(int argc, char** argv)

FILE: examples/rvm.cpp
  function detect_rvm (line 78) | static int detect_rvm(const cv::Mat& bgr, cv::Mat& fgr, cv::Mat& pha, cv...
  function draw_objects (line 263) | static void draw_objects(const cv::Mat& bgr, const cv::Mat& fgr, const c...
  function main (line 311) | int main(int argc, char** argv)

FILE: examples/scrfd.cpp
  type FaceObject (line 16) | struct FaceObject
  function intersection_area (line 22) | static inline float intersection_area(const FaceObject& a, const FaceObj...
  function qsort_descent_inplace (line 28) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, ...
  function qsort_descent_inplace (line 65) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
  function nms_sorted_bboxes (line 73) | static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects...
  function generate_anchors (line 108) | static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios...
  function generate_proposals (line 145) | static void generate_proposals(const ncnn::Mat& anchors, int feat_stride...
  function detect_scrfd (line 211) | static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& fac...
  function draw_faceobjects (line 365) | static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceO...
  function main (line 402) | int main(int argc, char** argv)

FILE: examples/scrfd_crowdhuman.cpp
  type FaceObject (line 16) | struct FaceObject
  function intersection_area (line 22) | static inline float intersection_area(const FaceObject& a, const FaceObj...
  function qsort_descent_inplace (line 28) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, ...
  function qsort_descent_inplace (line 65) | static void qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
  function nms_sorted_bboxes (line 73) | static void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects...
  function generate_anchors (line 108) | static ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios...
  function generate_proposals (line 145) | static void generate_proposals(const ncnn::Mat& anchors, int feat_stride...
  function detect_scrfd (line 211) | static int detect_scrfd(const cv::Mat& bgr, std::vector<FaceObject>& fac...
  function draw_faceobjects (line 402) | static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceO...
  function main (line 439) | int main(int argc, char** argv)

FILE: examples/shufflenetv2.cpp
  function detect_shufflenetv2 (line 16) | static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& c...
  function print_topk (line 66) | static int print_topk(const std::vector<float>& cls_scores, int topk)
  function main (line 91) | int main(int argc, char** argv)

FILE: examples/simplepose.cpp
  type KeyPoint (line 17) | struct KeyPoint
  function detect_posenet (line 23) | static int detect_posenet(const cv::Mat& bgr, std::vector<KeyPoint>& key...
  function draw_pose (line 96) | static void draw_pose(const cv::Mat& bgr, const std::vector<KeyPoint>& k...
  function main (line 133) | int main(int argc, char** argv)

FILE: examples/squeezenet.cpp
  function detect_squeezenet (line 16) | static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls...
  function print_topk (line 49) | static int print_topk(const std::vector<float>& cls_scores, int topk)
  function main (line 74) | int main(int argc, char** argv)

FILE: examples/squeezenet_c_api.cpp
  function detect_squeezenet (line 16) | static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls...
  function print_topk (line 64) | static int print_topk(const std::vector<float>& cls_scores, int topk)
  function main (line 89) | int main(int argc, char** argv)

FILE: examples/squeezenetssd.cpp
  type Object (line 16) | struct Object
  function detect_squeezenet (line 23) | static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 75) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 120) | int main(int argc, char** argv)

FILE: examples/whisper.cpp
  class Tokenizer (line 88) | class Tokenizer
    method generate_byte_decoder (line 96) | void generate_byte_decoder()
    method utf8_to_codepoints (line 132) | std::vector<uint32_t> utf8_to_codepoints(const std::string& s) const
    method load (line 173) | bool load(const char* vocab_path)
    method decode (line 210) | std::string decode(const std::vector<int>& tokens) const
  class Result (line 285) | class Result
  class Whisper (line 295) | class Whisper
  function log_softmax_inplace (line 414) | static void log_softmax_inplace(ncnn::Mat& m)
  function load_wav_samples (line 829) | static int load_wav_samples(const char* wavpath, std::vector<short>& sam...
  function main (line 902) | int main(int argc, char** argv)

FILE: examples/yolact.cpp
  type Object (line 16) | struct Object
  function intersection_area (line 25) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 31) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 76) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function detect_yolact (line 113) | static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 351) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 515) | int main(int argc, char** argv)

FILE: examples/yolo11.cpp
  type Object (line 91) | struct Object
  function intersection_area (line 98) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 104) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 141) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 149) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 186) | static inline float sigmoid(float x)
  function generate_proposals (line 191) | static void generate_proposals(const ncnn::Mat& pred, int stride, const ...
  function generate_proposals (line 287) | static void generate_proposals(const ncnn::Mat& pred, const std::vector<...
  function detect_yolo11 (line 306) | static int detect_yolo11(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 408) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 481) | int main(int argc, char** argv)

FILE: examples/yolo11_cls.cpp
  type Object (line 27) | struct Object
  function get_topk (line 33) | static void get_topk(const ncnn::Mat& cls_scores, int topk, std::vector<...
  function detect_yolo11_cls (line 55) | static int detect_yolo11_cls(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 117) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 291) | int main(int argc, char** argv)

FILE: examples/yolo11_obb.cpp
  type Object (line 111) | struct Object
  function intersection_area (line 118) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 128) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 165) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 173) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 210) | static inline float sigmoid(float x)
  function generate_proposals (line 215) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function generate_proposals (line 320) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function detect_yolo11_obb (line 340) | static int detect_yolo11_obb(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 438) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 519) | int main(int argc, char** argv)

FILE: examples/yolo11_pose.cpp
  type KeyPoint (line 112) | struct KeyPoint
  type Object (line 118) | struct Object
  function intersection_area (line 126) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 132) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 169) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 177) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 214) | static inline float sigmoid(float x)
  function generate_proposals (line 219) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function generate_proposals (line 312) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function detect_yolo11_pose (line 332) | static int detect_yolo11_pose(const cv::Mat& bgr, std::vector<Object>& o...
  function draw_objects (line 448) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 558) | int main(int argc, char** argv)

FILE: examples/yolo11_seg.cpp
  type Object (line 114) | struct Object
  function intersection_area (line 123) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 129) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 166) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 174) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 211) | static inline float sigmoid(float x)
  function generate_proposals (line 216) | static void generate_proposals(const ncnn::Mat& pred, int stride, const ...
  function generate_proposals (line 313) | static void generate_proposals(const ncnn::Mat& pred, const std::vector<...
  function detect_yolo11_seg (line 341) | static int detect_yolo11_seg(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 532) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 621) | int main(int argc, char** argv)

FILE: examples/yolov2.cpp
  type Object (line 16) | struct Object
  function detect_yolov2 (line 23) | static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 79) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 124) | int main(int argc, char** argv)

FILE: examples/yolov3.cpp
  type Object (line 16) | struct Object
  function detect_yolov3 (line 23) | static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 76) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 121) | int main(int argc, char** argv)

FILE: examples/yolov4.cpp
  type Object (line 25) | struct Object
  function init_yolov4 (line 32) | static int init_yolov4(ncnn::Net* yolov4, int* target_size)
  function detect_yolov4 (line 64) | static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& object...
  function draw_objects (line 101) | static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& o...
  function main (line 166) | int main(int argc, char** argv)

FILE: examples/yolov5.cpp
  class YoloV5Focus (line 25) | class YoloV5Focus : public ncnn::Layer
    method YoloV5Focus (line 28) | YoloV5Focus()
    method forward (line 33) | virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,...
  type Object (line 74) | struct Object
  function intersection_area (line 81) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 87) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 124) | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  function nms_sorted_bboxes (line 132) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function sigmoid (line 169) | static inline float sigmoid(float x)
  function generate_proposals (line 174) | static void generate_proposals(const ncnn::Mat& anchors, int stride, con...
  function detect_yolov5 (line 262) | static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 438) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 487) | int main(int argc, char** argv)

FILE: examples/yolov5_pnnx.cpp
  type Object (line 18) | struct Object
  function intersection_area (line 25) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 31) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  function nms_sorted_bboxes (line 76) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function sigmoid (line 113) | static inline float sigmoid(float x)
  function generate_proposals (line 118) | static void generate_proposals(const ncnn::Mat& anchors, int stride, con...
  function detect_yolov5 (line 193) | static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 346) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 395) | int main(int argc, char** argv)

FILE: examples/yolov7.cpp
  type Object (line 20) | struct Object
  function intersection_area (line 27) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 33) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 70) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 78) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function sigmoid (line 115) | static inline float sigmoid(float x)
  function generate_proposals (line 120) | static void generate_proposals(const ncnn::Mat& anchors, int stride, con...
  function detect_yolov7 (line 203) | static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 349) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 427) | int main(int argc, char** argv)

FILE: examples/yolov7_pnnx.cpp
  type Object (line 18) | struct Object
  function intersection_area (line 25) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 31) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  function nms_sorted_bboxes (line 76) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function sigmoid (line 113) | static inline float sigmoid(float x)
  function generate_proposals (line 118) | static void generate_proposals(const ncnn::Mat& anchors, int stride, con...
  function detect_yolov7 (line 191) | static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 345) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 394) | int main(int argc, char** argv)

FILE: examples/yolov8.cpp
  type Object (line 60) | struct Object
  function intersection_area (line 67) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 73) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 110) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 118) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 155) | static inline float sigmoid(float x)
  function generate_proposals (line 160) | static void generate_proposals(const ncnn::Mat& pred, int stride, const ...
  function generate_proposals (line 256) | static void generate_proposals(const ncnn::Mat& pred, const std::vector<...
  function detect_yolov8 (line 275) | static int detect_yolov8(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects_coco (line 385) | static void draw_objects_coco(const cv::Mat& bgr, const std::vector<Obje...
  function draw_objects_oiv (line 458) | static void draw_objects_oiv(const cv::Mat& bgr, const std::vector<Objec...
  function main (line 597) | int main(int argc, char** argv)

FILE: examples/yolov8_cls.cpp
  type Object (line 27) | struct Object
  function get_topk (line 33) | static void get_topk(const ncnn::Mat& cls_scores, int topk, std::vector<...
  function detect_yolov8_cls (line 55) | static int detect_yolov8_cls(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 117) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 291) | int main(int argc, char** argv)

FILE: examples/yolov8_obb.cpp
  type Object (line 80) | struct Object
  function intersection_area (line 87) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 97) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 134) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 142) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 179) | static inline float sigmoid(float x)
  function generate_proposals (line 184) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function generate_proposals (line 289) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function detect_yolov8_obb (line 309) | static int detect_yolov8_obb(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 407) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 488) | int main(int argc, char** argv)

FILE: examples/yolov8_pose.cpp
  type KeyPoint (line 81) | struct KeyPoint
  type Object (line 87) | struct Object
  function intersection_area (line 95) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 101) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 138) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 146) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 183) | static inline float sigmoid(float x)
  function generate_proposals (line 188) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function generate_proposals (line 281) | static void generate_proposals(const ncnn::Mat& pred, const ncnn::Mat& p...
  function detect_yolov8_pose (line 301) | static int detect_yolov8_pose(const cv::Mat& bgr, std::vector<Object>& o...
  function draw_objects (line 417) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 527) | int main(int argc, char** argv)

FILE: examples/yolov8_seg.cpp
  type Object (line 83) | struct Object
  function intersection_area (line 92) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 98) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 135) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 143) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function sigmoid (line 180) | static inline float sigmoid(float x)
  function generate_proposals (line 185) | static void generate_proposals(const ncnn::Mat& pred, int stride, const ...
  function generate_proposals (line 282) | static void generate_proposals(const ncnn::Mat& pred, const std::vector<...
  function detect_yolov8_seg (line 310) | static int detect_yolov8_seg(const cv::Mat& bgr, std::vector<Object>& ob...
  function draw_objects (line 501) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 590) | int main(int argc, char** argv)

FILE: examples/yoloworld.cpp
  type Object (line 55) | struct Object
  function intersection_area (line 62) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 68) | static void qsort_descent_inplace(std::vector<Object>& objects, int left...
  function qsort_descent_inplace (line 105) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 113) | static void nms_sorted_bboxes(const std::vector<Object>& objects, std::v...
  function generate_proposals (line 150) | static void generate_proposals(const ncnn::Mat& pred, float prob_thresho...
  function detect_yoloworld (line 192) | static int detect_yoloworld(const cv::Mat& bgr, std::vector<Object>& obj...
  function draw_objects (line 297) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 370) | int main(int argc, char** argv)

FILE: examples/yolox.cpp
  class YoloV5Focus (line 24) | class YoloV5Focus : public ncnn::Layer
    method YoloV5Focus (line 27) | YoloV5Focus()
    method forward (line 32) | virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob,...
  type Object (line 72) | struct Object
  type GridAndStride (line 79) | struct GridAndStride
  function intersection_area (line 86) | static inline float intersection_area(const Object& a, const Object& b)
  function qsort_descent_inplace (line 92) | static void qsort_descent_inplace(std::vector<Object>& faceobjects, int ...
  function qsort_descent_inplace (line 129) | static void qsort_descent_inplace(std::vector<Object>& objects)
  function nms_sorted_bboxes (line 137) | static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, st...
  function generate_grids_and_stride (line 174) | static void generate_grids_and_stride(const int target_w, const int targ...
  function generate_yolox_proposals (line 195) | static void generate_yolox_proposals(std::vector<GridAndStride> grid_str...
  function detect_yolox (line 242) | static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
  function draw_objects (line 341) | static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& ...
  function main (line 390) | int main(int argc, char** argv)

FILE: python/examples/peleenetssd.py
  function draw_detection_objects_seg (line 10) | def draw_detection_objects_seg(image, class_names, objects, mat_map):

FILE: python/examples/yolact.py
  function draw_result (line 11) | def draw_result(image, class_names, boxes, masks, classes, scores):

FILE: python/ncnn/model_zoo/fasterrcnn.py
  class Faster_RCNN (line 10) | class Faster_RCNN:
    method __init__ (line 11) | def __init__(
    method __del__ (line 69) | def __del__(self):
    method __call__ (line 72) | def __call__(self, img):
    method nms_sorted_bboxes (line 204) | def nms_sorted_bboxes(self, objects, nms_threshold):

FILE: python/ncnn/model_zoo/mobilenetssd.py
  class MobileNet_SSD (line 9) | class MobileNet_SSD:
    method __init__ (line 10) | def __init__(self, target_size=300, num_threads=1, use_gpu=False):
    method __del__ (line 52) | def __del__(self):
    method __call__ (line 55) | def __call__(self, img):

FILE: python/ncnn/model_zoo/mobilenetv2ssdlite.py
  class Noop (line 9) | class Noop(ncnn.Layer):
  function Noop_layer_creator (line 13) | def Noop_layer_creator():
  class MobileNetV2_SSDLite (line 17) | class MobileNetV2_SSDLite:
    method __init__ (line 18) | def __init__(self, target_size=300, num_threads=1, use_gpu=False):
    method __del__ (line 61) | def __del__(self):
    method __call__ (line 64) | def __call__(self, img):

FILE: python/ncnn/model_zoo/mobilenetv3ssdlite.py
  function clamp (line 10) | def clamp(v, lo, hi):
  class MobileNetV3_SSDLite (line 19) | class MobileNetV3_SSDLite:
    method __init__ (line 20) | def __init__(self, target_size=300, num_threads=1, use_gpu=False):
    method __del__ (line 61) | def __del__(self):
    method __call__ (line 64) | def __call__(self, img):

FILE: python/ncnn/model_zoo/model_store.py
  function merge_file (line 74) | def merge_file(root, files_in, file_out, remove=True):
  function short_hash (line 84) | def short_hash(name):
  function get_model_file (line 92) | def get_model_file(name, tag=None, root=os.path.join("~", ".ncnn", "mode...
  function purge (line 187) | def purge(root=os.path.join("~", ".ncnn", "models")):

FILE: python/ncnn/model_zoo/model_zoo.py
  function get_model (line 50) | def get_model(name, **kwargs):
  function get_model_list (line 60) | def get_model_list():

FILE: python/ncnn/model_zoo/nanodet.py
  class NanoDet (line 11) | class NanoDet:
    method __init__ (line 12) | def __init__(
    method __del__ (line 126) | def __del__(self):
    method __call__ (line 129) | def __call__(self, img):

FILE: python/ncnn/model_zoo/peleenetssd.py
  class PeleeNet_SSD (line 9) | class PeleeNet_SSD:
    method __init__ (line 10) | def __init__(self, target_size=304, num_threads=1, use_gpu=False):
    method __del__ (line 42) | def __del__(self):
    method __call__ (line 45) | def __call__(self, img):

FILE: python/ncnn/model_zoo/retinaface.py
  class RetinaFace (line 10) | class RetinaFace:
    method __init__ (line 11) | def __init__(
    method __del__ (line 30) | def __del__(self):
    method __call__ (line 33) | def __call__(self, img):
    method detect_stride32 (line 81) | def detect_stride32(self, ex):
    method detect_stride16 (line 106) | def detect_stride16(self, ex):
    method detect_stride8 (line 131) | def detect_stride8(self, ex):
    method generate_anchors (line 156) | def generate_anchors(self, base_size, ratios, scales):
    method generate_proposals (line 190) | def generate_proposals(
    method nms_sorted_bboxes (line 289) | def nms_sorted_bboxes(self, faceobjects, nms_threshold):

FILE: python/ncnn/model_zoo/rfcn.py
  class RFCN (line 10) | class RFCN:
    method __init__ (line 11) | def __init__(
    method __del__ (line 66) | def __del__(self):
    method __call__ (line 69) | def __call__(self, img):
    method nms_sorted_bboxes (line 206) | def nms_sorted_bboxes(self, objects, nms_threshold):

FILE: python/ncnn/model_zoo/shufflenetv2.py
  class ShuffleNetV2 (line 9) | class ShuffleNetV2:
    method __init__ (line 10) | def __init__(self, target_size=224, num_threads=1, use_gpu=False):
    method __del__ (line 28) | def __del__(self):
    method __call__ (line 31) | def __call__(self, img):

FILE: python/ncnn/model_zoo/simplepose.py
  class SimplePose (line 9) | class SimplePose:
    method __init__ (line 10) | def __init__(
    method __del__ (line 35) | def __del__(self):
    method __call__ (line 38) | def __call__(self, img):

FILE: python/ncnn/model_zoo/squeezenet.py
  class SqueezeNet (line 9) | class SqueezeNet:
    method __init__ (line 10) | def __init__(self, target_size=227, num_threads=1, use_gpu=False):
    method __del__ (line 26) | def __del__(self):
    method __call__ (line 29) | def __call__(self, img):

FILE: python/ncnn/model_zoo/squeezenetssd.py
  class SqueezeNet_SSD (line 9) | class SqueezeNet_SSD:
    method __init__ (line 10) | def __init__(self, target_size=300, num_threads=1, use_gpu=False):
    method __del__ (line 53) | def __del__(self):
    method __call__ (line 56) | def __call__(self, img):

FILE: python/ncnn/model_zoo/yolact.py
  class Yolact (line 12) | class Yolact:
    method __init__ (line 13) | def __init__(
    method __del__ (line 136) | def __del__(self):
    method __call__ (line 139) | def __call__(self, img):
    method make_priors (line 186) | def make_priors(self):
    method decode (line 214) | def decode(self, loc, priors, img_w, img_h):
    method detect (line 267) | def detect(self, conf_preds, loc_data, prior_data, mask_data, img_w, i...

FILE: python/ncnn/model_zoo/yolov2.py
  class MobileNet_YoloV2 (line 9) | class MobileNet_YoloV2:
    method __init__ (line 10) | def __init__(self, target_size=416, num_threads=1, use_gpu=False):
    method __del__ (line 53) | def __del__(self):
    method __call__ (line 56) | def __call__(self, img):

FILE: python/ncnn/model_zoo/yolov3.py
  class MobileNetV2_YoloV3 (line 9) | class MobileNetV2_YoloV3:
    method __init__ (line 10) | def __init__(self, target_size=352, num_threads=1, use_gpu=False):
    method __del__ (line 53) | def __del__(self):
    method __call__ (line 56) | def __call__(self, img):

FILE: python/ncnn/model_zoo/yolov4.py
  class YoloV4_Base (line 9) | class YoloV4_Base:
    method __init__ (line 10) | def __init__(self, tiny, target_size, num_threads=1, use_gpu=False):
    method __del__ (line 116) | def __del__(self):
    method __call__ (line 119) | def __call__(self, img):
  class YoloV4_Tiny (line 172) | class YoloV4_Tiny(YoloV4_Base):
    method __init__ (line 173) | def __init__(self, **kwargs):
  class YoloV4 (line 177) | class YoloV4(YoloV4_Base):
    method __init__ (line 178) | def __init__(self, **kwargs):

FILE: python/ncnn/model_zoo/yolov5.py
  class YoloV5Focus (line 12) | class YoloV5Focus(ncnn.Layer):
    method __init__ (line 15) | def __init__(self):
    method forward (line 21) | def forward(self, bottom_blob, top_blob, opt):
  function YoloV5Focus_layer_creator (line 39) | def YoloV5Focus_layer_creator():
  function YoloV5Focus_layer_destroyer (line 43) | def YoloV5Focus_layer_destroyer(layer):
  class YoloV5s (line 50) | class YoloV5s:
    method __init__ (line 51) | def __init__(
    method __del__ (line 174) | def __del__(self):
    method __call__ (line 177) | def __call__(self, img):
    method non_max_suppression (line 263) | def non_max_suppression(

FILE: python/ncnn/model_zoo/yolov7.py
  function sigmoid (line 18) | def sigmoid(val):
  function calcOverlap (line 23) | def calcOverlap(r1, r2):
  function calcIntersection (line 40) | def calcIntersection(r1 : Detect_Object, r2 : Detect_Object):
  function IOU (line 47) | def IOU(r1 : Detect_Object, r2 : Detect_Object):
  function NMS (line 59) | def NMS(detections, iou_thresh=0.45):
  class YoloV7_Base (line 85) | class YoloV7_Base:
    method __init__ (line 86) | def __init__(self, target_size, num_threads=1, use_gpu=False, use_stri...
    method __del__ (line 188) | def __del__(self):
    method __call__ (line 191) | def __call__(self, img):
  class YoloV7_Tiny (line 304) | class YoloV7_Tiny(YoloV7_Base):
    method __init__ (line 305) | def __init__(self, **kwargs):

FILE: python/ncnn/model_zoo/yolov8.py
  class YoloV8s (line 12) | class YoloV8s:
    method __init__ (line 13) | def __init__(
    method __del__ (line 126) | def __del__(self):
    method __call__ (line 129) | def __call__(self, img):
    method non_max_suppression (line 214) | def non_max_suppression(

FILE: python/ncnn/utils/download.py
  function check_sha1 (line 12) | def check_sha1(filename, sha1_hash):
  function download (line 38) | def download(url, path=None, overwrite=False, sha1_hash=None):

FILE: python/ncnn/utils/functional.py
  function xywh2xyxy (line 7) | def xywh2xyxy(x):
  function xyxy2xywh (line 17) | def xyxy2xywh(x):
  function make_grid (line 27) | def make_grid(nx=20, ny=20):
  function sigmoid (line 33) | def sigmoid(x):
  function softmax (line 37) | def softmax(x):
  function iou_of (line 46) | def iou_of(boxes0, boxes1, eps=1e-5):
  function area_of (line 65) | def area_of(left_top, right_bottom):
  function nms (line 79) | def nms(boxes, scores, iou_threshold, top_k=-1, candidate_size=200):

FILE: python/ncnn/utils/objects.py
  class Point (line 7) | class Point(object):
    method __init__ (line 8) | def __init__(self):
  class Rect (line 13) | class Rect(object):
    method __init__ (line 14) | def __init__(self, x=0, y=0, w=0, h=0):
    method area (line 20) | def area(self):
    method intersection_area (line 23) | def intersection_area(self, b):
  class Detect_Object (line 31) | class Detect_Object(object):
    method __init__ (line 32) | def __init__(self, label=0, prob=0, x=0, y=0, w=0, h=0):
  class Face_Object (line 38) | class Face_Object(object):
    method __init__ (line 39) | def __init__(self):
  class KeyPoint (line 45) | class KeyPoint(object):
    method __init__ (line 46) | def __init__(self):

FILE: python/ncnn/utils/visual.py
  function draw_detection_objects (line 8) | def draw_detection_objects(image, class_names, objects, min_prob=0.0):
  function print_topk (line 57) | def print_topk(cls_scores, topk):
  function draw_faceobjects (line 65) | def draw_faceobjects(image, faceobjects):
  function draw_pose (line 147) | def draw_pose(image, keypoints):

FILE: python/src/main.cpp
  class DataReaderFromMemoryCopy (line 25) | class DataReaderFromMemoryCopy : public DataReaderFromMemory
    method DataReaderFromMemoryCopy (line 28) | explicit DataReaderFromMemoryCopy(const unsigned char*& mem)
    method reference (line 33) | virtual size_t reference(size_t size, const void** buf) const
  type LayerFactory (line 39) | struct LayerFactory
  function PYBIND11_MODULE (line 106) | PYBIND11_MODULE(ncnn, m)

FILE: python/src/pybind11_allocator.h
  function fastFree (line 18) | void fastFree(void* ptr) override
  function fastFree (line 33) | void fastFree(void* ptr) override
  function clear (line 45) | void clear() override
  function fastFree (line 53) | void fastFree(ncnn::VkBufferMemory* ptr) override
  function flush (line 57) | int flush(ncnn::VkBufferMemory* ptr) override
  function invalidate (line 61) | int invalidate(ncnn::VkBufferMemory* ptr) override
  function clear (line 72) | void clear() override
  function fastFree (line 80) | void fastFree(ncnn::VkBufferMemory* ptr) override
  function clear (line 91) | void clear() override
  function fastFree (line 99) | void fastFree(ncnn::VkImageMemory* ptr) override

FILE: python/src/pybind11_datareader.h
  function class (line 9) | class DataReaderFromEmpty : public ncnn::DataReader
  function scan (line 31) | int scan(const char* format, void* p) const override
  function read (line 36) | size_t read(void* buf, size_t size) const override
  function scan (line 48) | int scan(const char* format, void* p) const override
  function read (line 53) | size_t read(void* buf, size_t size) const override

FILE: python/src/pybind11_layer.h
  function class (line 10) | class PyLayer : public ncnn::Layer
  function virtual (line 22) | virtual int load_model(const ncnn::ModelBin& mb)
  function virtual (line 31) | virtual int create_pipeline(const ncnn::Option& opt)
  function virtual (line 40) | virtual int destroy_pipeline(const ncnn::Option& opt)
  function virtual (line 60) | virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, c...
  function virtual (line 71) | virtual int forward_inplace(std::vector<ncnn::Mat>& bottom_top_blobs, co...
  function virtual (line 80) | virtual int forward_inplace(ncnn::Mat& bottom_top_blob, const ncnn::Opti...
  function virtual (line 114) | virtual int forward(const ncnn::VkMat& bottom_blob, ncnn::VkMat& top_blo...
  function virtual (line 126) | virtual int forward_inplace(std::vector<ncnn::VkMat>& bottom_top_blobs, ...
  function virtual (line 136) | virtual int forward_inplace(ncnn::VkMat& bottom_top_blob, ncnn::VkComput...

FILE: python/tests/benchmark.py
  function benchmark (line 22) | def benchmark(comment, _in, opt):

FILE: python/tests/test_allocator.py
  function test_pool_allocator (line 9) | def test_pool_allocator():
  function test_unlocked_pool_allocator (line 19) | def test_unlocked_pool_allocator():

FILE: python/tests/test_blob.py
  function test_blob (line 9) | def test_blob():

FILE: python/tests/test_extractor.py
  function test_extractor (line 11) | def test_extractor():
  function test_extractor_index (line 42) | def test_extractor_index():

FILE: python/tests/test_mat.py
  function test_mat_dims1 (line 11) | def test_mat_dims1():
  function test_mat_dims2 (line 43) | def test_mat_dims2():
  function test_mat_dims3 (line 89) | def test_mat_dims3():
  function test_mat_dims4 (line 143) | def test_mat_dims4():
  function test_numpy (line 201) | def test_numpy():
  function test_fill (line 286) | def test_fill():
  function test_clone (line 293) | def test_clone():
  function test_clone_from (line 349) | def test_clone_from():
  function test_reshape (line 407) | def test_reshape():
  function test_create (line 479) | def test_create():
  function test_create_like (line 500) | def test_create_like():
  function test_addref_release (line 528) | def test_addref_release():
  function test_empty (line 539) | def test_empty():
  function test_total (line 547) | def test_total():
  function test_elembits (line 558) | def test_elembits():
  function test_shape (line 567) | def test_shape():
  function test_channel_depth_row (line 582) | def test_channel_depth_row():
  function test_channel_row (line 595) | def test_channel_row():
  function test_channel_range (line 605) | def test_channel_range():
  function test_depth_range (line 616) | def test_depth_range():
  function test_row_range (line 627) | def test_row_range():
  function test_range (line 633) | def test_range():
  function test_getitem_setitem (line 639) | def test_getitem_setitem():
  function test_from_pixels (line 654) | def test_from_pixels():
  function test_from_pixels_resize (line 672) | def test_from_pixels_resize():
  function test_from_pixels_roi (line 704) | def test_from_pixels_roi():
  function test_from_pixels_roi_resize (line 724) | def test_from_pixels_roi_resize():
  function test_substract_mean_normalize (line 748) | def test_substract_mean_normalize():

FILE: python/tests/test_net.py
  function test_net (line 10) | def test_net():
  function test_net_mem (line 34) | def test_net_mem():
  function test_net_vulkan (line 60) | def test_net_vulkan():
  function test_custom_layer (line 86) | def test_custom_layer():
  function test_vulkan_device_index (line 139) | def test_vulkan_device_index():
  function test_vulkan_device_vkdev (line 150) | def test_vulkan_device_vkdev():

FILE: python/tests/test_option.py
  function test_option (line 9) | def test_option():

FILE: python/tests/test_paramdict.py
  function test_paramdict (line 9) | def test_paramdict():

FILE: python/tests/test_vulkan_allocator.py
  function test_vk_blob_allocator (line 9) | def test_vk_blob_allocator():
  function test_vk_weight_allocator (line 36) | def test_vk_weight_allocator():
  function test_vk_staging_allocator (line 63) | def test_vk_staging_allocator():
  function test_vk_weight_staging_allocator (line 90) | def test_vk_weight_staging_allocator():

FILE: python/tests/test_vulkan_device.py
  function check_gpuinfo (line 9) | def check_gpuinfo(gpuinfo):
  function test_gpu_api (line 18) | def test_gpu_api():
  function test_vulkan_device (line 37) | def test_vulkan_device():

FILE: setup.py
  function find_version (line 14) | def find_version():
  class InstallCommand (line 44) | class InstallCommand(install):
    method initialize_options (line 48) | def initialize_options(self):
    method finalize_options (line 52) | def finalize_options(self):
    method run (line 55) | def run(self):
  class CMakeExtension (line 69) | class CMakeExtension(Extension):
    method __init__ (line 70) | def __init__(self, name, sourcedir=""):
  class CMakeBuild (line 75) | class CMakeBuild(build_ext):
    method build_extension (line 76) | def build_extension(self, ext):

FILE: src/allocator.cpp
  type ncnn (line 13) | namespace ncnn {
    class PoolAllocatorPrivate (line 19) | class PoolAllocatorPrivate
    function PoolAllocator (line 62) | PoolAllocator& PoolAllocator::operator=(const PoolAllocator&)
    class UnlockedPoolAllocatorPrivate (line 202) | class UnlockedPoolAllocatorPrivate
    function UnlockedPoolAllocator (line 243) | UnlockedPoolAllocator& UnlockedPoolAllocator::operator=(const Unlocked...
    function round_up (line 369) | static inline size_t round_up(size_t n, size_t multiple)
    function round_down (line 374) | static inline size_t round_down(size_t n, size_t multiple)
    function VkBuffer (line 423) | VkBuffer VkAllocator::create_buffer(size_t size, VkBufferUsageFlags us...
    function VkDeviceMemory (line 446) | VkDeviceMemory VkAllocator::allocate_memory(size_t size, uint32_t memo...
    function VkDeviceMemory (line 465) | VkDeviceMemory VkAllocator::allocate_dedicated_memory(size_t size, uin...
    function VkDeviceMemory (line 491) | VkDeviceMemory VkAllocator::allocate_import_host_memory(size_t size, u...
    function VkImage (line 517) | VkImage VkAllocator::create_image(int width, int height, int depth, Vk...
    function VkImageView (line 549) | VkImageView VkAllocator::create_imageview(VkImage image, VkFormat format)
    function least_common_multiple (line 579) | static inline size_t least_common_multiple(size_t a, size_t b)
    class VkBlobAllocatorPrivate (line 596) | class VkBlobAllocatorPrivate
    function VkBlobAllocator (line 645) | VkBlobAllocator& VkBlobAllocator::operator=(const VkBlobAllocator&)
    function VkBufferMemory (line 695) | VkBufferMemory* VkBlobAllocator::fastMalloc(size_t size)
    function VkImageMemory (line 901) | VkImageMemory* VkBlobAllocator::fastMalloc(int w, int h, int c, size_t...
    class VkWeightAllocatorPrivate (line 1195) | class VkWeightAllocatorPrivate
    function VkWeightAllocator (line 1259) | VkWeightAllocator& VkWeightAllocator::operator=(const VkWeightAllocator&)
    function VkBufferMemory (line 1349) | VkBufferMemory* VkWeightAllocator::fastMalloc(size_t size)
    function VkImageMemory (line 1649) | VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size...
    class VkStagingAllocatorPrivate (line 2020) | class VkStagingAllocatorPrivate
    function VkStagingAllocator (line 2048) | VkStagingAllocator& VkStagingAllocator::operator=(const VkStagingAlloc...
    function VkBufferMemory (line 2083) | VkBufferMemory* VkStagingAllocator::fastMalloc(size_t size)
    function VkImageMemory (line 2151) | VkImageMemory* VkStagingAllocator::fastMalloc(int w, int h, int c, siz...
    class VkWeightStagingAllocatorPrivate (line 2193) | class VkWeightStagingAllocatorPrivate
    function VkWeightStagingAllocator (line 2215) | VkWeightStagingAllocator& VkWeightStagingAllocator::operator=(const Vk...
    function VkBufferMemory (line 2220) | VkBufferMemory* VkWeightStagingAllocator::fastMalloc(size_t size)
    function VkImageMemory (line 2272) | VkImageMemory* VkWeightStagingAllocator::fastMalloc(int /*w*/, int /*h...
    function VkAndroidHardwareBufferImageAllocator (line 2305) | VkAndroidHardwareBufferImageAllocator& VkAndroidHardwareBufferImageAll...
    function VkBufferMemory (line 2310) | VkBufferMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(size...
    function VkImageMemory (line 2319) | VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /...

FILE: src/allocator.h
  function namespace (line 22) | namespace ncnn {

FILE: src/benchmark.cpp
  type ncnn (line 42) | namespace ncnn {
    function get_current_time (line 44) | double get_current_time()
    function sleep (line 67) | void sleep(unsigned long long int milliseconds)
    function benchmark (line 89) | void benchmark(const Layer* layer, double start, double end)
    function benchmark (line 96) | void benchmark(const Layer* layer, const Mat& bottom_blob, Mat& top_bl...

FILE: src/benchmark.h
  function namespace (line 11) | namespace ncnn {

FILE: src/blob.cpp
  type ncnn (line 6) | namespace ncnn {

FILE: src/blob.h
  function namespace (line 10) | namespace ncnn {

FILE: src/c_api.cpp
  function ncnn_version_number (line 42) | int ncnn_version_number()
  class PoolAllocator_c_api (line 48) | class PoolAllocator_c_api : public ncnn::PoolAllocator
    method PoolAllocator_c_api (line 51) | PoolAllocator_c_api(ncnn_allocator_t _allocator)
    method fastFree (line 62) | virtual void fastFree(void* ptr)
  function __ncnn_PoolAllocator_fast_free (line 76) | static void __ncnn_PoolAllocator_fast_free(ncnn_allocator_t allocator, v...
  class UnlockedPoolAllocator_c_api (line 81) | class UnlockedPoolAllocator_c_api : public ncnn::UnlockedPoolAllocator
    method UnlockedPoolAllocator_c_api (line 84) | UnlockedPoolAllocator_c_api(ncnn_allocator_t _allocator)
    method fastFree (line 95) | virtual void fastFree(void* ptr)
  function __ncnn_UnlockedPoolAllocator_fast_free (line 109) | static void __ncnn_UnlockedPoolAllocator_fast_free(ncnn_allocator_t allo...
  function ncnn_allocator_t (line 114) | ncnn_allocator_t ncnn_allocator_create_pool_allocator()
  function ncnn_allocator_t (line 123) | ncnn_allocator_t ncnn_allocator_create_unlocked_pool_allocator()
  function ncnn_allocator_destroy (line 132) | void ncnn_allocator_destroy(ncnn_allocator_t allocator)
  function ncnn_option_t (line 142) | ncnn_option_t ncnn_option_create()
  function ncnn_option_destroy (line 147) | void ncnn_option_destroy(ncnn_option_t opt)
  function ncnn_option_get_num_threads (line 152) | int ncnn_option_get_num_threads(const ncnn_option_t opt)
  function ncnn_option_set_num_threads (line 157) | void ncnn_option_set_num_threads(ncnn_option_t opt, int num_threads)
  function ncnn_option_set_blob_allocator (line 162) | void ncnn_option_set_blob_allocator(ncnn_option_t opt, ncnn_allocator_t ...
  function ncnn_option_set_workspace_allocator (line 167) | void ncnn_option_set_workspace_allocator(ncnn_option_t opt, ncnn_allocat...
  function ncnn_option_get_use_vulkan_compute (line 172) | int ncnn_option_get_use_vulkan_compute(const ncnn_option_t opt)
  function ncnn_option_get_use_local_pool_allocator (line 182) | int ncnn_option_get_use_local_pool_allocator(const ncnn_option_t opt)
  function ncnn_option_get_use_winograd_convolution (line 187) | int ncnn_option_get_use_winograd_convolution(const ncnn_option_t opt)
  function ncnn_option_get_use_sgemm_convolution (line 192) | int ncnn_option_get_use_sgemm_convolution(const ncnn_option_t opt)
  function ncnn_option_get_use_packing_layout (line 197) | int ncnn_option_get_use_packing_layout(const ncnn_option_t opt)
  function ncnn_option_get_use_fp16_packed (line 202) | int ncnn_option_get_use_fp16_packed(const ncnn_option_t opt)
  function ncnn_option_get_use_fp16_storage (line 207) | int ncnn_option_get_use_fp16_storage(const ncnn_option_t opt)
  function ncnn_option_get_use_fp16_arithmetic (line 212) | int ncnn_option_get_use_fp16_arithmetic(const ncnn_option_t opt)
  function ncnn_option_get_use_int8_packed (line 217) | int ncnn_option_get_use_int8_packed(const ncnn_option_t opt)
  function ncnn_option_get_use_int8_storage (line 222) | int ncnn_option_get_use_int8_storage(const ncnn_option_t opt)
  function ncnn_option_get_use_int8_arithmetic (line 227) | int ncnn_option_get_use_int8_arithmetic(const ncnn_option_t opt)
  function ncnn_option_get_use_bf16_packed (line 232) | int ncnn_option_get_use_bf16_packed(const ncnn_option_t opt)
  function ncnn_option_get_use_bf16_storage (line 237) | int ncnn_option_get_use_bf16_storage(const ncnn_option_t opt)
  function ncnn_option_get_use_shader_local_memory (line 242) | int ncnn_option_get_use_shader_local_memory(const ncnn_option_t opt)
  function ncnn_option_get_use_cooperative_matrix (line 252) | int ncnn_option_get_use_cooperative_matrix(const ncnn_option_t opt)
  function ncnn_option_set_use_vulkan_compute (line 262) | void ncnn_option_set_use_vulkan_compute(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_local_pool_allocator (line 272) | void ncnn_option_set_use_local_pool_allocator(ncnn_option_t opt, int ena...
  function ncnn_option_set_use_winograd_convolution (line 277) | void ncnn_option_set_use_winograd_convolution(ncnn_option_t opt, int ena...
  function ncnn_option_set_use_sgemm_convolution (line 282) | void ncnn_option_set_use_sgemm_convolution(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_packing_layout (line 287) | void ncnn_option_set_use_packing_layout(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_fp16_packed (line 292) | void ncnn_option_set_use_fp16_packed(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_fp16_storage (line 297) | void ncnn_option_set_use_fp16_storage(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_fp16_arithmetic (line 302) | void ncnn_option_set_use_fp16_arithmetic(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_int8_packed (line 307) | void ncnn_option_set_use_int8_packed(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_int8_storage (line 312) | void ncnn_option_set_use_int8_storage(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_int8_arithmetic (line 317) | void ncnn_option_set_use_int8_arithmetic(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_bf16_packed (line 322) | void ncnn_option_set_use_bf16_packed(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_bf16_storage (line 327) | void ncnn_option_set_use_bf16_storage(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_shader_local_memory (line 332) | void ncnn_option_set_use_shader_local_memory(ncnn_option_t opt, int enable)
  function ncnn_option_set_use_cooperative_matrix (line 342) | void ncnn_option_set_use_cooperative_matrix(ncnn_option_t opt, int enable)
  function ncnn_mat_t (line 353) | ncnn_mat_t ncnn_mat_create()
  function ncnn_mat_t (line 358) | ncnn_mat_t ncnn_mat_create_1d(int w, ncnn_allocator_t allocator)
  function ncnn_mat_t (line 363) | ncnn_mat_t ncnn_mat_create_2d(int w, int h, ncnn_allocator_t allocator)
  function ncnn_mat_t (line 368) | ncnn_mat_t ncnn_mat_create_3d(int w, int h, int c, ncnn_allocator_t allo...
  function ncnn_mat_t (line 373) | ncnn_mat_t ncnn_mat_create_4d(int w, int h, int d, int c, ncnn_allocator...
  function ncnn_mat_t (line 378) | ncnn_mat_t ncnn_mat_create_external_1d(int w, void* data, ncnn_allocator...
  function ncnn_mat_t (line 383) | ncnn_mat_t ncnn_mat_create_external_2d(int w, int h, void* data, ncnn_al...
  function ncnn_mat_t (line 388) | ncnn_mat_t ncnn_mat_create_external_3d(int w, int h, int c, void* data, ...
  function ncnn_mat_t (line 393) | ncnn_mat_t ncnn_mat_create_external_4d(int w, int h, int d, int c, void*...
  function ncnn_mat_t (line 398) | ncnn_mat_t ncnn_mat_create_1d_elem(int w, size_t elemsize, int elempack,...
  function ncnn_mat_t (line 403) | ncnn_mat_t ncnn_mat_create_2d_elem(int w, int h, size_t elemsize, int el...
  function ncnn_mat_t (line 408) | ncnn_mat_t ncnn_mat_create_3d_elem(int w, int h, int c, size_t elemsize,...
  function ncnn_mat_t (line 413) | ncnn_mat_t ncnn_mat_create_4d_elem(int w, int h, int d, int c, size_t el...
  function ncnn_mat_t (line 418) | ncnn_mat_t ncnn_mat_create_external_1d_elem(int w, void* data, size_t el...
  function ncnn_mat_t (line 423) | ncnn_mat_t ncnn_mat_create_external_2d_elem(int w, int h, void* data, si...
  function ncnn_mat_t (line 428) | ncnn_mat_t ncnn_mat_create_external_3d_elem(int w, int h, int c, void* d...
  function ncnn_mat_t (line 433) | ncnn_mat_t ncnn_mat_create_external_4d_elem(int w, int h, int d, int c, ...
  function ncnn_mat_destroy (line 438) | void ncnn_mat_destroy(ncnn_mat_t mat)
  function ncnn_mat_fill_float (line 443) | void ncnn_mat_fill_float(ncnn_mat_t mat, float v)
  function ncnn_mat_t (line 448) | ncnn_mat_t ncnn_mat_clone(const ncnn_mat_t mat, ncnn_allocator_t allocator)
  function ncnn_mat_t (line 453) | ncnn_mat_t ncnn_mat_reshape_1d(const ncnn_mat_t mat, int w, ncnn_allocat...
  function ncnn_mat_t (line 458) | ncnn_mat_t ncnn_mat_reshape_2d(const ncnn_mat_t mat, int w, int h, ncnn_...
  function ncnn_mat_t (line 463) | ncnn_mat_t ncnn_mat_reshape_3d(const ncnn_mat_t mat, int w, int h, int c...
  function ncnn_mat_t (line 468) | ncnn_mat_t ncnn_mat_reshape_4d(const ncnn_mat_t mat, int w, int h, int d...
  function ncnn_mat_get_dims (line 473) | int ncnn_mat_get_dims(const ncnn_mat_t mat)
  function ncnn_mat_get_w (line 478) | int ncnn_mat_get_w(const ncnn_mat_t mat)
  function ncnn_mat_get_h (line 483) | int ncnn_mat_get_h(const ncnn_mat_t mat)
  function ncnn_mat_get_d (line 488) | int ncnn_mat_get_d(const ncnn_mat_t mat)
  function ncnn_mat_get_c (line 493) | int ncnn_mat_get_c(const ncnn_mat_t mat)
  function ncnn_mat_get_elemsize (line 498) | size_t ncnn_mat_get_elemsize(const ncnn_mat_t mat)
  function ncnn_mat_get_elempack (line 503) | int ncnn_mat_get_elempack(const ncnn_mat_t mat)
  function ncnn_mat_get_cstep (line 508) | size_t ncnn_mat_get_cstep(const ncnn_mat_t mat)
  function ncnn_mat_t (line 526) | ncnn_mat_t ncnn_mat_from_pixels(const unsigned char* pixels, int type, i...
  function ncnn_mat_t (line 531) | ncnn_mat_t ncnn_mat_from_pixels_resize(const unsigned char* pixels, int ...
  function ncnn_mat_t (line 536) | ncnn_mat_t ncnn_mat_from_pixels_roi(const unsigned char* pixels, int typ...
  function ncnn_mat_t (line 541) | ncnn_mat_t ncnn_mat_from_pixels_roi_resize(const unsigned char* pixels, ...
  function ncnn_mat_to_pixels (line 546) | void ncnn_mat_to_pixels(const ncnn_mat_t mat, unsigned char* pixels, int...
  function ncnn_mat_to_pixels_resize (line 551) | void ncnn_mat_to_pixels_resize(const ncnn_mat_t mat, unsigned char* pixe...
  function ncnn_mat_substract_mean_normalize (line 558) | void ncnn_mat_substract_mean_normalize(ncnn_mat_t mat, const float* mean...
  function ncnn_convert_packing (line 563) | void ncnn_convert_packing(const ncnn_mat_t src, ncnn_mat_t* dst, int ele...
  function ncnn_flatten (line 570) | void ncnn_flatten(const ncnn_mat_t src, ncnn_mat_t* dst, const ncnn_opti...
  function ncnn_blob_get_producer (line 585) | int ncnn_blob_get_producer(const ncnn_blob_t blob)
  function ncnn_blob_get_consumer (line 590) | int ncnn_blob_get_consumer(const ncnn_blob_t blob)
  function ncnn_blob_get_shape (line 595) | void ncnn_blob_get_shape(const ncnn_blob_t blob, int* dims, int* w, int*...
  function ncnn_paramdict_t (line 605) | ncnn_paramdict_t ncnn_paramdict_create()
  function ncnn_paramdict_destroy (line 610) | void ncnn_paramdict_destroy(ncnn_paramdict_t pd)
  function ncnn_paramdict_get_type (line 615) | int ncnn_paramdict_get_type(const ncnn_paramdict_t pd, int id)
  function ncnn_paramdict_get_int (line 620) | int ncnn_paramdict_get_int(const ncnn_paramdict_t pd, int id, int def)
  function ncnn_paramdict_get_float (line 625) | float ncnn_paramdict_get_float(const ncnn_paramdict_t pd, int id, float ...
  function ncnn_mat_t (line 630) | ncnn_mat_t ncnn_paramdict_get_array(ncnn_paramdict_t pd, int id, const n...
  function ncnn_paramdict_set_int (line 635) | void ncnn_paramdict_set_int(ncnn_paramdict_t pd, int id, int i)
  function ncnn_paramdict_set_float (line 640) | void ncnn_paramdict_set_float(ncnn_paramdict_t pd, int id, float f)
  function ncnn_paramdict_set_array (line 645) | void ncnn_paramdict_set_array(ncnn_paramdict_t pd, int id, ncnn_mat_t v)
  class DataReader_c_api (line 651) | class DataReader_c_api : public ncnn::DataReader
    method DataReader_c_api (line 654) | DataReader_c_api(ncnn_datareader_t _dr)
    method scan (line 661) | virtual int scan(const char* format, void* p) const
    method read (line 667) | virtual size_t read(void* buf, size_t size) const
  function __ncnn_DataReader_scan (line 677) | static int __ncnn_DataReader_scan(ncnn_datareader_t dr, const char* form...
  function __ncnn_DataReader_read (line 683) | static size_t __ncnn_DataReader_read(ncnn_datareader_t dr, void* buf, si...
  class DataReaderFromStdio_c_api (line 689) | class DataReaderFromStdio_c_api : public ncnn::DataReaderFromStdio
    method DataReaderFromStdio_c_api (line 692) | DataReaderFromStdio_c_api(FILE* fp, ncnn_datareader_t _dr)
    method scan (line 699) | virtual int scan(const char* format, void* p) const
    method read (line 705) | virtual size_t read(void* buf, size_t size) const
  function __ncnn_DataReaderFromStdio_scan (line 715) | static int __ncnn_DataReaderFromStdio_scan(ncnn_datareader_t dr, const c...
  function __ncnn_DataReaderFromStdio_read (line 721) | static size_t __ncnn_DataReaderFromStdio_read(ncnn_datareader_t dr, void...
  class DataReaderFromMemory_c_api (line 727) | class DataReaderFromMemory_c_api : public ncnn::DataReaderFromMemory
    method DataReaderFromMemory_c_api (line 730) | DataReaderFromMemory_c_api(const unsigned char*& mem, ncnn_datareader_...
    method scan (line 737) | virtual int scan(const char* format, void* p) const
    method read (line 743) | virtual size_t read(void* buf, size_t size) const
  function __ncnn_DataReaderFromMemory_scan (line 753) | static int __ncnn_DataReaderFromMemory_scan(ncnn_datareader_t dr, const ...
  function __ncnn_DataReaderFromMemory_read (line 759) | static size_t __ncnn_DataReaderFromMemory_read(ncnn_datareader_t dr, voi...
  function ncnn_datareader_t (line 764) | ncnn_datareader_t ncnn_datareader_create()
  function ncnn_datareader_t (line 776) | ncnn_datareader_t ncnn_datareader_create_from_stdio(FILE* fp)
  function ncnn_datareader_t (line 788) | ncnn_datareader_t ncnn_datareader_create_from_memory(const unsigned char...
  function ncnn_datareader_destroy (line 799) | void ncnn_datareader_destroy(ncnn_datareader_t dr)
  class ModelBinFromDataReader_c_api (line 806) | class ModelBinFromDataReader_c_api : public ncnn::ModelBinFromDataReader
    method ModelBinFromDataReader_c_api (line 809) | ModelBinFromDataReader_c_api(ncnn_modelbin_t _mb, const DataReader& dr)
    method Mat (line 815) | virtual Mat load(int w, int type) const
    method Mat (line 823) | virtual Mat load(int w, int h, int type) const
    method Mat (line 831) | virtual Mat load(int w, int h, int c, int type) const
  function ncnn_mat_t (line 843) | static ncnn_mat_t __ncnn_ModelBinFromDataReader_load_1d(const ncnn_model...
  function ncnn_mat_t (line 848) | static ncnn_mat_t __ncnn_ModelBinFromDataReader_load_2d(const ncnn_model...
  function ncnn_mat_t (line 853) | static ncnn_mat_t __ncnn_ModelBinFromDataReader_load_3d(const ncnn_model...
  class ModelBinFromMatArray_c_api (line 858) | class ModelBinFromMatArray_c_api : public ncnn::ModelBinFromMatArray
    method ModelBinFromMatArray_c_api (line 861) | ModelBinFromMatArray_c_api(ncnn_modelbin_t _mb, const Mat* weights)
    method Mat (line 867) | virtual Mat load(int w, int type) const
    method Mat (line 875) | virtual Mat load(int w, int h, int type) const
    method Mat (line 883) | virtual Mat load(int w, int h, int c, int type) const
  function ncnn_mat_t (line 895) | static ncnn_mat_t __ncnn_ModelBinFromMatArray_load_1d(const ncnn_modelbi...
  function ncnn_mat_t (line 900) | static ncnn_mat_t __ncnn_ModelBinFromMatArray_load_2d(const ncnn_modelbi...
  function ncnn_mat_t (line 905) | static ncnn_mat_t __ncnn_ModelBinFromMatArray_load_3d(const ncnn_modelbi...
  function ncnn_modelbin_t (line 910) | ncnn_modelbin_t ncnn_modelbin_create_from_datareader(const ncnn_dataread...
  function ncnn_modelbin_t (line 920) | ncnn_modelbin_t ncnn_modelbin_create_from_mat_array(const ncnn_mat_t* we...
  function ncnn_modelbin_destroy (line 935) | void ncnn_modelbin_destroy(ncnn_modelbin_t mb)
  function ncnn_mat_t (line 941) | static ncnn_mat_t __ncnn_modelbin_load_1d(const ncnn_modelbin_t mb, int ...
  function ncnn_mat_t (line 946) | static ncnn_mat_t __ncnn_modelbin_load_2d(const ncnn_modelbin_t mb, int ...
  function ncnn_mat_t (line 951) | static ncnn_mat_t __ncnn_modelbin_load_3d(const ncnn_modelbin_t mb, int ...
  class Layer_c_api (line 957) | class Layer_c_api : public Layer
    method Layer_c_api (line 960) | Layer_c_api(ncnn_layer_t _layer)
    method load_param (line 966) | virtual int load_param(const ParamDict& pd)
    method load_model (line 971) | virtual int load_model(const ModelBin& mb)
    method create_pipeline (line 981) | virtual int create_pipeline(const Option& opt)
    method destroy_pipeline (line 986) | virtual int destroy_pipeline(const Option& opt)
    method forward (line 991) | virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<...
    method forward (line 1010) | virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Optio...
    method forward_inplace (line 1019) | virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const ...
    method forward_inplace (line 1030) | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) c...
  function __ncnn_Layer_load_param (line 1039) | static int __ncnn_Layer_load_param(ncnn_layer_t layer, const ncnn_paramd...
  function __ncnn_Layer_load_model (line 1044) | static int __ncnn_Layer_load_model(ncnn_layer_t layer, const ncnn_modelb...
  function __ncnn_Layer_create_pipeline (line 1049) | static int __ncnn_Layer_create_pipeline(ncnn_layer_t layer, const ncnn_o...
  function __ncnn_Layer_destroy_pipeline (line 1054) | static int __ncnn_Layer_destroy_pipeline(ncnn_layer_t layer, const ncnn_...
  function __ncnn_Layer_forward_1 (line 1059) | static int __ncnn_Layer_forward_1(const ncnn_layer_t layer, const ncnn_m...
  function __ncnn_Layer_forward_n (line 1067) | static int __ncnn_Layer_forward_n(const ncnn_layer_t layer, const ncnn_m...
  function __ncnn_Layer_forward_inplace_1 (line 1083) | static int __ncnn_Layer_forward_inplace_1(const ncnn_layer_t layer, ncnn...
  function __ncnn_Layer_forward_inplace_n (line 1088) | static int __ncnn_Layer_forward_inplace_n(const ncnn_layer_t layer, ncnn...
  function __ncnn_layer_load_param (line 1098) | static int __ncnn_layer_load_param(ncnn_layer_t layer, const ncnn_paramd...
  function __ncnn_layer_load_model (line 1103) | static int __ncnn_layer_load_model(ncnn_layer_t layer, const ncnn_modelb...
  function __ncnn_layer_create_pipeline (line 1108) | static int __ncnn_layer_create_pipeline(ncnn_layer_t layer, const ncnn_o...
  function __ncnn_layer_destroy_pipeline (line 1113) | static int __ncnn_layer_destroy_pipeline(ncnn_layer_t layer, const ncnn_...
  function __ncnn_layer_forward_1 (line 1118) | static int __ncnn_layer_forward_1(const ncnn_layer_t layer, const ncnn_m...
  function __ncnn_layer_forward_n (line 1126) | static int __ncnn_layer_forward_n(const ncnn_layer_t layer, const ncnn_m...
  function __ncnn_layer_forward_inplace_1 (line 1142) | static int __ncnn_layer_forward_inplace_1(const ncnn_layer_t layer, ncnn...
  function __ncnn_layer_forward_inplace_n (line 1147) | static int __ncnn_layer_forward_inplace_n(const ncnn_layer_t layer, ncnn...
  function ncnn_layer_t (line 1157) | ncnn_layer_t ncnn_layer_create()
  function ncnn_layer_t (line 1172) | ncnn_layer_t ncnn_layer_create_by_typeindex(int typeindex)
  function ncnn_layer_t (line 1194) | ncnn_layer_t ncnn_layer_create_by_type(const char* type)
  function ncnn_layer_type_to_index (line 1215) | int ncnn_layer_type_to_index(const char* type)
  function ncnn_layer_destroy (line 1221) | void ncnn_layer_destroy(ncnn_layer_t layer)
  function ncnn_layer_get_typeindex (line 1234) | int ncnn_layer_get_typeindex(const ncnn_layer_t layer)
  function ncnn_layer_get_one_blob_only (line 1246) | int ncnn_layer_get_one_blob_only(const ncnn_layer_t layer)
  function ncnn_layer_get_support_inplace (line 1251) | int ncnn_layer_get_support_inplace(const ncnn_layer_t layer)
  function ncnn_layer_get_support_vulkan (line 1256) | int ncnn_layer_get_support_vulkan(const ncnn_layer_t layer)
  function ncnn_layer_get_support_packing (line 1266) | int ncnn_layer_get_support_packing(const ncnn_layer_t layer)
  function ncnn_layer_get_support_bf16_storage (line 1271) | int ncnn_layer_get_support_bf16_storage(const ncnn_layer_t layer)
  function ncnn_layer_get_support_fp16_storage (line 1276) | int ncnn_layer_get_support_fp16_storage(const ncnn_layer_t layer)
  function ncnn_layer_get_support_vulkan_packing (line 1281) | int ncnn_layer_get_support_vulkan_packing(const ncnn_layer_t layer)
  function ncnn_layer_get_support_any_packing (line 1291) | int ncnn_layer_get_support_any_packing(const ncnn_layer_t layer)
  function ncnn_layer_get_support_vulkan_any_packing (line 1296) | int ncnn_layer_get_support_vulkan_any_packing(const ncnn_layer_t layer)
  function ncnn_layer_set_one_blob_only (line 1306) | void ncnn_layer_set_one_blob_only(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_inplace (line 1311) | void ncnn_layer_set_support_inplace(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_vulkan (line 1316) | void ncnn_layer_set_support_vulkan(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_packing (line 1326) | void ncnn_layer_set_support_packing(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_bf16_storage (line 1331) | void ncnn_layer_set_support_bf16_storage(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_fp16_storage (line 1336) | void ncnn_layer_set_support_fp16_storage(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_vulkan_packing (line 1341) | void ncnn_layer_set_support_vulkan_packing(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_any_packing (line 1351) | void ncnn_layer_set_support_any_packing(ncnn_layer_t layer, int enable)
  function ncnn_layer_set_support_vulkan_any_packing (line 1356) | void ncnn_layer_set_support_vulkan_any_packing(ncnn_layer_t layer, int e...
  function ncnn_layer_get_bottom_count (line 1366) | int ncnn_layer_get_bottom_count(const ncnn_layer_t layer)
  function ncnn_layer_get_bottom (line 1371) | int ncnn_layer_get_bottom(const ncnn_layer_t layer, int i)
  function ncnn_layer_get_top_count (line 1376) | int ncnn_layer_get_top_count(const ncnn_layer_t layer)
  function ncnn_layer_get_top (line 1381) | int ncnn_layer_get_top(const ncnn_layer_t layer, int i)
  function ncnn_blob_get_bottom_shape (line 1386) | void ncnn_blob_get_bottom_shape(const ncnn_layer_t layer, int i, int* di...
  function ncnn_blob_get_top_shape (line 1395) | void ncnn_blob_get_top_shape(const ncnn_layer_t layer, int i, int* dims,...
  function ncnn_net_t (line 1405) | ncnn_net_t ncnn_net_create()
  function ncnn_net_destroy (line 1413) | void ncnn_net_destroy(ncnn_net_t net)
  function ncnn_option_t (line 1426) | ncnn_option_t ncnn_net_get_option(ncnn_net_t net)
  function ncnn_net_set_option (line 1431) | void ncnn_net_set_option(ncnn_net_t net, ncnn_option_t opt)
  function ncnn_net_set_vulkan_device (line 1437) | void ncnn_net_set_vulkan_device(ncnn_net_t net, int device_index)
  function __Layer_c_api_layer_destroyer (line 1464) | static void __Layer_c_api_layer_destroyer(::ncnn::Layer* layer, void* us...
  function ncnn_net_register_custom_layer_by_type (line 1474) | void ncnn_net_register_custom_layer_by_type(ncnn_net_t net, const char* ...
  function ncnn_net_register_custom_layer_by_typeindex (line 1486) | void ncnn_net_register_custom_layer_by_typeindex(ncnn_net_t net, int typ...
  function ncnn_net_load_param (line 1499) | int ncnn_net_load_param(ncnn_net_t net, const char* path)
  function ncnn_net_load_param_bin (line 1505) | int ncnn_net_load_param_bin(ncnn_net_t net, const char* path)
  function ncnn_net_load_model (line 1510) | int ncnn_net_load_model(ncnn_net_t net, const char* path)
  function ncnn_net_load_param_w (line 1517) | int ncnn_net_load_param_w(ncnn_net_t net, const wchar_t* path)
  function ncnn_net_load_param_bin_w (line 1523) | int ncnn_net_load_param_bin_w(ncnn_net_t net, const wchar_t* path)
  function ncnn_net_load_model_w (line 1528) | int ncnn_net_load_model_w(ncnn_net_t net, const wchar_t* path)
  function ncnn_net_load_param_memory (line 1537) | int ncnn_net_load_param_memory(ncnn_net_t net, const char* mem)
  function ncnn_net_load_param_bin_memory (line 1544) | size_t ncnn_net_load_param_bin_memory(ncnn_net_t net, const unsigned cha...
  function ncnn_net_load_model_memory (line 1549) | size_t ncnn_net_load_model_memory(ncnn_net_t net, const unsigned char* mem)
  function ncnn_net_load_param_datareader (line 1555) | int ncnn_net_load_param_datareader(ncnn_net_t net, const ncnn_datareader...
  function ncnn_net_load_param_bin_datareader (line 1561) | int ncnn_net_load_param_bin_datareader(ncnn_net_t net, const ncnn_datare...
  function ncnn_net_load_model_datareader (line 1566) | int ncnn_net_load_model_datareader(ncnn_net_t net, const ncnn_datareader...
  function ncnn_net_clear (line 1571) | void ncnn_net_clear(ncnn_net_t net)
  function ncnn_net_get_input_count (line 1576) | int ncnn_net_get_input_count(const ncnn_net_t net)
  function ncnn_net_get_output_count (line 1581) | int ncnn_net_get_output_count(const ncnn_net_t net)
  function ncnn_net_get_input_index (line 1598) | int ncnn_net_get_input_index(const ncnn_net_t net, int i)
  function ncnn_net_get_output_index (line 1603) | int ncnn_net_get_output_index(const ncnn_net_t net, int i)
  function ncnn_extractor_t (line 1609) | ncnn_extractor_t ncnn_extractor_create(ncnn_net_t net)
  function ncnn_extractor_destroy (line 1614) | void ncnn_extractor_destroy(ncnn_extractor_t ex)
  function ncnn_extractor_set_option (line 1619) | void ncnn_extractor_set_option(ncnn_extractor_t ex, const ncnn_option_t ...
  function ncnn_extractor_input (line 1626) | int ncnn_extractor_input(ncnn_extractor_t ex, const char* name, const nc...
  function ncnn_extractor_extract (line 1631) | int ncnn_extractor_extract(ncnn_extractor_t ex, const char* name, ncnn_m...
  function ncnn_extractor_input_index (line 1640) | int ncnn_extractor_input_index(ncnn_extractor_t ex, int index, const ncn...
  function ncnn_extractor_extract_index (line 1645) | int ncnn_extractor_extract_index(ncnn_extractor_t ex, int index, ncnn_ma...
  function ncnn_copy_make_border (line 1653) | void ncnn_copy_make_border(const ncnn_mat_t src, ncnn_mat_t dst, int top...
  function ncnn_copy_make_border_3d (line 1659) | void ncnn_copy_make_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int ...
  function ncnn_copy_cut_border (line 1665) | void ncnn_copy_cut_border(const ncnn_mat_t src, ncnn_mat_t dst, int top,...
  function ncnn_copy_cut_border_3d (line 1671) | void ncnn_copy_cut_border_3d(const ncnn_mat_t src, ncnn_mat_t dst, int t...
  function ncnn_draw_rectangle_c1 (line 1678) | void ncnn_draw_rectangle_c1(unsigned char* pixels, int w, int h, int rx,...
  function ncnn_draw_rectangle_c2 (line 1683) | void ncnn_draw_rectangle_c2(unsigned char* pixels, int w, int h, int rx,...
  function ncnn_draw_rectangle_c3 (line 1688) | void ncnn_draw_rectangle_c3(unsigned char* pixels, int w, int h, int rx,...
  function ncnn_draw_rectangle_c4 (line 1693) | void ncnn_draw_rectangle_c4(unsigned char* pixels, int w, int h, int rx,...
  function ncnn_draw_text_c1 (line 1698) | void ncnn_draw_text_c1(unsigned char* pixels, int w, int h, const char* ...
  function ncnn_draw_text_c2 (line 1703) | void ncnn_draw_text_c2(unsigned char* pixels, int w, int h, const char* ...
  function ncnn_draw_text_c3 (line 1708) | void ncnn_draw_text_c3(unsigned char* pixels, int w, int h, const char* ...
  function ncnn_draw_text_c4 (line 1713) | void ncnn_draw_text_c4(unsigned char* pixels, int w, int h, const char* ...
  function ncnn_draw_circle_c1 (line 1718) | void ncnn_draw_circle_c1(unsigned char* pixels, int w, int h, int cx, in...
  function ncnn_draw_circle_c2 (line 1723) | void ncnn_draw_circle_c2(unsigned char* pixels, int w, int h, int cx, in...
  function ncnn_draw_circle_c3 (line 1728) | void ncnn_draw_circle_c3(unsigned char* pixels, int w, int h, int cx, in...
  function ncnn_draw_circle_c4 (line 1733) | void ncnn_draw_circle_c4(unsigned char* pixels, int w, int h, int cx, in...
  function ncnn_draw_line_c1 (line 1738) | void ncnn_draw_line_c1(unsigned char* pixels, int w, int h, int x0, int ...
  function ncnn_draw_line_c2 (line 1743) | void ncnn_draw_line_c2(unsigned char* pixels, int w, int h, int x0, int ...
  function ncnn_draw_line_c3 (line 1748) | void ncnn_draw_line_c3(unsigned char* pixels, int w, int h, int x0, int ...
  function ncnn_draw_line_c4 (line 1753) | void ncnn_draw_line_c4(unsigned char* pixels, int w, int h, int x0, int ...

FILE: src/c_api.h
  type __ncnn_allocator_t (line 22) | struct __ncnn_allocator_t
  function __ncnn_allocator_t (line 23) | struct NCNN_EXPORT __ncnn_allocator_t
  type __ncnn_option_t (line 36) | struct __ncnn_option_t
  type __ncnn_mat_t (line 80) | struct __ncnn_mat_t
  type __ncnn_blob_t (line 145) | struct __ncnn_blob_t
  type __ncnn_paramdict_t (line 157) | struct __ncnn_paramdict_t
  type __ncnn_datareader_t (line 173) | struct __ncnn_datareader_t
  function __ncnn_datareader_t (line 174) | struct NCNN_EXPORT __ncnn_datareader_t
  type __ncnn_modelbin_t (line 192) | struct __ncnn_modelbin_t
  function __ncnn_modelbin_t (line 193) | struct NCNN_EXPORT __ncnn_modelbin_t
  type __ncnn_layer_t (line 207) | struct __ncnn_layer_t
  function __ncnn_layer_t (line 208) | struct NCNN_EXPORT __ncnn_layer_t
  type ncnn_layer_t (line 271) | typedef ncnn_layer_t (*ncnn_layer_creator_t)(void* userdata);
  type __ncnn_net_custom_layer_factory_t (line 274) | struct __ncnn_net_custom_layer_factory_t
  type __ncnn_net_custom_layer_factory_t (line 275) | struct __ncnn_net_custom_layer_factory_t
  type __ncnn_net_t (line 284) | struct __ncnn_net_t
  type __ncnn_net_t (line 285) | struct __ncnn_net_t
  type __ncnn_extractor_t (line 348) | struct __ncnn_extractor_t

FILE: src/command.cpp
  type ncnn (line 11) | namespace ncnn {
    class VkComputePrivate (line 13) | class VkComputePrivate
      type record (line 42) | struct record
    type ImportAndroidHardwareBufferDescriptorInfo (line 1673) | struct ImportAndroidHardwareBufferDescriptorInfo
    type ImportAndroidHardwareBufferDescriptorInfo (line 1737) | struct ImportAndroidHardwareBufferDescriptorInfo
    class VkTransferPrivate (line 2294) | class VkTransferPrivate

FILE: src/command.h
  function namespace (line 13) | namespace ncnn {

FILE: src/cpu.cpp
  function ncnn_kmp_env_initializer (line 146) | __attribute__((constructor)) void ncnn_kmp_env_initializer()
  function is_being_debugged (line 258) | static bool is_being_debugged()
  function get_elf_hwcap_from_getauxval (line 367) | static unsigned int get_elf_hwcap_from_getauxval(unsigned int type)
  function get_elf_hwcap_from_proc_self_auxv (line 407) | static unsigned int get_elf_hwcap_from_proc_self_auxv(unsigned int type)
  function get_elf_hwcap (line 453) | static unsigned int get_elf_hwcap(unsigned int type)
  function get_hw_cpufamily (line 488) | static unsigned int get_hw_cpufamily()
  function cpu_type_t (line 496) | static cpu_type_t get_hw_cputype()
  function cpu_subtype_t (line 504) | static cpu_subtype_t get_hw_cpusubtype()
  function get_hw_capability (line 512) | static int get_hw_capability(const char* cap)
  function x86_cpuid (line 522) | static inline void x86_cpuid(int level, unsigned int out[4])
  function x86_cpuid_sublevel (line 537) | static inline void x86_cpuid_sublevel(int level, int sublevel, unsigned ...
  function x86_get_xcr0 (line 552) | static inline int x86_get_xcr0()
  function get_cpu_support_x86_avx (line 569) | static int get_cpu_support_x86_avx()
  function get_cpu_support_x86_fma (line 590) | static int get_cpu_support_x86_fma()
  function get_cpu_support_x86_xop (line 611) | static int get_cpu_support_x86_xop()
  function get_cpu_support_x86_f16c (line 624) | static int get_cpu_support_x86_f16c()
  function get_cpu_support_x86_avx2 (line 638) | static int get_cpu_support_x86_avx2()
  function get_cpu_support_x86_avx_vnni (line 660) | static int get_cpu_support_x86_avx_vnni()
  function get_cpu_support_x86_avx_vnni_int8 (line 682) | static int get_cpu_support_x86_avx_vnni_int8()
  function get_cpu_support_x86_avx_vnni_int16 (line 704) | static int get_cpu_support_x86_avx_vnni_int16()
  function get_cpu_support_x86_avx_ne_convert (line 726) | static int get_cpu_support_x86_avx_ne_convert()
  function get_cpu_support_x86_avx512 (line 748) | static int get_cpu_support_x86_avx512()
  function get_cpu_support_x86_avx512_vnni (line 782) | static int get_cpu_support_x86_avx512_vnni()
  function get_cpu_support_x86_avx512_bf16 (line 812) | static int get_cpu_support_x86_avx512_bf16()
  function get_cpu_support_x86_avx512_fp16 (line 838) | static int get_cpu_support_x86_avx512_fp16()
  function get_cpucount (line 869) | static int get_cpucount()
  function get_thread_siblings (line 919) | static int get_thread_siblings(int cpuid)
  function get_physical_cpucount (line 987) | static int get_physical_cpucount()
  function get_data_cache_size (line 1065) | static int get_data_cache_size(int cpuid, int level)
  function get_big_cpu_data_cache_size (line 1209) | static int get_big_cpu_data_cache_size(int level)
  function get_cpu_level2_cachesize (line 1230) | static int get_cpu_level2_cachesize()
  function get_cpu_level3_cachesize (line 1300) | static int get_cpu_level3_cachesize()
  function get_smt_cpu_mask (line 1351) | static ncnn::CpuSet get_smt_cpu_mask()
  function get_max_freq_mhz (line 1393) | static std::vector<int> get_max_freq_mhz()
  function set_sched_affinity (line 1433) | static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
  function get_max_freq_khz (line 1447) | static int get_max_freq_khz(int cpuid)
  function is_smt_cpu (line 1521) | static bool is_smt_cpu(int cpuid)
  function set_sched_affinity (line 1554) | static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
  function set_sched_affinity (line 1575) | static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
  function initialize_cpu_thread_affinity_mask (line 1610) | static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ...
  function midr_info_t (line 1841) | midr_info_t(unsigned int _midr)
  function get_midr_from_sysfs (line 1847) | static unsigned int get_midr_from_sysfs(int cpuid)
  function get_midr_from_proc_cpuinfo (line 1868) | static int get_midr_from_proc_cpuinfo(std::vector<unsigned int>& midrs)
  function get_midr_from_register (line 2035) | static unsigned int get_midr_from_register()
  function get_sched_affinity (line 2044) | static int get_sched_affinity(ncnn::CpuSet& thread_affinity_mask)
  function midr_is_a53_a55 (line 2065) | static int midr_is_a53_a55(unsigned int midr)
  function detect_cpu_is_arm_a53_a55 (line 2084) | static int detect_cpu_is_arm_a53_a55()
  function initialize_global_cpu_info (line 2143) | static void initialize_global_cpu_info()
  function try_initialize_global_cpu_info (line 2257) | static inline void try_initialize_global_cpu_info()
  type ncnn (line 2266) | namespace ncnn {
    function cpu_support_arm_edsp (line 2407) | int cpu_support_arm_edsp()
    function cpu_support_arm_neon (line 2425) | int cpu_support_arm_neon()
    function cpu_support_arm_vfpv4 (line 2445) | int cpu_support_arm_vfpv4()
    function cpu_support_arm_asimdhp (line 2465) | int cpu_support_arm_asimdhp()
    function cpu_support_arm_cpuid (line 2483) | int cpu_support_arm_cpuid()
    function cpu_support_arm_asimddp (line 2501) | int cpu_support_arm_asimddp()
    function cpu_support_arm_asimdfhm (line 2519) | int cpu_support_arm_asimdfhm()
    function cpu_support_arm_bf16 (line 2537) | int cpu_support_arm_bf16()
    function cpu_support_arm_i8mm (line 2555) | int cpu_support_arm_i8mm()
    function cpu_support_arm_sve (line 2573) | int cpu_support_arm_sve()
    function cpu_support_arm_sve2 (line 2591) | int cpu_support_arm_sve2()
    function cpu_support_arm_svebf16 (line 2609) | int cpu_support_arm_svebf16()
    function cpu_support_arm_svei8mm (line 2627) | int cpu_support_arm_svei8mm()
    function cpu_support_arm_svef32mm (line 2645) | int cpu_support_arm_svef32mm()
    function cpu_support_x86_avx (line 2663) | int cpu_support_x86_avx()
    function cpu_support_x86_fma (line 2673) | int cpu_support_x86_fma()
    function cpu_support_x86_xop (line 2683) | int cpu_support_x86_xop()
    function cpu_support_x86_f16c (line 2693) | int cpu_support_x86_f16c()
    function cpu_support_x86_avx2 (line 2703) | int cpu_support_x86_avx2()
    function cpu_support_x86_avx_vnni (line 2713) | int cpu_support_x86_avx_vnni()
    function cpu_support_x86_avx_vnni_int8 (line 2723) | int cpu_support_x86_avx_vnni_int8()
    function cpu_support_x86_avx_vnni_int16 (line 2733) | int cpu_support_x86_avx_vnni_int16()
    function cpu_support_x86_avx_ne_convert (line 2743) | int cpu_support_x86_avx_ne_convert()
    function cpu_support_x86_avx512 (line 2753) | int cpu_support_x86_avx512()
    function cpu_support_x86_avx512_vnni (line 2763) | int cpu_support_x86_avx512_vnni()
    function cpu_support_x86_avx512_bf16 (line 2773) | int cpu_support_x86_avx512_bf16()
    function cpu_support_x86_avx512_fp16 (line 2783) | int cpu_support_x86_avx512_fp16()
    function cpu_support_mips_msa (line 2793) | int cpu_support_mips_msa()
    function cpu_support_loongarch_lsx (line 2807) | int cpu_support_loongarch_lsx()
    function cpu_support_loongarch_lasx (line 2821) | int cpu_support_loongarch_lasx()
    function cpu_support_loongson_mmi (line 2835) | int cpu_support_loongson_mmi()
    function cpu_support_riscv_v (line 2849) | int cpu_support_riscv_v()
    function cpu_support_riscv_zfh (line 2863) | int cpu_support_riscv_zfh()
    function cpu_support_riscv_zvfh (line 2877) | int cpu_support_riscv_zvfh()
    function cpu_support_riscv_xtheadvector (line 2891) | int cpu_support_riscv_xtheadvector()
    function cpu_riscv_vlenb (line 2905) | int cpu_riscv_vlenb()
    function get_cpu_count (line 2929) | int get_cpu_count()
    function get_little_cpu_count (line 2935) | int get_little_cpu_count()
    function get_big_cpu_count (line 2941) | int get_big_cpu_count()
    function get_physical_cpu_count (line 2948) | int get_physical_cpu_count()
    function get_physical_little_cpu_count (line 2954) | int get_physical_little_cpu_count()
    function get_physical_big_cpu_count (line 2963) | int get_physical_big_cpu_count()
    function get_cpu_level2_cache_size (line 2972) | int get_cpu_level2_cache_size()
    function get_cpu_level3_cache_size (line 2978) | int get_cpu_level3_cache_size()
    function get_cpu_powersave (line 2984) | int get_cpu_powersave()
    function set_cpu_powersave (line 2990) | int set_cpu_powersave(int powersave)
    function CpuSet (line 3010) | const CpuSet& get_cpu_thread_affinity_mask(int powersave)
    function set_cpu_thread_affinity (line 3028) | int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask)
    function is_current_thread_running_on_a53_a55 (line 3110) | int is_current_thread_running_on_a53_a55()
    function get_omp_num_threads (line 3166) | int get_omp_num_threads()
    function set_omp_num_threads (line 3175) | void set_omp_num_threads(int num_threads)
    function get_omp_dynamic (line 3184) | int get_omp_dynamic()
    function set_omp_dynamic (line 3193) | void set_omp_dynamic(int dynamic)
    function get_omp_thread_num (line 3202) | int get_omp_thread_num()
    function get_kmp_blocktime (line 3211) | int get_kmp_blocktime()
    function set_kmp_blocktime (line 3220) | void set_kmp_blocktime(int time_ms)
    function get_flush_denormals (line 3231) | int get_flush_denormals()
    function set_flush_denormals (line 3240) | int set_flush_denormals(int flush_denormals)

FILE: src/cpu.h
  function namespace (line 19) | namespace ncnn {

FILE: src/datareader.cpp
  type ncnn (line 8) | namespace ncnn {
    class DataReaderFromStdioPrivate (line 36) | class DataReaderFromStdioPrivate
      method DataReaderFromStdioPrivate (line 39) | DataReaderFromStdioPrivate(FILE* _fp)
    function DataReaderFromStdio (line 61) | DataReaderFromStdio& DataReaderFromStdio::operator=(const DataReaderFr...
    class DataReaderFromMemoryPrivate (line 79) | class DataReaderFromMemoryPrivate
      method DataReaderFromMemoryPrivate (line 82) | DataReaderFromMemoryPrivate(const unsigned char*& _mem)
    function DataReaderFromMemory (line 104) | DataReaderFromMemory& DataReaderFromMemory::operator=(const DataReader...
    class DataReaderFromAndroidAssetPrivate (line 144) | class DataReaderFromAndroidAssetPrivate
      method DataReaderFromAndroidAssetPrivate (line 147) | DataReaderFromAndroidAssetPrivate(AAsset* _asset)
    function DataReaderFromAndroidAsset (line 170) | DataReaderFromAndroidAsset& DataReaderFromAndroidAsset::operator=(cons...

FILE: src/datareader.h
  function namespace (line 18) | namespace ncnn {

FILE: src/expression.cpp
  type ncnn (line 8) | namespace ncnn {
    function count_expression_blobs (line 10) | int count_expression_blobs(const std::string& expr)
    type typed_value (line 55) | struct typed_value
      method typed_value (line 64) | typed_value()
      method typed_value (line 68) | typed_value(int _i)
      method typed_value (line 72) | typed_value(float _f)
      method to_int (line 77) | int to_int()
    function eval_list_expression (line 87) | int eval_list_expression(const std::string& expr, const std::vector<Ma...

FILE: src/expression.h
  function namespace (line 6) | namespace ncnn {

FILE: src/gpu.cpp
  type ncnn (line 32) | namespace ncnn {
    class __ncnn_vulkan_instance_holder (line 37) | class __ncnn_vulkan_instance_holder
      method __ncnn_vulkan_instance_holder (line 40) | __ncnn_vulkan_instance_holder()
    type layer_shader_registry_entry (line 91) | struct layer_shader_registry_entry
    class GpuInfoPrivate (line 253) | class GpuInfoPrivate
    function find_device_compute_queue (line 521) | static uint32_t find_device_compute_queue(const std::vector<VkQueueFam...
    function find_device_transfer_queue (line 562) | static uint32_t find_device_transfer_queue(const std::vector<VkQueueFa...
    function get_vendor_default_subgroup_size (line 1167) | static int get_vendor_default_subgroup_size(uint32_t vendorID)
    function GpuInfo (line 1528) | GpuInfo& GpuInfo::operator=(const GpuInfo&)
    function VkPhysicalDevice (line 1538) | VkPhysicalDevice GpuInfo::physicalDevice() const
    function VkPhysicalDevice (line 1543) | VkPhysicalDevice GpuInfo::physical_device() const
    function VkPhysicalDeviceFeatures (line 1548) | const VkPhysicalDeviceFeatures& GpuInfo::physicalDevicefeatures() const
    function VkPhysicalDeviceProperties (line 1553) | const VkPhysicalDeviceProperties& GpuInfo::physicalDeviceProperties() ...
    function VkPhysicalDeviceMemoryProperties (line 1558) | const VkPhysicalDeviceMemoryProperties& GpuInfo::physicalDeviceMemoryP...
    function VkPhysicalDeviceMemoryProperties (line 1563) | const VkPhysicalDeviceMemoryProperties& GpuInfo::physical_device_memor...
    function VkPhysicalDevice8BitStorageFeaturesKHR (line 2130) | const VkPhysicalDevice8BitStorageFeaturesKHR& GpuInfo::query8BitStorag...
    function VkPhysicalDevice16BitStorageFeaturesKHR (line 2135) | const VkPhysicalDevice16BitStorageFeaturesKHR& GpuInfo::query16BitStor...
    function VkPhysicalDeviceFloat16Int8FeaturesKHR (line 2140) | const VkPhysicalDeviceFloat16Int8FeaturesKHR& GpuInfo::queryFloat16Int...
    function VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR (line 2145) | const VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR& GpuInfo::quer...
    function VkPhysicalDeviceCooperativeMatrixFeaturesKHR (line 2150) | const VkPhysicalDeviceCooperativeMatrixFeaturesKHR& GpuInfo::queryCoop...
    function VkPhysicalDeviceCooperativeMatrixFeaturesNV (line 2155) | const VkPhysicalDeviceCooperativeMatrixFeaturesNV& GpuInfo::queryCoope...
    function VkPhysicalDeviceCooperativeMatrix2FeaturesNV (line 2160) | const VkPhysicalDeviceCooperativeMatrix2FeaturesNV& GpuInfo::queryCoop...
    function VkPhysicalDeviceCooperativeVectorFeaturesNV (line 2165) | const VkPhysicalDeviceCooperativeVectorFeaturesNV& GpuInfo::queryCoope...
    function VkPhysicalDeviceRobustness2FeaturesKHR (line 2170) | const VkPhysicalDeviceRobustness2FeaturesKHR& GpuInfo::queryRobustness...
    function VkPhysicalDeviceSubgroupSizeControlFeaturesEXT (line 2175) | const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& GpuInfo::querySu...
    function VkPhysicalDeviceShaderBfloat16FeaturesKHR (line 2180) | const VkPhysicalDeviceShaderBfloat16FeaturesKHR& GpuInfo::queryShaderB...
    function VkPhysicalDeviceShaderFloat8FeaturesEXT (line 2185) | const VkPhysicalDeviceShaderFloat8FeaturesEXT& GpuInfo::queryShaderFlo...
    function VkPhysicalDeviceShaderFloatControls2FeaturesKHR (line 2190) | const VkPhysicalDeviceShaderFloatControls2FeaturesKHR& GpuInfo::queryS...
    function VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR (line 2195) | const VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR& GpuInfo::que...
    function VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR (line 2200) | const VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR& GpuInfo::queryS...
    function VkPhysicalDeviceShaderAtomicFloatFeaturesEXT (line 2205) | const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT& GpuInfo::queryShad...
    function VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT (line 2210) | const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT& GpuInfo::querySha...
    function VkPhysicalDeviceVulkanMemoryModelFeaturesKHR (line 2215) | const VkPhysicalDeviceVulkanMemoryModelFeaturesKHR& GpuInfo::queryVulk...
    function VkPhysicalDeviceCooperativeMatrix2PropertiesNV (line 2225) | const VkPhysicalDeviceCooperativeMatrix2PropertiesNV& GpuInfo::queryCo...
    function VkPhysicalDeviceCooperativeVectorPropertiesNV (line 2230) | const VkPhysicalDeviceCooperativeVectorPropertiesNV& GpuInfo::queryCoo...
    function VkPhysicalDeviceDriverPropertiesKHR (line 2235) | const VkPhysicalDeviceDriverPropertiesKHR& GpuInfo::queryDriverPropert...
    function VkPhysicalDeviceFloatControlsPropertiesKHR (line 2240) | const VkPhysicalDeviceFloatControlsPropertiesKHR& GpuInfo::queryFloatC...
    function VkPhysicalDeviceRobustness2PropertiesKHR (line 2245) | const VkPhysicalDeviceRobustness2PropertiesKHR& GpuInfo::queryRobustne...
    function VkPhysicalDeviceShaderIntegerDotProductProperties (line 2250) | const VkPhysicalDeviceShaderIntegerDotProductProperties& GpuInfo::quer...
    function VkPhysicalDeviceSubgroupProperties (line 2255) | const VkPhysicalDeviceSubgroupProperties& GpuInfo::querySubgroupProper...
    function VkPhysicalDeviceSubgroupSizeControlPropertiesEXT (line 2260) | const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& GpuInfo::query...
    function VkPhysicalDeviceExternalMemoryHostPropertiesEXT (line 2265) | const VkPhysicalDeviceExternalMemoryHostPropertiesEXT& GpuInfo::queryE...
    function init_instance_core (line 2364) | static int init_instance_core()
    function init_instance_extension (line 2465) | static int init_instance_extension()
    function VKAPI_ATTR (line 2528) | static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
    function VkResult (line 2539) | static VkResult CreateDebugUtilsMessengerEXT(VkInstance instance, cons...
    function DestroyDebugUtilsMessengerEXT (line 2548) | static void DestroyDebugUtilsMessengerEXT(VkInstance instance, VkDebug...
    function find_default_vulkan_device_index (line 2556) | static int find_default_vulkan_device_index()
    function create_gpu_instance (line 2580) | int create_gpu_instance(const char* driver_path)
    function VkInstance (line 3123) | VkInstance get_gpu_instance()
    function destroy_gpu_instance (line 3128) | void destroy_gpu_instance()
    function try_create_gpu_instance (line 3188) | static void try_create_gpu_instance()
    function get_gpu_count (line 3200) | int get_gpu_count()
    function get_default_gpu_index (line 3207) | int get_default_gpu_index()
    function GpuInfo (line 3214) | const GpuInfo& get_gpu_info(int device_index)
    class VkDummyAllocator (line 3221) | class VkDummyAllocator : public VkBlobAllocator
      method VkDummyAllocator (line 3225) | VkDummyAllocator(const VulkanDevice* _vkdev)
    class VkDummyCompute (line 3231) | class VkDummyCompute : public VkCompute
      method VkDummyCompute (line 3234) | VkDummyCompute(const VulkanDevice* _vkdev)
      method record_dummy (line 3239) | void record_dummy(const VkMat& buffer)
      method record_dummy (line 3244) | void record_dummy(const VkImageMat& image)
      method record_dummy_readonly (line 3249) | void record_dummy_readonly(const VkImageMat& image)
    class VulkanDevicePrivate (line 3255) | class VulkanDevicePrivate
    function VulkanDevice (line 3834) | VulkanDevice& VulkanDevice::operator=(const VulkanDevice&)
    function VkDevice (line 3839) | VkDevice VulkanDevice::vkdevice() const
    function VkShaderModule (line 3849) | VkShaderModule VulkanDevice::compile_shader_module(const uint32_t* spv...
    function inject_local_size_xyz (line 3869) | static void inject_local_size_xyz(const uint32_t* code, size_t size, u...
    function VkShaderModule (line 3966) | VkShaderModule VulkanDevice::compile_shader_module(const uint32_t* spv...
    function VkQueue (line 4318) | VkQueue VulkanDevice::acquire_queue(uint32_t queue_family_index) const
    function VkAllocator (line 4407) | VkAllocator* VulkanDevice::acquire_blob_allocator() const
    function VkAllocator (line 4444) | VkAllocator* VulkanDevice::acquire_staging_allocator() const
    function VkSampler (line 4481) | const VkSampler* VulkanDevice::immutable_texelfetch_sampler() const
    function VkMat (line 4486) | VkMat VulkanDevice::get_dummy_buffer() const
    function VkImageMat (line 4491) | VkImageMat VulkanDevice::get_dummy_image() const
    function VkImageMat (line 4496) | VkImageMat VulkanDevice::get_dummy_image_readonly() const
    function PipelineCache (line 4505) | const PipelineCache* VulkanDevice::get_pipeline_cache() const
    function VulkanDevice (line 4733) | VulkanDevice* get_gpu_device(int device_index)
    function TBuiltInResource (line 4748) | static TBuiltInResource get_default_TBuiltInResource()
    class VulkanShaderIncluder (line 4861) | class VulkanShaderIncluder : public glslang::TShader::Includer
      method releaseInclude (line 4877) | virtual void releaseInclude(glslang::TShader::Includer::IncludeResul...
    class DefinitionCollector (line 4883) | class DefinitionCollector
      method append (line 4887) | void append(const char* key, T def)
      type typed_value (line 4893) | struct typed_value
        method typed_value (line 4895) | typed_value(const char* _s)
        method typed_value (line 4899) | typed_value(uint8_t _u8)
        method typed_value (line 4903) | typed_value(uint32_t _u32)
        method typed_value (line 4907) | typed_value(int32_t _i32)
        method typed_value (line 4911) | typed_value(uint64_t _u64)
        method typed_value (line 4915) | typed_value(float _f32)
    function compile_spirv_module (line 4935) | int compile_spirv_module(const char* comp_string, const Option& opt, s...
    function compile_spirv_module (line 4942) | int compile_spirv_module(const char* comp_data, int comp_data_size, co...
    function compile_spirv_module (line 6087) | int compile_spirv_module(int shader_type_index, const Option& opt, std...
    function resolve_shader_info (line 6101) | int resolve_shader_info(const uint32_t* spv_data, size_t spv_data_size...

FILE: src/gpu.h
  function device_index (line 191) | int device_index() const;

FILE: src/layer/absval.cpp
  type ncnn (line 6) | namespace ncnn {

FILE: src/layer/absval.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/argmax.cpp
  type ncnn (line 8) | namespace ncnn {

FILE: src/layer/argmax.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/absval_arm.cpp
  type ncnn (line 10) | namespace ncnn {

FILE: src/layer/arm/absval_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/arm_activation.h
  function float32x4_t (line 13) | static inline float32x4_t activation_ps(float32x4_t _v, int activation_t...
  function __fp16 (line 63) | static inline __fp16 activation_ss_f16(__fp16 v, int activation_type, co...
  function float16x4_t (line 108) | static inline float16x4_t activation_ps_f16(float16x4_t _v, int activati...
  function float16x8_t (line 158) | static inline float16x8_t activation_ps_f16(float16x8_t _v, int activati...

FILE: src/layer/arm/arm_usability.h
  function float2int8 (line 7) | static inline signed char float2int8(float v)
  function uint16x4_t (line 18) | static inline uint16x4_t float2bfloat(float32x4_t _v)
  function float32x4_t (line 22) | static inline float32x4_t bfloat2float(uint16x4_t _v)
  function int8x8_t (line 27) | static inline int8x8_t float2int8(float32x4_t _vlow, float32x4_t _vhigh)
  function int8x8_t (line 51) | static inline int8x8_t float2int8relu(float32x4_t _vlow, float32x4_t _vh...
  function int8x8_t (line 75) | static inline int8x8_t float2int8leakyrelu(float32x4_t _vlow, float32x4_...
  function __fp16 (line 116) | struct __fp16
  function operator (line 133) | operator const float() const
  function float16x4_t (line 194) | static inline float16x4_t vdup_n_f16(const __fp16& f16)
  function float16x8_t (line 199) | static inline float16x8_t vdupq_n_f16(const __fp16& f16)
  function __fp16 (line 204) | static inline __fp16 vmaxv_f16(float16x4_t a)
  function __fp16 (line 209) | static inline __fp16 vmaxvq_f16(float16x8_t a)
  function float2int8 (line 241) | static inline signed char float2int8(__fp16 v)
  function int8x8_t (line 249) | static inline int8x8_t float2int8(float16x8_t _v)
  function transpose4x4_u16 (line 257) | static inline void transpose4x4_u16(uint16x4_t& _r0, uint16x4_t& _r1, ui...
  function transpose4x8_u16 (line 269) | static inline void transpose4x8_u16(uint16x4_t& _r0, uint16x4_t& _r1, ui...
  function transpose4x12_u16 (line 289) | static inline void transpose4x12_u16(uint16x4_t& _r0, uint16x4_t& _r1, u...
  function transpose8x4_u16 (line 317) | static inline void transpose8x4_u16(uint16x8_t& _r0, uint16x8_t& _r1, ui...
  function transpose8x8_u16 (line 329) | static inline void transpose8x8_u16(uint16x8_t& _r0, uint16x8_t& _r1, ui...
  function transpose8x12_u16 (line 349) | static inline void transpose8x12_u16(uint16x8_t& _r0, uint16x8_t& _r1, u...
  function transpose4x4_ps (line 377) | static inline void transpose4x4_ps(float32x4_t& _r0, float32x4_t& _r1, f...
  function transpose4x8_ps (line 387) | static inline void transpose4x8_ps(float32x4_t& _r0, float32x4_t& _r1, f...
  function transpose4x12_ps (line 403) | static inline void transpose4x12_ps(float32x4_t& _r0, float32x4_t& _r1, ...
  function transpose8x4_ps (line 425) | static inline void transpose8x4_ps(float32x4_t& _r0l, float32x4_t& _r0h,
  function transpose12x4_ps (line 444) | static inline void transpose12x4_ps(float32x4_t& _r0l, float32x4_t& _r0m...
  function transpose8x8_ps (line 470) | static inline void transpose8x8_ps(float32x4_t& _r0l, float32x4_t& _r0h,
  function transpose8x12_ps (line 505) | static inline void transpose8x12_ps(float32x4_t& _r0l, float32x4_t& _r0h,
  function transpose12x8_ps (line 556) | static inline void transpose12x8_ps(float32x4_t& _r0l, float32x4_t& _r0m...
  function transpose4x4_ph (line 604) | static inline void transpose4x4_ph(float16x4_t& _r0, float16x4_t& _r1, f...
  function transpose4x8_ph (line 616) | static inline void transpose4x8_ph(float16x4_t& _r0, float16x4_t& _r1, f...
  function transpose4x12_ph (line 636) | static inline void transpose4x12_ph(float16x4_t& _r0, float16x4_t& _r1, ...
  function transpose8x4_ph (line 664) | static inline void transpose8x4_ph(float16x8_t& _r0, float16x8_t& _r1, f...
  function transpose8x8_ph (line 676) | static inline void transpose8x8_ph(float16x8_t& _r0, float16x8_t& _r1, f...
  function transpose8x12_ph (line 696) | static inline void transpose8x12_ph(float16x8_t& _r0, float16x8_t& _r1, ...
  function transpose12x4_ph (line 724) | static inline void transpose12x4_ph(float16x4_t& _r0l, float16x4_t& _r0m...

FILE: src/layer/arm/batchnorm_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/batchnorm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/batchnorm_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/bias_arm.cpp
  type ncnn (line 10) | namespace ncnn {

FILE: src/layer/arm/bias_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/binaryop_arm.cpp
  type ncnn (line 14) | namespace ncnn {
    function binary_op_vector_no_broadcast (line 31) | static void binary_op_vector_no_broadcast(const float* ptr, const floa...
    function binary_op_vector_broadcast_b (line 58) | static void binary_op_vector_broadcast_b(const float* ptr, const float...
    function binary_op_vector_broadcast_a (line 85) | static void binary_op_vector_broadcast_a(const float* ptr, const float...
    function binary_op_vector_broadcast_pb (line 112) | static void binary_op_vector_broadcast_pb(const float* ptr, const floa...
    function binary_op_vector_broadcast_pb_b (line 135) | static void binary_op_vector_broadcast_pb_b(const float* ptr, const fl...
    function binary_op_vector_broadcast_pb_a (line 156) | static void binary_op_vector_broadcast_pb_a(const float* ptr, const fl...
    function binary_op_vector (line 178) | static void binary_op_vector(const float* ptr, const float* ptr1, floa...
    type BinaryOp_arm_functor (line 229) | namespace BinaryOp_arm_functor {
    function binary_op_vector (line 291) | static void binary_op_vector(const float* ptr, const float* ptr1, floa...
    function binary_op_scalar (line 318) | static void binary_op_scalar(const Mat& a, float b, Mat& c, int op_typ...
    function binary_op_no_broadcast (line 333) | static void binary_op_no_broadcast(const Mat& a, const Mat& b, Mat& c,...
    function binary_op_broadcast (line 349) | static void binary_op_broadcast(const Mat& a, const Mat& b, Mat& c, in...
    function binary_op_scalar_inplace (line 438) | static void binary_op_scalar_inplace(Mat& a, float b, int op_type, con...
    function get_reverse_op_type (line 452) | static int get_reverse_op_type(int op_type)
    function binary_op_vector_no_broadcast_bf16s (line 655) | static void binary_op_vector_no_broadcast_bf16s(const unsigned short* ...
    function binary_op_vector_broadcast_b_bf16s (line 682) | static void binary_op_vector_broadcast_b_bf16s(const unsigned short* p...
    function binary_op_vector_broadcast_a_bf16s (line 709) | static void binary_op_vector_broadcast_a_bf16s(const unsigned short* p...
    function binary_op_vector_broadcast_pb_bf16s (line 736) | static void binary_op_vector_broadcast_pb_bf16s(const unsigned short* ...
    function binary_op_vector_broadcast_pb_b_bf16s (line 759) | static void binary_op_vector_broadcast_pb_b_bf16s(const unsigned short...
    function binary_op_vector_broadcast_pb_a_bf16s (line 780) | static void binary_op_vector_broadcast_pb_a_bf16s(const unsigned short...
    function binary_op_vector_bf16s (line 802) | static void binary_op_vector_bf16s(const unsigned short* ptr, const un...
    function binary_op_vector_bf16s (line 853) | static void binary_op_vector_bf16s(const unsigned short* ptr, const un...
    function binary_op_vector_scalar_b_bf16s (line 881) | static void binary_op_vector_scalar_b_bf16s(const unsigned short* ptr,...
    function binary_op_vector_scalar_b_bf16s (line 905) | static void binary_op_vector_scalar_b_bf16s(const unsigned short* ptr,...
    function binary_op_scalar_bf16s (line 932) | static void binary_op_scalar_bf16s(const Mat& a, float b, Mat& c, int ...
    function binary_op_no_broadcast_bf16s (line 947) | static void binary_op_no_broadcast_bf16s(const Mat& a, const Mat& b, M...
    function binary_op_broadcast_bf16s (line 963) | static void binary_op_broadcast_bf16s(const Mat& a, const Mat& b, Mat&...
    function binary_op_scalar_inplace_bf16s (line 1052) | static void binary_op_scalar_inplace_bf16s(Mat& a, float b, int op_typ...

FILE: src/layer/arm/binaryop_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/binaryop_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function float16x4_t (line 15) | static inline float16x4_t fmod_f16(const float16x4_t& x, const float16...
    function float16x8_t (line 22) | static inline float16x8_t fmodq_f16(const float16x8_t& x, const float1...
    function float16x4_t (line 34) | static inline float16x4_t round_f16(const float16x4_t& x)
    function float16x8_t (line 39) | static inline float16x8_t roundq_f16(const float16x8_t& x)
    function float16x4_t (line 48) | static inline float16x4_t logaddexp_f16(const float16x4_t& x, const fl...
    function float16x8_t (line 53) | static inline float16x8_t logaddexpq_f16(const float16x8_t& x, const f...
    function float16x4_t (line 64) | static inline float16x4_t floor_divide_f16(const float16x4_t& x, const...
    function float16x8_t (line 69) | static inline float16x8_t floor_divideq_f16(const float16x8_t& x, cons...
    function float16x4_t (line 80) | static inline float16x4_t remainder_f16(const float16x4_t& x, const fl...
    function float16x8_t (line 85) | static inline float16x8_t remainderq_f16(const float16x8_t& x, const f...
    function binary_op_vector_no_broadcast_fp16s (line 97) | static void binary_op_vector_no_broadcast_fp16s(const __fp16* ptr, con...
    function binary_op_vector_broadcast_b_fp16s (line 132) | static void binary_op_vector_broadcast_b_fp16s(const __fp16* ptr, cons...
    function binary_op_vector_broadcast_a_fp16s (line 166) | static void binary_op_vector_broadcast_a_fp16s(const __fp16* ptr, cons...
    function binary_op_vector_broadcast_pb_fp16s (line 200) | static void binary_op_vector_broadcast_pb_fp16s(const __fp16* ptr, con...
    function binary_op_vector_broadcast_pb_b_fp16s (line 247) | static void binary_op_vector_broadcast_pb_b_fp16s(const __fp16* ptr, c...
    function binary_op_vector_broadcast_pb_a_fp16s (line 274) | static void binary_op_vector_broadcast_pb_a_fp16s(const __fp16* ptr, c...
    function binary_op_vector_fp16s (line 318) | static void binary_op_vector_fp16s(const __fp16* ptr, const __fp16* pt...
    type BinaryOp_arm_functor (line 369) | namespace BinaryOp_arm_functor {
    function binary_op_vector_fp16s (line 416) | static void binary_op_vector_fp16s(const __fp16* ptr, const __fp16* pt...
    function binary_op_scalar_fp16s (line 443) | static void binary_op_scalar_fp16s(const Mat& a, __fp16 b, Mat& c, int...
    function binary_op_no_broadcast_fp16s (line 458) | static void binary_op_no_broadcast_fp16s(const Mat& a, const Mat& b, M...
    function binary_op_broadcast_fp16s (line 474) | static void binary_op_broadcast_fp16s(const Mat& a, const Mat& b, Mat&...
    function binary_op_scalar_inplace_fp16s (line 563) | static void binary_op_scalar_inplace_fp16s(Mat& a, __fp16 b, int op_ty...
    function get_reverse_op_type (line 577) | static int get_reverse_op_type(int op_type)

FILE: src/layer/arm/cast_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/cast_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/cast_arm_bf16.cpp
  type ncnn (line 7) | namespace ncnn {
    function cast_fp32_to_bf16_neon_bf16 (line 11) | void cast_fp32_to_bf16_neon_bf16(const Mat& bottom_blob, Mat& top_blob...
    function cast_bf16_to_fp32_neon_bf16 (line 16) | void cast_bf16_to_fp32_neon_bf16(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/cast_arm_vfpv4.cpp
  type ncnn (line 7) | namespace ncnn {
    function cast_fp32_to_fp16_neon_vfpv4 (line 11) | void cast_fp32_to_fp16_neon_vfpv4(const Mat& bottom_blob, Mat& top_blo...
    function cast_fp16_to_fp32_neon_vfpv4 (line 16) | void cast_fp16_to_fp32_neon_vfpv4(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/cast_bf16.h
  function cast_fp32_to_bf16_neon (line 9) | static void cast_fp32_to_bf16_neon(const Mat& bottom_blob, Mat& top_blob...
  function cast_bf16_to_fp32_neon (line 173) | static void cast_bf16_to_fp32_neon(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/cast_fp16.h
  function cast_fp32_to_fp16_neon (line 9) | static void cast_fp32_to_fp16_neon(const Mat& bottom_blob, Mat& top_blob...
  function cast_fp16_to_fp32_neon (line 167) | static void cast_fp16_to_fp32_neon(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/clip_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/clip_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/clip_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/concat_arm.cpp
  type ncnn (line 8) | namespace ncnn {

FILE: src/layer/arm/concat_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/convolution1d_arm.cpp
  type ncnn (line 16) | namespace ncnn {

FILE: src/layer/arm/convolution1d_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/convolution1d_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/convolution1d_packed.h
  function convolution1d_transform_kernel_packed (line 4) | static void convolution1d_transform_kernel_packed(const Mat& kernel, Mat...
  function convolution1d_packed (line 494) | static void convolution1d_packed(const Mat& bottom_blob, Mat& top_blob, ...

FILE: src/layer/arm/convolution1d_packed_bf16s.h
  function convolution1d_transform_kernel_packed_bf16s (line 4) | static void convolution1d_transform_kernel_packed_bf16s(const Mat& kerne...
  function convolution1d_packed_bf16s (line 494) | static void convolution1d_packed_bf16s(const Mat& bottom_blob, Mat& top_...

FILE: src/layer/arm/convolution1d_packed_fp16s.h
  function convolution1d_transform_kernel_packed_fp16s (line 4) | static void convolution1d_transform_kernel_packed_fp16s(const Mat& kerne...
  function convolution1d_packed_fp16s (line 443) | static void convolution1d_packed_fp16s(const Mat& bottom_blob, Mat& top_...
  function convolution1d_packed_fp16sa (line 1192) | static void convolution1d_packed_fp16sa(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_1x1.h
  function conv1x1s1_neon (line 4) | static void conv1x1s1_neon(const Mat& bottom_blob, Mat& top_blob, const ...
  function conv1x1s2_neon (line 1767) | static void conv1x1s2_neon(const Mat& bottom_blob, Mat& top_blob, const ...

FILE: src/layer/arm/convolution_2x2.h
  function conv2x2s1_neon (line 4) | static void conv2x2s1_neon(const Mat& bottom_blob, Mat& top_blob, const ...

FILE: src/layer/arm/convolution_3x3.h
  function conv3x3s1_neon (line 4) | static void conv3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, const ...
  function conv3x3s2_neon (line 1290) | static void conv3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, const ...
  function conv3x3s2_transform_kernel_neon (line 1865) | static void conv3x3s2_transform_kernel_neon(const Mat& _kernel, Mat& ker...
  function conv3x3s2_packed_neon (line 1929) | static void conv3x3s2_packed_neon(const Mat& bottom_blob, Mat& top_blob,...

FILE: src/layer/arm/convolution_3x3_int8.h
  function conv3x3s2_transform_kernel_int8_neon (line 4) | static void conv3x3s2_transform_kernel_int8_neon(const Mat& _kernel, Mat...
  function conv3x3s2_packed_int8_neon (line 68) | static void conv3x3s2_packed_int8_neon(const Mat& bottom_blob, Mat& top_...

FILE: src/layer/arm/convolution_3x3_pack1to4.h
  function conv3x3s1_pack1to4_neon (line 4) | static void conv3x3s1_pack1to4_neon(const Mat& bottom_blob, Mat& top_blo...
  function conv3x3s2_pack1to4_neon (line 887) | static void conv3x3s2_pack1to4_neon(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/convolution_3x3_pack1to4_bf16s.h
  function conv3x3s1_pack1to4_bf16s_neon (line 4) | static void conv3x3s1_pack1to4_bf16s_neon(const Mat& bottom_blob, Mat& t...
  function conv3x3s2_pack1to4_bf16s_neon (line 1944) | static void conv3x3s2_pack1to4_bf16s_neon(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolution_3x3_pack1to4_fp16s.h
  function conv3x3s1_pack1to4_fp16sa_neon (line 4) | static void conv3x3s1_pack1to4_fp16sa_neon(const Mat& bottom_blob, Mat& ...
  function conv3x3s2_pack1to4_fp16sa_neon (line 383) | static void conv3x3s2_pack1to4_fp16sa_neon(const Mat& bottom_blob, Mat& ...

FILE: src/layer/arm/convolution_3x3_pack1to8_fp16s.h
  function conv3x3s1_pack1to8_fp16sa_neon (line 4) | static void conv3x3s1_pack1to8_fp16sa_neon(const Mat& bottom_blob, Mat& ...
  function conv3x3s2_pack1to8_fp16sa_neon (line 383) | static void conv3x3s2_pack1to8_fp16sa_neon(const Mat& bottom_blob, Mat& ...

FILE: src/layer/arm/convolution_3x3_pack4.h
  function conv3x3s2_pack4_neon (line 4) | static void conv3x3s2_pack4_neon(const Mat& bottom_blob, Mat& top_blob, ...

FILE: src/layer/arm/convolution_3x3_pack4_bf16s.h
  function conv3x3s2_pack4_bf16s_neon (line 4) | static void conv3x3s2_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_...

FILE: src/layer/arm/convolution_3x3_pack4_fp16s.h
  function conv3x3s1_pack4_fp16sa_neon (line 4) | static void conv3x3s1_pack4_fp16sa_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_3x3_pack4to1.h
  function conv3x3s1_pack4to1_neon (line 4) | static void conv3x3s1_pack4to1_neon(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/convolution_3x3_pack8_fp16s.h
  function conv3x3s1_pack8_fp16sa_neon (line 4) | static void conv3x3s1_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& top...
  function conv3x3s2_pack8_fp16sa_neon (line 966) | static void conv3x3s2_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_3x3_winograd.h
  function conv3x3s1_winograd_pack_A_tile (line 4) | static void conv3x3s1_winograd_pack_A_tile(const Mat& A, Mat& AT, int ba...
  function conv3x3s1_winograd_transpose_pack_B_tile (line 79) | static void conv3x3s1_winograd_transpose_pack_B_tile(const Mat& B, Mat& ...
  function conv3x3s1_winograd_gemm_transB_packed_tile (line 763) | static void conv3x3s1_winograd_gemm_transB_packed_tile(const Mat& AT_til...
  function conv3x3s1_winograd_get_optimal_tile_mnk (line 4451) | static void conv3x3s1_winograd_get_optimal_tile_mnk(int M, int N, int K,...
  function conv3x3s1_winograd23_transform_kernel_tile (line 4574) | static inline void conv3x3s1_winograd23_transform_kernel_tile(const Mat&...
  function conv3x3s1_winograd23_transform_kernel (line 4630) | static void conv3x3s1_winograd23_transform_kernel(const Mat& kernel, Mat...
  function conv3x3s1_winograd43_transform_kernel_tile (line 5692) | static inline void conv3x3s1_winograd43_transform_kernel_tile(const Mat&...
  function conv3x3s1_winograd43_transform_kernel (line 5762) | static void conv3x3s1_winograd43_transform_kernel(const Mat& kernel, Mat...
  function conv3x3s1_winograd63_transform_kernel_tile (line 7380) | static inline void conv3x3s1_winograd63_transform_kernel_tile(const Mat&...
  function conv3x3s1_winograd63_transform_kernel (line 7457) | static void conv3x3s1_winograd63_transform_kernel(const Mat& kernel, Mat...

FILE: src/layer/arm/convolution_3x3_winograd_fp16s.h
  function conv3x3s1_winograd_pack_A_tile_fp16 (line 4) | static void conv3x3s1_winograd_pack_A_tile_fp16(const Mat& A, Mat& AT, i...
  function conv3x3s1_winograd_transpose_pack_B_tile_fp16 (line 75) | static void conv3x3s1_winograd_transpose_pack_B_tile_fp16(const Mat& B, ...
  function conv3x3s1_winograd_gemm_transB_packed_tile_fp16sa (line 475) | static void conv3x3s1_winograd_gemm_transB_packed_tile_fp16sa(const Mat&...
  function conv3x3s1_winograd_get_optimal_tile_mnk_fp16 (line 1986) | static void conv3x3s1_winograd_get_optimal_tile_mnk_fp16(int M, int N, i...
  function conv3x3s1_winograd23_transform_kernel_tile_fp16sa (line 2044) | static inline void conv3x3s1_winograd23_transform_kernel_tile_fp16sa(con...
  function conv3x3s1_winograd23_transform_kernel_fp16sa (line 2100) | static void conv3x3s1_winograd23_transform_kernel_fp16sa(const Mat& kern...
  function conv3x3s1_winograd43_transform_kernel_tile_fp16sa (line 3030) | static inline void conv3x3s1_winograd43_transform_kernel_tile_fp16sa(con...
  function conv3x3s1_winograd43_transform_kernel_fp16sa (line 3100) | static void conv3x3s1_winograd43_transform_kernel_fp16sa(const Mat& kern...
  function conv3x3s1_winograd63_transform_kernel_tile_fp16sa (line 4386) | static inline void conv3x3s1_winograd63_transform_kernel_tile_fp16sa(con...
  function conv3x3s1_winograd63_transform_kernel_fp16sa (line 4463) | static void conv3x3s1_winograd63_transform_kernel_fp16sa(const Mat& kern...

FILE: src/layer/arm/convolution_3x3_winograd_int8.h
  function pack_A_tile_int8 (line 4) | static void pack_A_tile_int8(const Mat& A, Mat& AT, int batch, int max_i...
  function transpose_pack_B_tile_int8 (line 88) | static void transpose_pack_B_tile_int8(const Mat& B, Mat& BT, int batch,...
  function gemm_transB_packed_tile_int8 (line 566) | static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& ...
  function get_optimal_tile_mnk_int8 (line 3410) | static void get_optimal_tile_mnk_int8(int M, int N, int K, int& TILE_M, ...
  function conv3x3s1_winograd23_transform_kernel_tile_int8 (line 3525) | static inline void conv3x3s1_winograd23_transform_kernel_tile_int8(const...
  function conv3x3s1_winograd23_transform_kernel_int8 (line 3581) | static void conv3x3s1_winograd23_transform_kernel_int8(const Mat& kernel...
  function conv3x3s1_winograd23_transform_input_tile_int8 (line 3617) | static inline void conv3x3s1_winograd23_transform_input_tile_int8(const ...
  function conv3x3s1_winograd23_transform_output_tile_int8 (line 3912) | static inline void conv3x3s1_winograd23_transform_output_tile_int8(const...
  function conv3x3s1_winograd23_int8 (line 4249) | static int conv3x3s1_winograd23_int8(Mat& bottom_blob, Mat& top_blob, co...
  function conv3x3s1_winograd43_transform_kernel_tile_int8 (line 4373) | static inline void conv3x3s1_winograd43_transform_kernel_tile_int8(const...
  function conv3x3s1_winograd43_transform_kernel_int8 (line 4437) | static void conv3x3s1_winograd43_transform_kernel_int8(const Mat& kernel...
  function conv3x3s1_winograd43_transform_input_tile_int8 (line 4473) | static inline void conv3x3s1_winograd43_transform_input_tile_int8(const ...
  function conv3x3s1_winograd43_transform_output_tile_int8 (line 4887) | static inline void conv3x3s1_winograd43_transform_output_tile_int8(const...
  function conv3x3s1_winograd43_int8 (line 5606) | static int conv3x3s1_winograd43_int8(Mat& bottom_blob, Mat& top_blob, co...

FILE: src/layer/arm/convolution_4x4.h
  function conv4x4s4_neon (line 4) | static void conv4x4s4_neon(const Mat& bottom_blob, Mat& top_blob, const ...

FILE: src/layer/arm/convolution_5x5.h
  function conv5x5s1_neon (line 4) | static void conv5x5s1_neon(const Mat& bottom_blob, Mat& top_blob, const ...
  function conv5x5s2_neon (line 962) | static void conv5x5s2_neon(const Mat& bottom_blob, Mat& top_blob, const ...

FILE: src/layer/arm/convolution_5x5_pack4.h
  function conv5x5s1_pack4_neon (line 4) | static void conv5x5s1_pack4_neon(const Mat& bottom_blob, Mat& top_blob, ...
  function conv5x5s2_pack4_neon (line 2472) | static void conv5x5s2_pack4_neon(const Mat& bottom_blob, Mat& top_blob, ...

FILE: src/layer/arm/convolution_5x5_pack4_bf16s.h
  function conv5x5s1_pack4_bf16s_neon (line 4) | static void conv5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_...
  function conv5x5s2_pack4_bf16s_neon (line 6319) | static void conv5x5s2_pack4_bf16s_neon(const Mat& bottom_blob, Mat& top_...

FILE: src/layer/arm/convolution_5x5_pack8_fp16s.h
  function conv5x5s1_pack8_fp16sa_neon (line 4) | static void conv5x5s1_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& top...
  function conv5x5s2_pack8_fp16sa_neon (line 2253) | static void conv5x5s2_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_7x7.h
  function conv7x7s1_neon (line 4) | static void conv7x7s1_neon(const Mat& bottom_blob, Mat& top_blob, const ...
  function conv7x7s2_neon (line 691) | static void conv7x7s2_neon(const Mat& bottom_blob, Mat& top_blob, const ...

FILE: src/layer/arm/convolution_7x7_pack1to4.h
  function conv7x7s2_pack1to4_neon (line 4) | static void conv7x7s2_pack1to4_neon(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/convolution_7x7_pack1to4_bf16s.h
  function conv7x7s2_pack1to4_bf16s_neon (line 4) | static void conv7x7s2_pack1to4_bf16s_neon(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolution_7x7_pack1to8_fp16s.h
  function conv7x7s2_pack1to8_fp16sa_neon (line 4) | static void conv7x7s2_pack1to8_fp16sa_neon(const Mat& bottom_blob, Mat& ...

FILE: src/layer/arm/convolution_arm.cpp
  type ncnn (line 17) | namespace ncnn {
    function convolution_transform_kernel_packed_neon (line 82) | static void convolution_transform_kernel_packed_neon(const Mat& weight...
    function convolution_transform_kernel_packed_bf16s_neon (line 842) | static void convolution_transform_kernel_packed_bf16s_neon(const Mat& ...

FILE: src/layer/arm/convolution_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/convolution_arm_asimddp.cpp
  type ncnn (line 7) | namespace ncnn {
    function convolution_transform_kernel_packed_int8_asimddp (line 13) | void convolution_transform_kernel_packed_int8_asimddp(const Mat& kerne...
    function convolution_packed_int8_asimddp (line 18) | void convolution_packed_int8_asimddp(const Mat& bottom_blob, Mat& top_...
    function convolution_im2col_gemm_transform_kernel_int8_asimddp (line 24) | void convolution_im2col_gemm_transform_kernel_int8_asimddp(const Mat& ...
    function convolution_im2col_gemm_int8_asimddp (line 29) | int convolution_im2col_gemm_int8_asimddp(const Mat& bottom_blob, Mat& ...

FILE: src/layer/arm/convolution_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {
    function convolution_transform_kernel_packed_fp16s_neon (line 38) | static void convolution_transform_kernel_packed_fp16s_neon(const Mat& ...

FILE: src/layer/arm/convolution_arm_i8mm.cpp
  type ncnn (line 7) | namespace ncnn {
    function convolution_transform_kernel_packed_int8_i8mm (line 13) | void convolution_transform_kernel_packed_int8_i8mm(const Mat& kernel, ...
    function convolution_packed_int8_i8mm (line 18) | void convolution_packed_int8_i8mm(const Mat& bottom_blob, Mat& top_blo...
    function convolution_im2col_gemm_transform_kernel_int8_i8mm (line 24) | void convolution_im2col_gemm_transform_kernel_int8_i8mm(const Mat& ker...
    function convolution_im2col_gemm_int8_i8mm (line 29) | int convolution_im2col_gemm_int8_i8mm(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_im2col_gemm.h
  function convolution_im2col_pack_A_tile (line 4) | static void convolution_im2col_pack_A_tile(const Mat& A, Mat& AT, int i,...
  function convolution_gemm_transB_packed_tile (line 178) | static void convolution_gemm_transB_packed_tile(const Mat& AT_tile, cons...
  function convolution_im2col_gemm_get_optimal_tile_mnk (line 5937) | static void convolution_im2col_gemm_get_optimal_tile_mnk(int M, int N, i...
  function convolution_im2col_input_tile_conv1x1s1d1 (line 6056) | static void convolution_im2col_input_tile_conv1x1s1d1(const Mat& bottom_...
  function convolution_im2col_input_tile (line 6365) | static void convolution_im2col_input_tile(const Mat& bottom_blob, Mat& B...
  function convolution_im2col_gemm_transform_kernel (line 6749) | static void convolution_im2col_gemm_transform_kernel(const Mat& kernel, ...
  function convolution_im2col_gemm (line 6821) | static int convolution_im2col_gemm(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/convolution_im2col_gemm_bf16s.h
  function convolution_gemm_transB_packed_tile_bf16s (line 4) | static void convolution_gemm_transB_packed_tile_bf16s(const Mat& AT_tile...
  function convolution_im2col_gemm_get_optimal_tile_mnk_bf16s (line 5816) | static void convolution_im2col_gemm_get_optimal_tile_mnk_bf16s(int M, in...
  function convolution_im2col_gemm_transform_kernel_bf16s (line 5927) | static void convolution_im2col_gemm_transform_kernel_bf16s(const Mat& ke...
  function convolution_im2col_gemm_bf16s (line 6000) | static int convolution_im2col_gemm_bf16s(const Mat& bottom_blob, Mat& to...

FILE: src/layer/arm/convolution_im2col_gemm_bf16s_fp16s.h
  function convolution_im2col_pack_A_tile_bf16_fp16 (line 4) | static void convolution_im2col_pack_A_tile_bf16_fp16(const Mat& A, Mat& ...
  function convolution_im2col_input_tile_conv1x1s1d1_bf16_fp16 (line 192) | static void convolution_im2col_input_tile_conv1x1s1d1_bf16_fp16(const Ma...
  function convolution_im2col_input_tile_bf16_fp16 (line 652) | static void convolution_im2col_input_tile_bf16_fp16(const Mat& bottom_bl...

FILE: src/layer/arm/convolution_im2col_gemm_fp16s.h
  function convolution_gemm_transB_packed_tile_fp16sa (line 4) | static void convolution_gemm_transB_packed_tile_fp16sa(const Mat& AT_til...
  function convolution_im2col_gemm_get_optimal_tile_mnk_fp16sa (line 3009) | static void convolution_im2col_gemm_get_optimal_tile_mnk_fp16sa(int M, i...
  function convolution_im2col_gemm_transform_kernel_fp16sa (line 3066) | static void convolution_im2col_gemm_transform_kernel_fp16sa(const Mat& k...
  function convolution_im2col_gemm_fp16sa (line 3137) | static int convolution_im2col_gemm_fp16sa(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolution_im2col_gemm_int8.h
  function convolution_im2col_pack_A_tile_int8 (line 14) | static void convolution_im2col_pack_A_tile_int8(const Mat& A, Mat& AT, i...
  function convolution_gemm_transB_packed_tile_int8 (line 639) | static void convolution_gemm_transB_packed_tile_int8(const Mat& AT_tile,...
  function convolution_im2col_gemm_get_optimal_tile_mnk_int8 (line 7519) | static void convolution_im2col_gemm_get_optimal_tile_mnk_int8(int M, int...
  function convolution_im2col_input_tile_conv1x1s1d1_int8 (line 7612) | static void convolution_im2col_input_tile_conv1x1s1d1_int8(const Mat& bo...
  function convolution_im2col_input_tile_int8_impl (line 8220) | static inline void convolution_im2col_input_tile_int8_impl(const Mat& bo...
  function convolution_im2col_gemm_transform_kernel_int8 (line 10848) | static void convolution_im2col_gemm_transform_kernel_int8(const Mat& ker...
  function convolution_im2col_gemm_int8 (line 10936) | static int convolution_im2col_gemm_int8(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolution_packed.h
  function convolution_transform_kernel_packed (line 4) | static void convolution_transform_kernel_packed(const Mat& kernel, Mat& ...
  function convolution_packed (line 496) | static void convolution_packed(const Mat& bottom_blob, Mat& top_blob, co...

FILE: src/layer/arm/convolution_packed_bf16s.h
  function convolution_transform_kernel_packed_bf16s (line 4) | static void convolution_transform_kernel_packed_bf16s(const Mat& kernel,...
  function convolution_packed_bf16s (line 496) | static void convolution_packed_bf16s(const Mat& bottom_blob, Mat& top_bl...

FILE: src/layer/arm/convolution_packed_fp16s.h
  function convolution_transform_kernel_packed_fp16s (line 4) | static void convolution_transform_kernel_packed_fp16s(const Mat& kernel,...
  function convolution_packed_fp16s (line 445) | static void convolution_packed_fp16s(const Mat& bottom_blob, Mat& top_bl...
  function convolution_packed_fp16sa (line 1220) | static void convolution_packed_fp16sa(const Mat& bottom_blob, Mat& top_b...

FILE: src/layer/arm/convolution_packed_int8.h
  function convolution_transform_kernel_packed_int8 (line 14) | static void convolution_transform_kernel_packed_int8(const Mat& kernel, ...
  function convolution_packed_int8 (line 517) | static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/convolutiondepthwise_3x3.h
  function convdw3x3s1_neon (line 4) | static void convdw3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, cons...
  function convdw3x3s2_neon (line 816) | static void convdw3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, cons...

FILE: src/layer/arm/convolutiondepthwise_3x3_fp16s.h
  function convdw3x3s1_fp16sa_neon (line 4) | static void convdw3x3s1_fp16sa_neon(const Mat& bottom_blob, Mat& top_blo...
  function convdw3x3s2_fp16sa_neon (line 306) | static void convdw3x3s2_fp16sa_neon(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/convolutiondepthwise_3x3_int8.h
  function convdw3x3s1_int8_neon (line 4) | static void convdw3x3s1_int8_neon(const Mat& bottom_blob, Mat& top_blob,...
  function convdw3x3s2_int8_neon (line 546) | static void convdw3x3s2_int8_neon(const Mat& bottom_blob, Mat& top_blob,...
  function convdw3x3s1_int8_requant_neon (line 777) | static void convdw3x3s1_int8_requant_neon(const Mat& bottom_blob, Mat& t...
  function convdw3x3s2_int8_requant_neon (line 1491) | static void convdw3x3s2_int8_requant_neon(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolutiondepthwise_3x3_pack4.h
  function convdw3x3s1_pack4_neon (line 4) | static void convdw3x3s1_pack4_neon(const Mat& bottom_blob, Mat& top_blob...
  function convdw3x3s2_pack4_neon (line 793) | static void convdw3x3s2_pack4_neon(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/convolutiondepthwise_3x3_pack4_bf16s.h
  function convdw3x3s1_pack4_bf16s_neon (line 4) | static void convdw3x3s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& to...
  function convdw3x3s2_pack4_bf16s_neon (line 1042) | static void convdw3x3s2_pack4_bf16s_neon(const Mat& bottom_blob, Mat& to...

FILE: src/layer/arm/convolutiondepthwise_3x3_pack8_fp16s.h
  function convdw3x3s1_pack8_fp16sa_neon (line 4) | static void convdw3x3s1_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& t...
  function convdw3x3s2_pack8_fp16sa_neon (line 614) | static void convdw3x3s2_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolutiondepthwise_3x3_pack8_int8.h
  function convdw3x3s1_pack8_int8_neon (line 4) | static void convdw3x3s1_pack8_int8_neon(const Mat& bottom_blob, Mat& top...
  function convdw3x3s2_pack8_int8_neon (line 322) | static void convdw3x3s2_pack8_int8_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/convolutiondepthwise_5x5.h
  function convdw5x5s1_neon (line 4) | static void convdw5x5s1_neon(const Mat& bottom_blob, Mat& top_blob, cons...
  function convdw5x5s2_neon (line 1650) | static void convdw5x5s2_neon(const Mat& bottom_blob, Mat& top_blob, cons...

FILE: src/layer/arm/convolutiondepthwise_5x5_pack4.h
  function convdw5x5s1_pack4_neon (line 4) | static void convdw5x5s1_pack4_neon(const Mat& bottom_blob, Mat& top_blob...
  function convdw5x5s2_pack4_neon (line 1208) | static void convdw5x5s2_pack4_neon(const Mat& bottom_blob, Mat& top_blob...

FILE: src/layer/arm/convolutiondepthwise_5x5_pack4_bf16s.h
  function convdw5x5s1_pack4_bf16s_neon (line 4) | static void convdw5x5s1_pack4_bf16s_neon(const Mat& bottom_blob, Mat& to...
  function convdw5x5s2_pack4_bf16s_neon (line 2353) | static void convdw5x5s2_pack4_bf16s_neon(const Mat& bottom_blob, Mat& to...

FILE: src/layer/arm/convolutiondepthwise_5x5_pack8_fp16s.h
  function convdw5x5s1_pack8_fp16sa_neon (line 4) | static void convdw5x5s1_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& t...
  function convdw5x5s2_pack8_fp16sa_neon (line 1262) | static void convdw5x5s2_pack8_fp16sa_neon(const Mat& bottom_blob, Mat& t...

FILE: src/layer/arm/convolutiondepthwise_arm.cpp
  type ncnn (line 16) | namespace ncnn {

FILE: src/layer/arm/convolutiondepthwise_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/crop_arm.cpp
  type ncnn (line 12) | namespace ncnn {
    function crop_pack8_neon (line 29) | static void crop_pack8_neon(const Mat& src, Mat& dst, int top, int left)
    function crop_pack8_bf16_fp16s_neon (line 54) | static void crop_pack8_bf16_fp16s_neon(const Mat& src, Mat& dst, int t...
    function crop_pack4_neon (line 77) | static void crop_pack4_neon(const Mat& src, Mat& dst, int top, int left)
    function crop_pack4_bf16_fp16s_neon (line 100) | static void crop_pack4_bf16_fp16s_neon(const Mat& src, Mat& dst, int t...

FILE: src/layer/arm/crop_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/deconvolution_3x3.h
  function deconv3x3s1_neon (line 4) | static void deconv3x3s1_neon(const Mat& bottom_blob, Mat& top_blob, cons...
  function deconv3x3s2_neon (line 130) | static void deconv3x3s2_neon(const Mat& bottom_blob, Mat& top_blob, cons...

FILE: src/layer/arm/deconvolution_4x4.h
  function deconv4x4s1_neon (line 4) | static void deconv4x4s1_neon(const Mat& bottom_blob, Mat& top_blob, cons...
  function deconv4x4s2_neon (line 173) | static void deconv4x4s2_neon(const Mat& bottom_blob, Mat& top_blob, cons...

FILE: src/layer/arm/deconvolution_4x4_fp16s.h
  function deconv4x4s2_fp16sa_neon (line 4) | static void deconv4x4s2_fp16sa_neon(const Mat& bottom_blob, Mat& top_blo...

FILE: src/layer/arm/deconvolution_arm.cpp
  type ncnn (line 17) | namespace ncnn {

FILE: src/layer/arm/deconvolution_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/deconvolution_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/deconvolutiondepthwise_arm.cpp
  type ncnn (line 17) | namespace ncnn {

FILE: src/layer/arm/deconvolutiondepthwise_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/dequantize_arm.cpp
  type ncnn (line 14) | namespace ncnn {
    function dequantize (line 30) | static void dequantize(const int* intptr, float* ptr, const Mat& scale...
    function dequantize_bf16s (line 231) | static void dequantize_bf16s(const int* intptr, unsigned short* ptr, c...

FILE: src/layer/arm/dequantize_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/dequantize_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {
    function dequantize_fp16s (line 14) | static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat...

FILE: src/layer/arm/dropout_arm.cpp
  type ncnn (line 10) | namespace ncnn {

FILE: src/layer/arm/dropout_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/eltwise_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/eltwise_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/eltwise_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/flatten_arm.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/flatten_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/gelu_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/gelu_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/gelu_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/gemm_arm.cpp
  type ncnn (line 15) | namespace ncnn {
    function pack_A_tile (line 45) | void pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int ...
    function transpose_pack_A_tile (line 256) | static void transpose_pack_A_tile(const Mat& A, Mat& AT, int i, int ma...
    function pack_B_tile (line 400) | static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int ...
    function transpose_pack_B_tile (line 708) | static void transpose_pack_B_tile(const Mat& B, Mat& BT, int j, int ma...
    function transpose_unpack_output_tile (line 895) | static void transpose_unpack_output_tile(const Mat& topT, Mat& top_blo...
    function gemm_transB_packed_tile (line 1042) | static void gemm_transB_packed_tile(const Mat& AT_tile, const Mat& BT_...
    function get_optimal_tile_mnk (line 3733) | static void get_optimal_tile_mnk(int M, int N, int K, int constant_TIL...
    function gemm_arm (line 3857) | static int gemm_arm(const Mat& A, const Mat& B, const Mat& C, Mat& top...
    function gemm_AT_arm (line 3979) | static int gemm_AT_arm(const Mat& AT, const Mat& B, const Mat& C, Mat&...
    function gemm_BT_arm (line 4080) | static int gemm_BT_arm(const Mat& A, const Mat& BT, const Mat& C, Mat&...
    function gemm_AT_BT_arm (line 4169) | static int gemm_AT_BT_arm(const Mat& AT, const Mat& BT, const Mat& C, ...
    function gemm_arm_bf16s (line 4612) | static int gemm_arm_bf16s(const Mat& A, const Mat& B, const Mat& C, Ma...
    function gemm_AT_arm_bf16s (line 4735) | static int gemm_AT_arm_bf16s(const Mat& AT, const Mat& B, const Mat& C...
    function gemm_BT_arm_bf16s (line 4837) | static int gemm_BT_arm_bf16s(const Mat& A, const Mat& BT, const Mat& C...
    function gemm_AT_BT_arm_bf16s (line 4927) | static int gemm_AT_BT_arm_bf16s(const Mat& AT, const Mat& BT, const Ma...
    function compute_A_tile_int8_scales (line 5319) | static void compute_A_tile_int8_scales(const Mat& A, Mat& scales, floa...
    function transpose_compute_A_tile_int8_scales (line 5340) | static void transpose_compute_A_tile_int8_scales(const Mat& A, Mat& sc...
    function pack_A_tile_quantize (line 5361) | static void pack_A_tile_quantize(const Mat& A, Mat& AT, int i, int max...
    function transpose_pack_A_tile_quantize (line 5382) | static void transpose_pack_A_tile_quantize(const Mat& A, Mat& AT, int ...
    function compute_B_int8_scale (line 5403) | static void compute_B_int8_scale(const Mat& B, float& scale, int input...
    function pack_B_tile_quantize (line 5424) | static void pack_B_tile_quantize(const Mat& B, Mat& BT, int j, int max...
    function transpose_pack_B_tile_quantize (line 5445) | static void transpose_pack_B_tile_quantize(const Mat& B, Mat& BT, int ...
    function unpack_output_tile_dequantize (line 5466) | static void unpack_output_tile_dequantize(const Mat& topT, const Mat& ...
    function transpose_unpack_output_tile_dequantize (line 5487) | static void transpose_unpack_output_tile_dequantize(const Mat& topT, c...
    type gemm_arm_int8_omp_args (line 5508) | struct gemm_arm_int8_omp_args
    function gemm_arm_int8 (line 5522) | static int gemm_arm_int8(const Mat& A, const Mat& B, const Mat& C, Mat...
    function gemm_AT_arm_int8 (line 5661) | static int gemm_AT_arm_int8(const Mat& AT, const Mat& A_int8_scales, c...
    function gemm_BT_arm_int8 (line 5773) | static int gemm_BT_arm_int8(const Mat& A, const Mat& BT, float B_int8_...
    function gemm_AT_BT_arm_int8 (line 5880) | static int gemm_AT_BT_arm_int8(const Mat& AT, const Mat& A_int8_scales...

FILE: src/layer/arm/gemm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/gemm_arm_asimddp.cpp
  type ncnn (line 8) | namespace ncnn {
    function pack_A_tile_int8_asimddp (line 17) | void pack_A_tile_int8_asimddp(const Mat& A, Mat& AT, int i, int max_ii...
    function transpose_pack_A_tile_int8_asimddp (line 22) | void transpose_pack_A_tile_int8_asimddp(const Mat& A, Mat& AT, int i, ...
    function pack_B_tile_int8_asimddp (line 27) | void pack_B_tile_int8_asimddp(const Mat& B, Mat& BT, int j, int max_jj...
    function transpose_pack_B_tile_int8_asimddp (line 32) | void transpose_pack_B_tile_int8_asimddp(const Mat& B, Mat& BT, int j, ...
    function pack_A_tile_fp32_to_int8_asimddp (line 37) | void pack_A_tile_fp32_to_int8_asimddp(const Mat& A, Mat& AT, int i, in...
    function transpose_pack_A_tile_fp32_to_int8_asimddp (line 42) | void transpose_pack_A_tile_fp32_to_int8_asimddp(const Mat& A, Mat& AT,...
    function pack_B_tile_fp32_to_int8_asimddp (line 47) | void pack_B_tile_fp32_to_int8_asimddp(const Mat& B, Mat& BT, int j, in...
    function transpose_pack_B_tile_fp32_to_int8_asimddp (line 52) | void transpose_pack_B_tile_fp32_to_int8_asimddp(const Mat& B, Mat& BT,...
    function unpack_output_tile_int32_to_fp32_asimddp (line 57) | void unpack_output_tile_int32_to_fp32_asimddp(const Mat& topT, const M...
    function transpose_unpack_output_tile_int32_to_fp32_asimddp (line 62) | void transpose_unpack_output_tile_int32_to_fp32_asimddp(const Mat& top...
    function gemm_transB_packed_tile_int8_asimddp (line 67) | void gemm_transB_packed_tile_int8_asimddp(const Mat& AT_tile, const Ma...
    function pack_A_tile_fp16_to_int8_asimddp (line 72) | void pack_A_tile_fp16_to_int8_asimddp(const Mat& A, Mat& AT, int i, in...
    function transpose_pack_A_tile_fp16_to_int8_asimddp (line 77) | void transpose_pack_A_tile_fp16_to_int8_asimddp(const Mat& A, Mat& AT,...
    function pack_B_tile_fp16_to_int8_asimddp (line 82) | void pack_B_tile_fp16_to_int8_asimddp(const Mat& B, Mat& BT, int j, in...
    function transpose_pack_B_tile_fp16_to_int8_asimddp (line 87) | void transpose_pack_B_tile_fp16_to_int8_asimddp(const Mat& B, Mat& BT,...
    function unpack_output_tile_int32_to_fp16_asimddp (line 92) | void unpack_output_tile_int32_to_fp16_asimddp(const Mat& topT, const M...
    function transpose_unpack_output_tile_int32_to_fp16_asimddp (line 97) | void transpose_unpack_output_tile_int32_to_fp16_asimddp(const Mat& top...
    function pack_A_tile_bf16_to_int8_asimddp (line 103) | void pack_A_tile_bf16_to_int8_asimddp(const Mat& A, Mat& AT, int i, in...
    function transpose_pack_A_tile_bf16_to_int8_asimddp (line 108) | void transpose_pack_A_tile_bf16_to_int8_asimddp(const Mat& A, Mat& AT,...
    function pack_B_tile_bf16_to_int8_asimddp (line 113) | void pack_B_tile_bf16_to_int8_asimddp(const Mat& B, Mat& BT, int j, in...
    function transpose_pack_B_tile_bf16_to_int8_asimddp (line 118) | void transpose_pack_B_tile_bf16_to_int8_asimddp(const Mat& B, Mat& BT,...
    function unpack_output_tile_int32_to_bf16_asimddp (line 123) | void unpack_output_tile_int32_to_bf16_asimddp(const Mat& topT, const M...
    function transpose_unpack_output_tile_int32_to_bf16_asimddp (line 128) | void transpose_unpack_output_tile_int32_to_bf16_asimddp(const Mat& top...

FILE: src/layer/arm/gemm_arm_asimdfhm.cpp
  type ncnn (line 14) | namespace ncnn {
    function gemm_transB_packed_tile_fp16s_asimdfhm (line 18) | void gemm_transB_packed_tile_fp16s_asimdfhm(const Mat& AT_tile, const ...

FILE: src/layer/arm/gemm_arm_asimdhp.cpp
  type ncnn (line 14) | namespace ncnn {
    function gemm_transB_packed_tile_fp16sa (line 23) | static void gemm_transB_packed_tile_fp16sa(const Mat& AT_tile, const M...
    function get_optimal_tile_mnk_fp16sa (line 2274) | static void get_optimal_tile_mnk_fp16sa(int M, int N, int K, int const...
    function gemm_arm_fp16sa (line 2338) | static int gemm_arm_fp16sa(const Mat& A, const Mat& B, const Mat& C, M...
    function gemm_AT_arm_fp16sa (line 2460) | static int gemm_AT_arm_fp16sa(const Mat& AT, const Mat& B, const Mat& ...
    function gemm_BT_arm_fp16sa (line 2561) | static int gemm_BT_arm_fp16sa(const Mat& A, const Mat& BT, const Mat& ...
    function gemm_AT_BT_arm_fp16sa (line 2650) | static int gemm_AT_BT_arm_fp16sa(const Mat& AT, const Mat& BT, const M...
    function compute_A_tile_fp16_int8_scales_asimdhp (line 3027) | void compute_A_tile_fp16_int8_scales_asimdhp(const Mat& A, Mat& scales...
    function transpose_compute_A_tile_fp16_int8_scales_asimdhp (line 3032) | void transpose_compute_A_tile_fp16_int8_scales_asimdhp(const Mat& A, M...
    function compute_B_fp16_int8_scale_asimdhp (line 3037) | void compute_B_fp16_int8_scale_asimdhp(const Mat& B, float& scale)

FILE: src/layer/arm/gemm_arm_i8mm.cpp
  type ncnn (line 8) | namespace ncnn {
    function pack_A_tile_int8_i8mm (line 17) | void pack_A_tile_int8_i8mm(const Mat& A, Mat& AT, int i, int max_ii, i...
    function transpose_pack_A_tile_int8_i8mm (line 22) | void transpose_pack_A_tile_int8_i8mm(const Mat& A, Mat& AT, int i, int...
    function pack_B_tile_int8_i8mm (line 27) | void pack_B_tile_int8_i8mm(const Mat& B, Mat& BT, int j, int max_jj, i...
    function transpose_pack_B_tile_int8_i8mm (line 32) | void transpose_pack_B_tile_int8_i8mm(const Mat& B, Mat& BT, int j, int...
    function pack_A_tile_fp32_to_int8_i8mm (line 37) | void pack_A_tile_fp32_to_int8_i8mm(const Mat& A, Mat& AT, int i, int m...
    function transpose_pack_A_tile_fp32_to_int8_i8mm (line 42) | void transpose_pack_A_tile_fp32_to_int8_i8mm(const Mat& A, Mat& AT, in...
    function pack_B_tile_fp32_to_int8_i8mm (line 47) | void pack_B_tile_fp32_to_int8_i8mm(const Mat& B, Mat& BT, int j, int m...
    function transpose_pack_B_tile_fp32_to_int8_i8mm (line 52) | void transpose_pack_B_tile_fp32_to_int8_i8mm(const Mat& B, Mat& BT, in...
    function gemm_transB_packed_tile_int8_i8mm (line 57) | void gemm_transB_packed_tile_int8_i8mm(const Mat& AT_tile, const Mat& ...
    function pack_A_tile_fp16_to_int8_i8mm (line 62) | void pack_A_tile_fp16_to_int8_i8mm(const Mat& A, Mat& AT, int i, int m...
    function transpose_pack_A_tile_fp16_to_int8_i8mm (line 67) | void transpose_pack_A_tile_fp16_to_int8_i8mm(const Mat& A, Mat& AT, in...
    function pack_B_tile_fp16_to_int8_i8mm (line 72) | void pack_B_tile_fp16_to_int8_i8mm(const Mat& B, Mat& BT, int j, int m...
    function transpose_pack_B_tile_fp16_to_int8_i8mm (line 77) | void transpose_pack_B_tile_fp16_to_int8_i8mm(const Mat& B, Mat& BT, in...
    function pack_A_tile_bf16_to_int8_i8mm (line 83) | void pack_A_tile_bf16_to_int8_i8mm(const Mat& A, Mat& AT, int i, int m...
    function transpose_pack_A_tile_bf16_to_int8_i8mm (line 88) | void transpose_pack_A_tile_bf16_to_int8_i8mm(const Mat& A, Mat& AT, in...
    function pack_B_tile_bf16_to_int8_i8mm (line 93) | void pack_B_tile_bf16_to_int8_i8mm(const Mat& B, Mat& BT, int j, int m...
    function transpose_pack_B_tile_bf16_to_int8_i8mm (line 98) | void transpose_pack_B_tile_bf16_to_int8_i8mm(const Mat& B, Mat& BT, in...

FILE: src/layer/arm/gemm_arm_vfpv4.cpp
  type ncnn (line 14) | namespace ncnn {
    function gemm_arm_fp16s (line 25) | static int gemm_arm_fp16s(const Mat& A, const Mat& B, const Mat& C, Ma...
    function gemm_AT_arm_fp16s (line 148) | static int gemm_AT_arm_fp16s(const Mat& AT, const Mat& B, const Mat& C...
    function gemm_BT_arm_fp16s (line 250) | static int gemm_BT_arm_fp16s(const Mat& A, const Mat& BT, const Mat& C...
    function gemm_AT_BT_arm_fp16s (line 340) | static int gemm_AT_BT_arm_fp16s(const Mat& AT, const Mat& BT, const Ma...
    function compute_A_tile_fp16_int8_scales_vfpv4 (line 714) | void compute_A_tile_fp16_int8_scales_vfpv4(const Mat& A, Mat& scales, ...
    function transpose_compute_A_tile_fp16_int8_scales_vfpv4 (line 719) | void transpose_compute_A_tile_fp16_int8_scales_vfpv4(const Mat& A, Mat...
    function pack_A_tile_fp16_to_int8_vfpv4 (line 724) | void pack_A_tile_fp16_to_int8_vfpv4(const Mat& A, Mat& AT, int i, int ...
    function transpose_pack_A_tile_fp16_to_int8_vfpv4 (line 729) | void transpose_pack_A_tile_fp16_to_int8_vfpv4(const Mat& A, Mat& AT, i...
    function compute_B_fp16_int8_scale_vfpv4 (line 734) | void compute_B_fp16_int8_scale_vfpv4(const Mat& B, float& scale)
    function pack_B_tile_fp16_to_int8_vfpv4 (line 739) | void pack_B_tile_fp16_to_int8_vfpv4(const Mat& B, Mat& BT, int j, int ...
    function transpose_pack_B_tile_fp16_to_int8_vfpv4 (line 744) | void transpose_pack_B_tile_fp16_to_int8_vfpv4(const Mat& B, Mat& BT, i...
    function unpack_output_tile_int32_to_fp16_vfpv4 (line 749) | void unpack_output_tile_int32_to_fp16_vfpv4(const Mat& topT, const Mat...
    function transpose_unpack_output_tile_int32_to_fp16_vfpv4 (line 754) | void transpose_unpack_output_tile_int32_to_fp16_vfpv4(const Mat& topT,...

FILE: src/layer/arm/gemm_bf16s.h
  function pack_A_tile_fp32_to_bf16 (line 4) | static void pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, int i, int m...
  function transpose_pack_A_tile_fp32_to_bf16 (line 193) | static void transpose_pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, in...
  function pack_B_tile_fp32_to_bf16 (line 257) | static void pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, int j, int m...
  function transpose_pack_B_tile_fp32_to_bf16 (line 568) | static void transpose_pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, in...
  function transpose_unpack_output_tile_fp32_to_bf16 (line 646) | static void transpose_unpack_output_tile_fp32_to_bf16(const Mat& topT, M...
  function gemm_transB_packed_tile_bf16s (line 785) | static void gemm_transB_packed_tile_bf16s(const Mat& AT_tile, const Mat&...

FILE: src/layer/arm/gemm_bf16s_fp16s.h
  function pack_A_tile_bf16_fp16 (line 4) | static void pack_A_tile_bf16_fp16(const Mat& A, Mat& AT, int i, int max_...
  function transpose_pack_A_tile_bf16_fp16 (line 247) | static void transpose_pack_A_tile_bf16_fp16(const Mat& A, Mat& AT, int i...
  function pack_B_tile_bf16_fp16 (line 452) | static void pack_B_tile_bf16_fp16(const Mat& B, Mat& BT, int j, int max_...
  function transpose_pack_B_tile_bf16_fp16 (line 902) | static void transpose_pack_B_tile_bf16_fp16(const Mat& B, Mat& BT, int j...
  function transpose_unpack_output_tile_bf16_fp16 (line 1182) | static void transpose_unpack_output_tile_bf16_fp16(const Mat& topT, Mat&...
  function get_optimal_tile_mnk_bf16s_fp16s (line 1511) | static void get_optimal_tile_mnk_bf16s_fp16s(int M, int N, int K, int co...

FILE: src/layer/arm/gemm_fp16s.h
  function pack_A_tile_fp32_to_fp16 (line 8) | static void pack_A_tile_fp32_to_fp16(const Mat& A, Mat& AT, int i, int m...
  function transpose_pack_A_tile_fp32_to_fp16 (line 191) | static void transpose_pack_A_tile_fp32_to_fp16(const Mat& A, Mat& AT, in...
  function pack_B_tile_fp32_to_fp16 (line 253) | static void pack_B_tile_fp32_to_fp16(const Mat& B, Mat& BT, int j, int m...
  function transpose_pack_B_tile_fp32_to_fp16 (line 558) | static void transpose_pack_B_tile_fp32_to_fp16(const Mat& B, Mat& BT, in...
  function transpose_unpack_output_tile_fp32_to_fp16 (line 634) | static void transpose_unpack_output_tile_fp32_to_fp16(const Mat& topT, M...
  function gemm_transB_packed_tile_fp16s (line 773) | static void gemm_transB_packed_tile_fp16s(const Mat& AT_tile, const Mat&...

FILE: src/layer/arm/gemm_int8.h
  function pack_A_tile_int8 (line 30) | static void pack_A_tile_int8(const Mat& A, Mat& AT, int i, int max_ii, i...
  function transpose_pack_A_tile_int8 (line 598) | static void transpose_pack_A_tile_int8(const Mat& A, Mat& AT, int i, int...
  function pack_B_tile_int8 (line 866) | static void pack_B_tile_int8(const Mat& B, Mat& BT, int j, int max_jj, i...
  function transpose_pack_B_tile_int8 (line 1436) | static void transpose_pack_B_tile_int8(const Mat& B, Mat& BT, int j, int...
  function compute_A_tile_fp32_int8_scales (line 1706) | static void compute_A_tile_fp32_int8_scales(const Mat& A, Mat& scales, f...
  function pack_A_tile_fp32_to_int8 (line 1858) | static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int m...
  function transpose_compute_A_tile_fp32_int8_scales (line 2725) | static void transpose_compute_A_tile_fp32_int8_scales(const Mat& A, Mat&...
  function transpose_pack_A_tile_fp32_to_int8 (line 3016) | static void transpose_pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, in...
  function compute_B_fp32_int8_scale (line 3924) | static void compute_B_fp32_int8_scale(const Mat& B, float& scale)
  function pack_B_tile_fp32_to_int8 (line 3960) | static void pack_B_tile_fp32_to_int8(const Mat& B, Mat& BT, int j, int m...
  function transpose_pack_B_tile_fp32_to_int8 (line 4767) | static void transpose_pack_B_tile_fp32_to_int8(const Mat& B, Mat& BT, in...
  function unpack_output_tile_int32_to_fp32 (line 5611) | static void unpack_output_tile_int32_to_fp32(const Mat& topT, const Mat&...
  function transpose_unpack_output_tile_int32_to_fp32 (line 7691) | static void transpose_unpack_output_tile_int32_to_fp32(const Mat& topT, ...
  function gemm_transB_packed_tile_int8 (line 9859) | static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& ...
  function get_optimal_tile_mnk_int8 (line 14598) | static void get_optimal_tile_mnk_int8(int M, int N, int K, int constant_...

FILE: src/layer/arm/gemm_int8_bf16s.h
  function compute_A_tile_bf16_int8_scales (line 20) | static void compute_A_tile_bf16_int8_scales(const Mat& A, Mat& scales, f...
  function pack_A_tile_bf16_to_int8 (line 178) | static void pack_A_tile_bf16_to_int8(const Mat& A, Mat& AT, int i, int m...
  function transpose_compute_A_tile_bf16_int8_scales (line 1096) | static void transpose_compute_A_tile_bf16_int8_scales(const Mat& A, Mat&...
  function transpose_pack_A_tile_bf16_to_int8 (line 1323) | static void transpose_pack_A_tile_bf16_to_int8(const Mat& A, Mat& AT, in...
  function compute_B_bf16_int8_scale (line 2309) | static void compute_B_bf16_int8_scale(const Mat& B, float& scale)
  function pack_B_tile_bf16_to_int8 (line 2354) | static void pack_B_tile_bf16_to_int8(const Mat& B, Mat& BT, int j, int m...
  function transpose_pack_B_tile_bf16_to_int8 (line 3211) | static void transpose_pack_B_tile_bf16_to_int8(const Mat& B, Mat& BT, in...
  function unpack_output_tile_int32_to_bf16 (line 4134) | static void unpack_output_tile_int32_to_bf16(const Mat& topT, const Mat&...
  function transpose_unpack_output_tile_int32_to_bf16 (line 6324) | static void transpose_unpack_output_tile_int32_to_bf16(const Mat& topT, ...

FILE: src/layer/arm/gemm_int8_fp16s.h
  function compute_A_tile_fp16_int8_scales (line 26) | static void compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales, f...
  function pack_A_tile_fp16_to_int8 (line 351) | static void pack_A_tile_fp16_to_int8(const Mat& A, Mat& AT, int i, int m...
  function transpose_compute_A_tile_fp16_int8_scales (line 1438) | static void transpose_compute_A_tile_fp16_int8_scales(const Mat& A, Mat&...
  function transpose_pack_A_tile_fp16_to_int8 (line 1996) | static void transpose_pack_A_tile_fp16_to_int8(const Mat& A, Mat& AT, in...
  function compute_B_fp16_int8_scale (line 3233) | static void compute_B_fp16_int8_scale(const Mat& B, float& scale)
  function pack_B_tile_fp16_to_int8 (line 3370) | static void pack_B_tile_fp16_to_int8(const Mat& B, Mat& BT, int j, int m...
  function transpose_pack_B_tile_fp16_to_int8 (line 4396) | static void transpose_pack_B_tile_fp16_to_int8(const Mat& B, Mat& BT, in...
  function unpack_output_tile_int32_to_fp16 (line 5570) | static void unpack_output_tile_int32_to_fp16(const Mat& topT, const Mat&...
  function transpose_unpack_output_tile_int32_to_fp16 (line 7948) | static void transpose_unpack_output_tile_int32_to_fp16(const Mat& topT, ...

FILE: src/layer/arm/groupnorm_arm.cpp
  type ncnn (line 13) | namespace ncnn {
    function groupnorm (line 29) | static void groupnorm(float* ptr, const float* gamma_ptr, const float*...
    function groupnorm_bf16s (line 281) | static void groupnorm_bf16s(unsigned short* ptr, const float* gamma_pt...

FILE: src/layer/arm/groupnorm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/groupnorm_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {
    function groupnorm_fp16s (line 14) | static void groupnorm_fp16s(__fp16* ptr, const float* gamma_ptr, const...

FILE: src/layer/arm/gru_arm.cpp
  type ncnn (line 15) | namespace ncnn {
    function gru (line 251) | static int gru(const Mat& bottom_blob, Mat& top_blob, int reverse, con...
    function gru_bf16s (line 805) | static int gru_bf16s(const Mat& bottom_blob, Mat& top_blob, int revers...

FILE: src/layer/arm/gru_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/gru_arm_asimddp.cpp
  type ncnn (line 10) | namespace ncnn {
    function gru_transform_weight_int8_asimddp (line 14) | void gru_transform_weight_int8_asimddp(const Mat& weight_xc, const Mat...
    function gru_int8_asimddp (line 19) | void gru_int8_asimddp(const Mat& bottom_blob_int8, const Mat& bottom_b...

FILE: src/layer/arm/gru_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function gru_fp16sa (line 15) | static int gru_fp16sa(const Mat& bottom_blob, Mat& top_blob, int rever...
    function gru_fp16s (line 406) | static int gru_fp16s(const Mat& bottom_blob, Mat& top_blob, int revers...

FILE: src/layer/arm/gru_arm_vfpv4.cpp
  type ncnn (line 10) | namespace ncnn {
    function gru_int8_gate_output_vfpv4 (line 14) | void gru_int8_gate_output_vfpv4(const Mat& gates, Mat& hidden_state, M...

FILE: src/layer/arm/gru_int8.h
  function gru_transform_weight_int8 (line 13) | static void gru_transform_weight_int8(const Mat& weight_xc, const Mat& w...
  function gru_int8_gate_output (line 519) | static void gru_int8_gate_output(const Mat& gates, Mat& hidden_state, Ma...
  function gru_int8 (line 630) | static void gru_int8(const Mat& bottom_blob_int8, const Mat& bottom_blob...

FILE: src/layer/arm/hardsigmoid_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/hardsigmoid_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/hardsigmoid_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/hardswish_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/hardswish_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/hardswish_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/innerproduct_arm.cpp
  type ncnn (line 17) | namespace ncnn {

FILE: src/layer/arm/innerproduct_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/innerproduct_arm_asimdfhm.cpp
  type ncnn (line 15) | namespace ncnn {
    function innerproduct_pack4_fp16s_neon_asimdfhm (line 20) | void innerproduct_pack4_fp16s_neon_asimdfhm(const Mat& bottom_blob, Ma...
    function innerproduct_fp16s_neon_asimdfhm (line 25) | void innerproduct_fp16s_neon_asimdfhm(const Mat& bottom_blob, Mat& top...
    function innerproduct_gemm_fp16s_neon_asimdfhm (line 30) | void innerproduct_gemm_fp16s_neon_asimdfhm(const Mat& bottom_blob, Mat...
    function innerproduct_transform_kernel_fp16s_neon_asimdfhm (line 35) | void innerproduct_transform_kernel_fp16s_neon_asimdfhm(const Mat& weig...

FILE: src/layer/arm/innerproduct_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {
    function innerproduct_pack4_fp16s_neon_asimdhp (line 20) | void innerproduct_pack4_fp16s_neon_asimdhp(const Mat& bottom_blob, Mat...
    function innerproduct_fp16s_neon_asimdhp (line 25) | void innerproduct_fp16s_neon_asimdhp(const Mat& bottom_blob, Mat& top_...
    function innerproduct_gemm_fp16s_neon_asimdhp (line 30) | void innerproduct_gemm_fp16s_neon_asimdhp(const Mat& bottom_blob, Mat&...
    function innerproduct_transform_kernel_fp16s_neon_asimdhp (line 35) | void innerproduct_transform_kernel_fp16s_neon_asimdhp(const Mat& weigh...

FILE: src/layer/arm/innerproduct_arm_vfpv4.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/innerproduct_fp16s.h
  function innerproduct_pack4_fp16s_neon (line 16) | static void innerproduct_pack4_fp16s_neon(const Mat& bottom_blob, Mat& t...
  function innerproduct_fp16s_neon (line 280) | static void innerproduct_fp16s_neon(const Mat& bottom_blob, Mat& top_blo...
  function innerproduct_transform_kernel_fp16s_neon (line 500) | static void innerproduct_transform_kernel_fp16s_neon(const Mat& weight_d...

FILE: src/layer/arm/innerproduct_gemm_fp16s.h
  function innerproduct_gemm_fp16s_neon (line 12) | static void innerproduct_gemm_fp16s_neon(const Mat& bottom_blob, Mat& to...

FILE: src/layer/arm/instancenorm_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/instancenorm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/instancenorm_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/interp_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/interp_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/interp_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/interp_bicubic.h
  function interpolate_cubic (line 4) | static inline void interpolate_cubic(float fx, float* coeffs)
  function cubic_coeffs (line 19) | static void cubic_coeffs(int w, int outw, int* xofs, float* alpha, int a...
  function resize_bicubic_image (line 77) | static void resize_bicubic_image(const Mat& src, Mat& dst, float* alpha,...

FILE: src/layer/arm/interp_bicubic_bf16s.h
  function resize_bicubic_image_bf16s (line 4) | static void resize_bicubic_image_bf16s(const Mat& src, Mat& dst, float* ...

FILE: src/layer/arm/interp_bicubic_fp16s.h
  function interpolate_cubic_fp16sa (line 4) | static inline void interpolate_cubic_fp16sa(float fx, __fp16* coeffs)
  function cubic_coeffs_fp16sa (line 19) | static void cubic_coeffs_fp16sa(int w, int outw, int* xofs, __fp16* alph...
  function resize_bicubic_image_fp16s (line 77) | static void resize_bicubic_image_fp16s(const Mat& src, Mat& dst, float* ...
  function resize_bicubic_image_fp16sa (line 252) | static void resize_bicubic_image_fp16sa(const Mat& src, Mat& dst, __fp16...

FILE: src/layer/arm/interp_bicubic_pack4.h
  function resize_bicubic_image_pack4 (line 4) | static void resize_bicubic_image_pack4(const Mat& src, Mat& dst, float* ...

FILE: src/layer/arm/interp_bicubic_pack4_bf16s.h
  function resize_bicubic_image_pack4_bf16s (line 4) | static void resize_bicubic_image_pack4_bf16s(const Mat& src, Mat& dst, f...

FILE: src/layer/arm/interp_bicubic_pack4_fp16s.h
  function resize_bicubic_image_pack4_fp16s (line 4) | static void resize_bicubic_image_pack4_fp16s(const Mat& src, Mat& dst, f...
  function resize_bicubic_image_pack4_fp16sa (line 262) | static void resize_bicubic_image_pack4_fp16sa(const Mat& src, Mat& dst, ...

FILE: src/layer/arm/interp_bicubic_pack8_fp16s.h
  function resize_bicubic_image_pack8_fp16sa (line 4) | static void resize_bicubic_image_pack8_fp16sa(const Mat& src, Mat& dst, ...

FILE: src/layer/arm/interp_bilinear.h
  function linear_coeffs (line 4) | static void linear_coeffs(int w, int outw, int* xofs, float* alpha, int ...
  function resize_bilinear_image (line 41) | static void resize_bilinear_image(const Mat& src, Mat& dst, float* alpha...

FILE: src/layer/arm/interp_bilinear_bf16s.h
  function resize_bilinear_image_bf16s (line 4) | static void resize_bilinear_image_bf16s(const Mat& src, Mat& dst, float*...

FILE: src/layer/arm/interp_bilinear_fp16s.h
  function linear_coeffs_fp16sa (line 4) | static void linear_coeffs_fp16sa(int w, int outw, int* xofs, __fp16* alp...
  function resize_bilinear_image_fp16s (line 41) | static void resize_bilinear_image_fp16s(const Mat& src, Mat& dst, float*...
  function resize_bilinear_image_fp16sa (line 157) | static void resize_bilinear_image_fp16sa(const Mat& src, Mat& dst, __fp1...

FILE: src/layer/arm/interp_bilinear_pack4.h
  function resize_bilinear_image_pack4 (line 4) | static void resize_bilinear_image_pack4(const Mat& src, Mat& dst, float*...

FILE: src/layer/arm/interp_bilinear_pack4_bf16s.h
  function resize_bilinear_image_pack4_bf16s (line 4) | static void resize_bilinear_image_pack4_bf16s(const Mat& src, Mat& dst, ...

FILE: src/layer/arm/interp_bilinear_pack4_fp16s.h
  function resize_bilinear_image_pack4_fp16s (line 4) | static void resize_bilinear_image_pack4_fp16s(const Mat& src, Mat& dst, ...
  function resize_bilinear_image_pack4_fp16sa (line 111) | static void resize_bilinear_image_pack4_fp16sa(const Mat& src, Mat& dst,...

FILE: src/layer/arm/interp_bilinear_pack8_fp16s.h
  function resize_bilinear_image_pack8_fp16sa (line 4) | static void resize_bilinear_image_pack8_fp16sa(const Mat& src, Mat& dst,...

FILE: src/layer/arm/layernorm_arm.cpp
  type ncnn (line 14) | namespace ncnn {
    function layernorm (line 30) | static void layernorm(float* ptr, const float* gamma_ptr, const float*...
    function layernorm_bf16s (line 273) | static void layernorm_bf16s(unsigned short* ptr, const float* gamma_pt...

FILE: src/layer/arm/layernorm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/layernorm_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function layernorm_fp16s (line 15) | static void layernorm_fp16s(__fp16* ptr, const float* gamma_ptr, const...

FILE: src/layer/arm/lrn_arm.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/lrn_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/lstm_arm.cpp
  type ncnn (line 15) | namespace ncnn {
    function lstm (line 135) | static int lstm(const Mat& bottom_blob, Mat& top_blob, int reverse, co...
    function lstm_bf16s (line 600) | static int lstm_bf16s(const Mat& bottom_blob, Mat& top_blob, int rever...

FILE: src/layer/arm/lstm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/lstm_arm_asimddp.cpp
  type ncnn (line 10) | namespace ncnn {
    function lstm_transform_weight_int8_asimddp (line 14) | void lstm_transform_weight_int8_asimddp(const Mat& weight_xc, const Ma...
    function lstm_int8_asimddp (line 19) | void lstm_int8_asimddp(const Mat& bottom_blob_int8, const Mat& bottom_...

FILE: src/layer/arm/lstm_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function lstm_fp16sa (line 15) | static int lstm_fp16sa(const Mat& bottom_blob, Mat& top_blob, int reve...
    function lstm_fp16s (line 415) | static int lstm_fp16s(const Mat& bottom_blob, Mat& top_blob, int rever...

FILE: src/layer/arm/lstm_arm_vfpv4.cpp
  type ncnn (line 10) | namespace ncnn {
    function lstm_int8_gate_output_vfpv4 (line 14) | void lstm_int8_gate_output_vfpv4(const Mat& gates, const Mat& weight_h...

FILE: src/layer/arm/lstm_int8.h
  function lstm_transform_weight_int8 (line 13) | static void lstm_transform_weight_int8(const Mat& weight_xc, const Mat& ...
  function lstm_int8_gate_output (line 192) | static void lstm_int8_gate_output(const Mat& gates, const Mat& weight_hr...
  function lstm_int8 (line 379) | static void lstm_int8(const Mat& bottom_blob_int8, const Mat& bottom_blo...

FILE: src/layer/arm/matmul_arm.cpp
  type ncnn (line 10) | namespace ncnn {

FILE: src/layer/arm/matmul_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/mish_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/mish_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/mish_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/multiheadattention_arm.cpp
  type ncnn (line 9) | namespace ncnn {

FILE: src/layer/arm/multiheadattention_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/neon_mathfun.h
  function float32x4_t (line 59) | static inline float32x4_t log_ps(float32x4_t x)
  function float32x4_t (line 132) | static inline float32x4_t exp_ps(float32x4_t x)
  function sincos_ps (line 206) | static inline void sincos_ps(float32x4_t x, float32x4_t* ysin, float32x4...
  function float32x4_t (line 267) | static inline float32x4_t sin_ps(float32x4_t x)
  function float32x4_t (line 274) | static inline float32x4_t cos_ps(float32x4_t x)
  function float32x4_t (line 281) | static inline float32x4_t div_ps(float32x4_t a, float32x4_t b)
  function float32x4_t (line 293) | static inline float32x4_t tan_ps(float32x4_t x)
  function float32x4_t (line 301) | static inline float32x4_t pow_ps(float32x4_t a, float32x4_t b)
  function float32x4_t (line 307) | static inline float32x4_t sigmoid_ps(float32x4_t _v)
  function asincos_ps (line 328) | static inline void asincos_ps(float32x4_t x, float32x4_t* yasin, float32...
  function float32x4_t (line 381) | static inline float32x4_t asin_ps(float32x4_t x)
  function float32x4_t (line 388) | static inline float32x4_t acos_ps(float32x4_t x)
  function float32x4_t (line 395) | static inline float32x4_t atan2_ps(float32x4_t a, float32x4_t b)
  function float32x4_t (line 407) | static inline float32x4_t trunc_ps(const float32x4_t& x)
  function float32x4_t (line 418) | static inline float32x4_t fmod_ps(const float32x4_t& x, const float32x4_...
  function float32x4_t (line 430) | static inline float32x4_t round_ps(const float32x4_t& x)
  function float32x4_t (line 452) | static inline float32x4_t logaddexp_ps(const float32x4_t& x, const float...
  function float32x4_t (line 463) | static inline float32x4_t floor_ps(const float32x4_t& x)
  function float32x4_t (line 475) | static inline float32x4_t floor_divide_ps(const float32x4_t& x, const fl...
  function float32x4_t (line 485) | static inline float32x4_t remainder_ps(const float32x4_t& x, const float...

FILE: src/layer/arm/neon_mathfun_fp16s.h
  function float16x4_t (line 53) | static inline float16x4_t log_ps_f16(float16x4_t x)
  function float16x8_t (line 111) | static inline float16x8_t log_ps_f16(float16x8_t x)
  function float16x4_t (line 184) | static inline float16x4_t exp_ps_f16(float16x4_t x)
  function float16x8_t (line 241) | static inline float16x8_t exp_ps_f16(float16x8_t x)
  function sincos_ps_f16 (line 327) | static inline void sincos_ps_f16(float16x4_t x, float16x4_t* ysin, float...
  function sincos_ps_f16 (line 402) | static inline void sincos_ps_f16(float16x8_t x, float16x8_t* ysin, float...
  function float16x4_t (line 477) | static inline float16x4_t sin_ps_f16(float16x4_t x)
  function float16x8_t (line 484) | static inline float16x8_t sin_ps_f16(float16x8_t x)
  function float16x4_t (line 491) | static inline float16x4_t cos_ps_f16(float16x4_t x)
  function float16x8_t (line 498) | static inline float16x8_t cos_ps_f16(float16x8_t x)
  function float16x4_t (line 522) | static inline float16x4_t tanh_ps_f16(float16x4_t x)
  function float16x8_t (line 563) | static inline float16x8_t tanh_ps_f16(float16x8_t x)
  function float16x4_t (line 604) | static inline float16x4_t sigmoid_ps_f16(float16x4_t _v)
  function float16x8_t (line 613) | static inline float16x8_t sigmoid_ps_f16(float16x8_t _v)

FILE: src/layer/arm/neon_mathfun_tanh.h
  function float32x4_t (line 26) | static inline float32x4_t tanh_ps(float32x4_t x)

FILE: src/layer/arm/packing_arm.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/packing_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/padding_arm.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/padding_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/padding_pack4.h
  function padding_constant_pack4_neon (line 4) | static void padding_constant_pack4_neon(const Mat& src, Mat& dst, int to...
  function padding_replicate_pack4_neon (line 395) | static void padding_replicate_pack4_neon(const Mat& src, Mat& dst, int t...
  function padding_reflect_pack4_neon (line 471) | static void padding_reflect_pack4_neon(const Mat& src, Mat& dst, int top...

FILE: src/layer/arm/padding_pack4_bf16s_fp16s.h
  function padding_constant_pack4_bf16_fp16s_neon (line 4) | static void padding_constant_pack4_bf16_fp16s_neon(const Mat& src, Mat& ...
  function padding_replicate_pack4_bf16_fp16s_neon (line 381) | static void padding_replicate_pack4_bf16_fp16s_neon(const Mat& src, Mat&...
  function padding_reflect_pack4_bf16_fp16s_neon (line 457) | static void padding_reflect_pack4_bf16_fp16s_neon(const Mat& src, Mat& d...

FILE: src/layer/arm/padding_pack8_fp16s.h
  function padding_constant_pack8_fp16s_neon (line 4) | static void padding_constant_pack8_fp16s_neon(const Mat& src, Mat& dst, ...
  function padding_replicate_pack8_fp16s_neon (line 209) | static void padding_replicate_pack8_fp16s_neon(const Mat& src, Mat& dst,...
  function padding_reflect_pack8_fp16s_neon (line 285) | static void padding_reflect_pack8_fp16s_neon(const Mat& src, Mat& dst, i...

FILE: src/layer/arm/padding_pack8_int8.h
  function padding_constant_pack8_int8_neon (line 4) | static void padding_constant_pack8_int8_neon(const Mat& src, Mat& dst, i...
  function padding_replicate_pack8_int8_neon (line 387) | static void padding_replicate_pack8_int8_neon(const Mat& src, Mat& dst, ...
  function padding_reflect_pack8_int8_neon (line 463) | static void padding_reflect_pack8_int8_neon(const Mat& src, Mat& dst, in...

FILE: src/layer/arm/pixelshuffle_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/pixelshuffle_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/pooling_2x2.h
  function pooling2x2s2_max_neon (line 4) | static void pooling2x2s2_max_neon(const Mat& bottom_blob, Mat& top_blob,...

FILE: src/layer/arm/pooling_2x2_pack4.h
  function pooling2x2s2_max_pack4_neon (line 4) | static void pooling2x2s2_max_pack4_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/pooling_2x2_pack4_bf16s.h
  function pooling2x2s2_max_pack4_bf16s_neon (line 4) | static void pooling2x2s2_max_pack4_bf16s_neon(const Mat& bottom_blob, Ma...

FILE: src/layer/arm/pooling_3x3.h
  function pooling3x3s2_max_neon (line 4) | static void pooling3x3s2_max_neon(const Mat& bottom_blob, Mat& top_blob,...

FILE: src/layer/arm/pooling_3x3_pack4.h
  function pooling3x3s2_max_pack4_neon (line 4) | static void pooling3x3s2_max_pack4_neon(const Mat& bottom_blob, Mat& top...

FILE: src/layer/arm/pooling_3x3_pack4_bf16s.h
  function pooling3x3s2_max_pack4_bf16s_neon (line 4) | static void pooling3x3s2_max_pack4_bf16s_neon(const Mat& bottom_blob, Ma...

FILE: src/layer/arm/pooling_arm.cpp
  type ncnn (line 16) | namespace ncnn {

FILE: src/layer/arm/pooling_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/pooling_arm_asimdhp.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/prelu_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/prelu_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/prelu_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/quantize_arm.cpp
  type ncnn (line 15) | namespace ncnn {
    function quantize (line 31) | static void quantize(const float* ptr, signed char* s8ptr, const Mat& ...
    function quantize_pack4to8 (line 99) | static void quantize_pack4to8(const float* ptr0, const float* ptr1, si...
    function quantize_pack4to1 (line 143) | static void quantize_pack4to1(const float* ptr, signed char* s8ptr0, s...
    function quantize_bf16s (line 401) | static void quantize_bf16s(const unsigned short* ptr, signed char* s8p...
    function quantize_pack4to8_bf16s (line 472) | static void quantize_pack4to8_bf16s(const unsigned short* ptr0, const ...
    function quantize_pack4to1_bf16s (line 518) | static void quantize_pack4to1_bf16s(const unsigned short* ptr, signed ...

FILE: src/layer/arm/quantize_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/quantize_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function quantize_fp16s (line 15) | static void quantize_fp16s(const __fp16* ptr, signed char* s8ptr, cons...
    function quantize_pack4to8_fp16s (line 81) | static void quantize_pack4to8_fp16s(const __fp16* ptr0, const __fp16* ...
    function quantize_pack4to1_fp16s (line 127) | static void quantize_pack4to1_fp16s(const __fp16* ptr, signed char* s8...
    function quantize_fp16sa (line 360) | static void quantize_fp16sa(const __fp16* ptr, signed char* s8ptr, con...
    function quantize_pack4to1_fp16sa (line 415) | static void quantize_pack4to1_fp16sa(const __fp16* ptr, signed char* s...

FILE: src/layer/arm/relu_arm.cpp
  type ncnn (line 13) | namespace ncnn {

FILE: src/layer/arm/relu_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/relu_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {

FILE: src/layer/arm/requantize_arm.cpp
  type ncnn (line 14) | namespace ncnn {
    function requantize_relu (line 23) | static void requantize_relu(const int* intptr, signed char* ptr, const...
    function requantize_leakyrelu (line 175) | static void requantize_leakyrelu(const int* intptr, signed char* ptr, ...
    function requantize (line 328) | static void requantize(const int* intptr, signed char* ptr, const Mat&...

FILE: src/layer/arm/requantize_arm.h
  function namespace (line 10) | namespace ncnn {

FILE: src/layer/arm/reshape_arm.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/reshape_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/rmsnorm_arm.cpp
  type ncnn (line 13) | namespace ncnn {
    function rmsnorm (line 29) | static void rmsnorm(float* ptr, const float* gamma_ptr, float eps, int...
    function rmsnorm_bf16s (line 222) | static void rmsnorm_bf16s(unsigned short* ptr, const float* gamma_ptr,...

FILE: src/layer/arm/rmsnorm_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/rmsnorm_arm_asimdhp.cpp
  type ncnn (line 11) | namespace ncnn {
    function rmsnorm_fp16s (line 14) | static void rmsnorm_fp16s(__fp16* ptr, const float* gamma_ptr, float e...

FILE: src/layer/arm/rnn_arm.cpp
  type ncnn (line 15) | namespace ncnn {
    function rnn (line 152) | static int rnn(const Mat& bottom_blob, Mat& top_blob, int reverse, con...
    function rnn_bf16s (line 499) | static int rnn_bf16s(const Mat& bottom_blob, Mat& top_blob, int revers...

FILE: src/layer/arm/rnn_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/rnn_arm_asimddp.cpp
  type ncnn (line 10) | namespace ncnn {
    function rnn_transform_weight_int8_asimddp (line 14) | void rnn_transform_weight_int8_asimddp(const Mat& weight_xc, const Mat...
    function rnn_int8_asimddp (line 19) | void rnn_int8_asimddp(const Mat& bottom_blob_int8, const Mat& bottom_b...

FILE: src/layer/arm/rnn_arm_asimdhp.cpp
  type ncnn (line 12) | namespace ncnn {
    function rnn_fp16sa (line 15) | static int rnn_fp16sa(const Mat& bottom_blob, Mat& top_blob, int rever...
    function rnn_fp16s (line 230) | static int rnn_fp16s(const Mat& bottom_blob, Mat& top_blob, int revers...

FILE: src/layer/arm/rnn_arm_vfpv4.cpp
  type ncnn (line 10) | namespace ncnn {
    function rnn_int8_gate_output_vfpv4 (line 14) | void rnn_int8_gate_output_vfpv4(const Mat& gates, Mat& hidden_state, M...

FILE: src/layer/arm/rnn_int8.h
  function rnn_transform_weight_int8 (line 13) | static void rnn_transform_weight_int8(const Mat& weight_xc, const Mat& w...
  function rnn_int8_gate_output (line 223) | static void rnn_int8_gate_output(const Mat& gates, Mat& hidden_state, Ma...
  function rnn_int8 (line 319) | static void rnn_int8(const Mat& bottom_blob_int8, const Mat& bottom_blob...

FILE: src/layer/arm/scale_arm.cpp
  type ncnn (line 10) | namespace ncnn {

FILE: src/layer/arm/scale_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/selu_arm.cpp
  type ncnn (line 12) | namespace ncnn {

FILE: src/layer/arm/selu_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/shufflechannel_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/shufflechannel_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/sigmoid_arm.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/sigmoid_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/sigmoid_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/slice_arm.cpp
  type ncnn (line 8) | namespace ncnn {

FILE: src/layer/arm/slice_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/softmax_arm.cpp
  type ncnn (line 16) | namespace ncnn {
    function softmax (line 32) | static void softmax(float* _ptr, int elemcount, int elempack)
    function softmax_pack4 (line 145) | static void softmax_pack4(float* _ptr, int elemcount, size_t stride, i...
    function softmax_pack1 (line 281) | static void softmax_pack1(float* _ptr, int elemcount, size_t stride, i...
    function softmax (line 391) | static void softmax(float* _ptr, int elemcount, int elempack, size_t s...
    function softmax_bf16s (line 621) | static void softmax_bf16s(unsigned short* _ptr, int elemcount, int ele...
    function softmax_bf16s_pack4 (line 774) | static void softmax_bf16s_pack4(unsigned short* _ptr, int elemcount, s...
    function softmax_bf16s_pack1 (line 930) | static void softmax_bf16s_pack1(unsigned short* _ptr, int elemcount, s...
    function softmax_bf16s (line 1091) | static void softmax_bf16s(unsigned short* _ptr, int elemcount, int ele...

FILE: src/layer/arm/softmax_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/softmax_arm_asimdhp.cpp
  type ncnn (line 14) | namespace ncnn {
    function softmax_fp16s (line 17) | static void softmax_fp16s(__fp16* _ptr, int elemcount, int elempack)
    function softmax_fp16s_pack8 (line 145) | static void softmax_fp16s_pack8(__fp16* _ptr, int elemcount, size_t st...
    function softmax_fp16s_pack4 (line 382) | static void softmax_fp16s_pack4(__fp16* _ptr, int elemcount, size_t st...
    function softmax_fp16s_pack1 (line 576) | static void softmax_fp16s_pack1(__fp16* _ptr, int elemcount, size_t st...
    function softmax_fp16s (line 717) | static void softmax_fp16s(__fp16* _ptr, int elemcount, int elempack, s...

FILE: src/layer/arm/swish_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/swish_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/swish_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/tanh_arm.cpp
  type ncnn (line 14) | namespace ncnn {

FILE: src/layer/arm/tanh_arm.h
  function namespace (line 9) | namespace ncnn {

FILE: src/layer/arm/tanh_arm_asimdhp.cpp
  type ncnn (line 15) | namespace ncnn {

FILE: src/layer/arm/unaryop_arm.cpp
  type ncnn (line 18) | namespace ncnn {
    function unary_op_inplace (line 35) | static int unary_op_inplace(Mat& a, const Option& opt)
    type UnaryOp_arm_functor (line 81) | namespace UnaryOp_arm_functor {
      type unary_op_abs (line 83) | struct unary_op_abs
        method func (line 85) | float func(const float& x) const
        method float32x4_t (line 90) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_neg (line 97) | struct unary_op_neg
        method func (line 99) | float func(const float& x) const
        method float32x4_t (line 104) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_floor (line 111) | struct unary_op_floor
        method func (line 113) | float func(const float& x) const
        method float32x4_t (line 118) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_ceil (line 131) | struct unary_op_ceil
        method func (line 133) | float func(const float& x) const
        method float32x4_t (line 138) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_square (line 151) | struct unary_op_square
        method func (line 153) | float func(const float& x) const
        method float32x4_t (line 158) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_sqrt (line 165) | struct unary_op_sqrt
        method func (line 167) | float func(const float& x) const
        method float32x4_t (line 172) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_rsqrt (line 186) | struct unary_op_rsqrt
        method func (line 188) | float func(const float& x) const
        method float32x4_t (line 193) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_exp (line 203) | struct unary_op_exp
        method func (line 205) | float func(const float& x) const
        method float32x4_t (line 210) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_log (line 217) | struct unary_op_log
        method func (line 219) | float func(const float& x) const
        method float32x4_t (line 224) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_sin (line 231) | struct unary_op_sin
        method func (line 233) | float func(const float& x) const
        method float32x4_t (line 238) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_cos (line 245) | struct unary_op_cos
        method func (line 247) | float func(const float& x) const
        method float32x4_t (line 252) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_tan (line 259) | struct unary_op_tan
        method func (line 261) | float func(const float& x) const
        method float32x4_t (line 266) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_asin (line 273) | struct unary_op_asin
        method func (line 275) | float func(const float& x) const
        method float32x4_t (line 280) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_acos (line 287) | struct unary_op_acos
        method func (line 289) | float func(const float& x) const
        method float32x4_t (line 294) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_atan (line 301) | struct unary_op_atan
        method func (line 303) | float func(const float& x) const
        method float32x4_t (line 308) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_reciprocal (line 322) | struct unary_op_reciprocal
        method func (line 324) | float func(const float& x) const
        method float32x4_t (line 329) | float32x4_t func_pack4(const float32x4_t& x) const
      type unary_op_tanh (line 339) | struct unary_op_tanh
        method func (line 341) | float func(const float& x) const
        method float32x4_t (line 346) | float32x4_t func_pack4(const float32x4_t& x) const
      ty
Copy disabled (too large) Download .json
Condensed preview — 3805 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (16,472K chars).
[
  {
    "path": ".astylerc",
    "chars": 623,
    "preview": "# astyle -n -r \"benchmark/*.h,*.cpp\" \"src/*.h,*.cpp\" \"tests/*.h,*.cpp\" \"tools/*.h,*.cpp\" \"examples/*.h,*.cpp\"\n\n# brace s"
  },
  {
    "path": ".clang-format",
    "chars": 3744,
    "preview": "# find src/ tools/ tests/ examples/ benchmark/ -type f -name '*.c' -o -name '*.cpp' -o -name '*.h' | xargs -i clang-form"
  },
  {
    "path": ".gitattributes",
    "chars": 30,
    "preview": "*.comp linguist-language=GLSL\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug.md",
    "chars": 194,
    "preview": "---\nname: \"\\U0001F41B bug issue\"\nabout: submit a bug report +_+\n---\n\n## error log | 日志或报错信息 | ログ\n\n## context | 编译/运行环境 |"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/model-convert.md",
    "chars": 213,
    "preview": "---\nname: \"\\U0001F6B8 model convert issue\"\nabout: \"Life is Short, Use pnnx and convertmodel.com\"\n---\n\n## error log | 日志或"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/others.md",
    "chars": 103,
    "preview": "---\nname: \"\\U0001F4DD others\"\nabout: discussion, suggestion and question\n---\n\n## detail | 详细描述 | 詳細な説明\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/quantization.md",
    "chars": 261,
    "preview": "---\nname: \"\\U0001F4C8 quantization\"\nabout: best wishes for your low bit quantization has a low accuracy loss...\\(^▽^)/.."
  },
  {
    "path": ".github/dependabot.yml",
    "chars": 117,
    "preview": "version: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n"
  },
  {
    "path": ".github/labeler.yml",
    "chars": 992,
    "preview": "cmake:\n- changed-files:\n  - any-glob-to-any-file: ['cmake/**', 'toolchains/**']\n\ndoc: \n- changed-files:\n  - any-glob-to-"
  },
  {
    "path": ".github/workflows/android.yml",
    "chars": 5616,
    "preview": "name: android\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/android.yml'\n    - 'CMakeLists.txt'"
  },
  {
    "path": ".github/workflows/code-format-msg.yml",
    "chars": 3811,
    "preview": "name: code-format-msg\n\non:\n  workflow_run:\n    workflows: [code-format]\n    types: [completed]\n\nconcurrency:\n  group: co"
  },
  {
    "path": ".github/workflows/code-format.yml",
    "chars": 3855,
    "preview": "name: code-format\n\non: [push, pull_request]\n\nconcurrency:\n  group: code-format-${{ github.ref }}\n  cancel-in-progress: t"
  },
  {
    "path": ".github/workflows/codeql-analysis.yml",
    "chars": 2978,
    "preview": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# Y"
  },
  {
    "path": ".github/workflows/compare-binary-size-pr-comment.yml",
    "chars": 5629,
    "preview": "name: compare-binary-size-pr-comment\non:\n  workflow_run:\n    workflows: [\"compare-binary-size\"]\n    types:\n      - compl"
  },
  {
    "path": ".github/workflows/compare-binary-size.yml",
    "chars": 3279,
    "preview": "name: compare-binary-size\non:\n  pull_request:\n    branches: [master]\n    paths:\n    - '.github/workflows/compare-binary-"
  },
  {
    "path": ".github/workflows/elf-riscv32.yml",
    "chars": 2974,
    "preview": "name: elf-riscv32\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/elf-riscv32.yml'\n    - 'toolcha"
  },
  {
    "path": ".github/workflows/elf-riscv64.yml",
    "chars": 2927,
    "preview": "name: elf-riscv64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/elf-riscv64.yml'\n    - 'toolcha"
  },
  {
    "path": ".github/workflows/esp32.yml",
    "chars": 1698,
    "preview": "name: ESP32\non:\n  push:\n    branches: [master]\n    paths:\n      - '.github/workflows/esp32.yml'\n      - 'CMakeLists.txt'"
  },
  {
    "path": ".github/workflows/harmonyos.yml",
    "chars": 3003,
    "preview": "name: harmonyos\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/harmonyos.yml'\n    - 'CMakeLists."
  },
  {
    "path": ".github/workflows/ios.yml",
    "chars": 6704,
    "preview": "name: ios\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/ios.yml'\n    - 'toolchains/ios.toolchai"
  },
  {
    "path": ".github/workflows/labeler.yml",
    "chars": 179,
    "preview": "name: labeler\non: [pull_request_target]\n\npermissions:\n  contents: read\n  pull-requests: write\n\njobs:\n  label:\n    runs-o"
  },
  {
    "path": ".github/workflows/linux-aarch64.yml",
    "chars": 3948,
    "preview": "name: linux-aarch64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-aarch64.yml'\n    - 'too"
  },
  {
    "path": ".github/workflows/linux-arm.yml",
    "chars": 4338,
    "preview": "name: linux-arm\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-arm.yml'\n    - 'toolchains/"
  },
  {
    "path": ".github/workflows/linux-loongarch64.yml",
    "chars": 1845,
    "preview": "name: linux-loongarch64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-loongarch64.yml'\n  "
  },
  {
    "path": ".github/workflows/linux-mips.yml",
    "chars": 2158,
    "preview": "name: linux-mips\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-mips.yml'\n    - 'toolchain"
  },
  {
    "path": ".github/workflows/linux-mips64.yml",
    "chars": 2244,
    "preview": "name: linux-mips64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-mips64.yml'\n    - 'toolc"
  },
  {
    "path": ".github/workflows/linux-ppc64.yml",
    "chars": 3466,
    "preview": "name: linux-ppc64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-ppc64.yml'\n    - 'toolcha"
  },
  {
    "path": ".github/workflows/linux-riscv32.yml",
    "chars": 1973,
    "preview": "name: linux-riscv32\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-riscv32.yml'\n    - 'too"
  },
  {
    "path": ".github/workflows/linux-riscv64.yml",
    "chars": 15572,
    "preview": "name: linux-riscv64\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-riscv64.yml'\n    - 'too"
  },
  {
    "path": ".github/workflows/linux-x64-cpu-clang.yml",
    "chars": 4032,
    "preview": "name: linux-x64-cpu-clang\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-cpu-clang.yml"
  },
  {
    "path": ".github/workflows/linux-x64-cpu-gcc-musl.yml",
    "chars": 1525,
    "preview": "name: linux-x64-cpu-gcc-musl\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-cpu-gcc-mu"
  },
  {
    "path": ".github/workflows/linux-x64-cpu-gcc.yml",
    "chars": 5303,
    "preview": "name: linux-x64-cpu-gcc\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-cpu-gcc.yml'\n  "
  },
  {
    "path": ".github/workflows/linux-x64-gpu-clang.yml",
    "chars": 2699,
    "preview": "name: linux-x64-gpu-clang\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-gpu-clang.yml"
  },
  {
    "path": ".github/workflows/linux-x64-gpu-gcc.yml",
    "chars": 3929,
    "preview": "name: linux-x64-gpu-gcc\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-gpu-gcc.yml'\n  "
  },
  {
    "path": ".github/workflows/linux-x64-sde.yml",
    "chars": 2575,
    "preview": "name: linux-x64-sde\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x64-sde.yml'\n    - 'CMa"
  },
  {
    "path": ".github/workflows/linux-x86-cpu-clang.yml",
    "chars": 2038,
    "preview": "name: linux-x86-cpu-clang\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x86-cpu-clang.yml"
  },
  {
    "path": ".github/workflows/linux-x86-cpu-gcc.yml",
    "chars": 2283,
    "preview": "name: linux-x86-cpu-gcc\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/linux-x86-cpu-gcc.yml'\n  "
  },
  {
    "path": ".github/workflows/mac-catalyst.yml",
    "chars": 5418,
    "preview": "name: mac-catalyst\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/mac-catalyst.yml'\n    - 'toolc"
  },
  {
    "path": ".github/workflows/macos.yml",
    "chars": 7508,
    "preview": "name: macos\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/macos.yml'\n    - 'toolchains/ios.tool"
  },
  {
    "path": ".github/workflows/pnnx.yml",
    "chars": 17148,
    "preview": "name: pnnx\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/pnnx.yml'\n    - 'src/layer/*'\n    - 't"
  },
  {
    "path": ".github/workflows/python.yml",
    "chars": 4294,
    "preview": "name: python\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/python.yml'\n    - 'CMakeLists.txt'\n "
  },
  {
    "path": ".github/workflows/release-python.yml",
    "chars": 13422,
    "preview": "name: release-python\non:\n  push:\n    tags:\n      - '*'\n  workflow_dispatch:\n\nenv:\n  DEVELOPER_DIR: /Applications/Xcode_1"
  },
  {
    "path": ".github/workflows/release.yml",
    "chars": 107443,
    "preview": "name: release\non:\n  push:\n    tags:\n      - '*'\n\nenv:\n  DEVELOPER_DIR: /Applications/Xcode_16.4.0.app/Contents/Developer"
  },
  {
    "path": ".github/workflows/sync-wiki.yml",
    "chars": 818,
    "preview": "name: sync-wiki\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/sync-wiki.yml'\n    - 'docs/**'\nco"
  },
  {
    "path": ".github/workflows/test-coverage.yml",
    "chars": 23383,
    "preview": "name: test-coverage\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/test-coverage.yml'\n    - 'CMa"
  },
  {
    "path": ".github/workflows/tvos.yml",
    "chars": 7401,
    "preview": "name: tvos\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/tvos.yml'\n    - 'toolchains/ios.toolch"
  },
  {
    "path": ".github/workflows/visionos.yml",
    "chars": 6713,
    "preview": "name: visionos\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/visionos.yml'\n    - 'toolchains/io"
  },
  {
    "path": ".github/workflows/watchos.yml",
    "chars": 7342,
    "preview": "name: watchos\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/watchos.yml'\n    - 'toolchains/ios."
  },
  {
    "path": ".github/workflows/web-assembly.yml",
    "chars": 2796,
    "preview": "name: web-assembly\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/web-assembly.yml'\n    - 'CMake"
  },
  {
    "path": ".github/workflows/windows-arm.yml",
    "chars": 3500,
    "preview": "name: windows-arm\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/windows-arm.yml'\n    - 'CMakeLi"
  },
  {
    "path": ".github/workflows/windows-clang.yml",
    "chars": 2487,
    "preview": "name: windows-clang\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/windows-clang.yml'\n    - 'CMa"
  },
  {
    "path": ".github/workflows/windows-mingw.yml",
    "chars": 1445,
    "preview": "name: windows-mingw\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/windows-mingw.yml'\n    - 'CMa"
  },
  {
    "path": ".github/workflows/windows-xp.yml",
    "chars": 4636,
    "preview": "name: windows-xp\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/windows-xp.yml'\n    - 'toolchain"
  },
  {
    "path": ".github/workflows/windows.yml",
    "chars": 8195,
    "preview": "name: windows\non:\n  push:\n    branches: [master]\n    paths:\n    - '.github/workflows/windows.yml'\n    - 'CMakeLists.txt'"
  },
  {
    "path": ".gitignore",
    "chars": 531,
    "preview": "# CMake build directory\nbuild*/\n\n# Backup files.\n*~\n\n# Prerequisites\n*.d\n\n# Compiled Object files\n*.slo\n*.lo\n*.o\n*.obj\n\n"
  },
  {
    "path": ".gitmodules",
    "chars": 178,
    "preview": "[submodule \"glslang\"]\n\tpath = glslang\n\turl = https://github.com/nihui/glslang\n[submodule \"python/pybind11\"]\n\tpath = pyth"
  },
  {
    "path": "CITATION.cff",
    "chars": 659,
    "preview": "cff-version: 1.2.0\ntitle: ncnn\nmessage: >-\n  If you use this software, please cite it using the\n  metadata from this fil"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 51656,
    "preview": "if(CMAKE_TOOLCHAIN_FILE)\n    set(LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_BINARY_DIR} CACHE PATH \"root for library output, set t"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 414,
    "preview": "\n# Acknowledgements\n\n- Thanks to bug1989 [https://github.com/bug1989] for contributing the initial quantized int8 infere"
  },
  {
    "path": "Info.plist",
    "chars": 575,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
  },
  {
    "path": "LICENSE.txt",
    "chars": 6415,
    "preview": "Tencent is pleased to support the open source community by making ncnn available.\nCopyright (C) 2017 Tencent.  All right"
  },
  {
    "path": "MANIFEST.in",
    "chars": 179,
    "preview": "recursive-include cmake *\n\nrecursive-include glslang *\nprune glslang/Test\n\nrecursive-include src *\n\nrecursive-include py"
  },
  {
    "path": "README.md",
    "chars": 25350,
    "preview": "![ncnn](https://raw.githubusercontent.com/Tencent/ncnn/master/images/256-ncnn.png)\n\n# ncnn\n\n[![License](https://img.shie"
  },
  {
    "path": "benchmark/CMakeLists.txt",
    "chars": 1859,
    "preview": "\nif(MSVC)\n    # warning C4996: 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disa"
  },
  {
    "path": "benchmark/FastestDet.param",
    "chars": 12398,
    "preview": "7767517\n127 150\nInput                    in0                      0 1 in0\nConvolution              convrelu_0           "
  },
  {
    "path": "benchmark/README.md",
    "chars": 551541,
    "preview": "benchncnn can be used to test neural network inference performance\n\nOnly the network definition files (ncnn param) are r"
  },
  {
    "path": "benchmark/RankCards/CMakeLists.txt",
    "chars": 195,
    "preview": "cmake_minimum_required(VERSION 3.10)\n\nproject(RankCards CXX)\n\nset(CMAKE_CXX_STANDARD 11)\nset(CMAKE_CXX_STANDARD_REQUIRED"
  },
  {
    "path": "benchmark/RankCards/README.md",
    "chars": 6844,
    "preview": "### Rank the boards.\nThe table below is generated by RankCards, using the timings found in the /ncnn/benchmark/README.md"
  },
  {
    "path": "benchmark/RankCards/Rcards.h",
    "chars": 8452,
    "preview": "// Copyright 2017 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n#ifndef RCARDS_H\n#define RCARDS_H\n\n#include <cstdint>"
  },
  {
    "path": "benchmark/RankCards/main.cpp",
    "chars": 5510,
    "preview": "// Copyright 2017 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include <iostream>\n#include <iostream>\n#include <fs"
  },
  {
    "path": "benchmark/alexnet.param",
    "chars": 1684,
    "preview": "7767517\n15 15\nInput                    data                     0 1 data -23330=4,3,227,227,3 0=227 1=227 2=3\nConvolutio"
  },
  {
    "path": "benchmark/benchncnn.cpp",
    "chars": 12975,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include <float.h>\n#include <stdio.h>\n#include <stri"
  },
  {
    "path": "benchmark/benchncnn_param_data.h.in",
    "chars": 111,
    "preview": "// Benchncnn Param Data header\n//\n// This file is auto-generated by cmake, don't edit it.\n\n@param_header_data@\n"
  },
  {
    "path": "benchmark/blazeface.param",
    "chars": 9327,
    "preview": "7767517\n101 117\nInput            data                    0 1 data 0=128 1=128 2=3\nPadding          75                   "
  },
  {
    "path": "benchmark/efficientnet_b0.param",
    "chars": 20213,
    "preview": "7767517\n200 225\nInput                    input.1                  0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/efficientnetv2_b0.param",
    "chars": 28419,
    "preview": "7767517\n257 288\nMemoryData               110:12                   0 1 110:12 -23330=4,1,112,1,1 0=112\nMemoryData        "
  },
  {
    "path": "benchmark/googlenet.param",
    "chars": 17254,
    "preview": "7767517\n94 121\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvoluti"
  },
  {
    "path": "benchmark/googlenet_int8.param",
    "chars": 15283,
    "preview": "7767517\n94 121\nInput                    data                     0 1 data 0=224 1=224 2=3\nConvolution              conv1"
  },
  {
    "path": "benchmark/mnasnet.param",
    "chars": 9965,
    "preview": "7767517\n76 86\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolutio"
  },
  {
    "path": "benchmark/mobilenet.param",
    "chars": 4463,
    "preview": "7767517\n31 31\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolutio"
  },
  {
    "path": "benchmark/mobilenet_int8.param",
    "chars": 3982,
    "preview": "7767517\n31 31\nInput                    data                     0 1 data 0=224 1=224 2=3\nConvolution              conv1 "
  },
  {
    "path": "benchmark/mobilenet_ssd.param",
    "chars": 14398,
    "preview": "7767517\n92 115\nInput                    input                    0 1 data -23330=4,3,300,300,3 0=300 1=300 2=3\nSplit    "
  },
  {
    "path": "benchmark/mobilenet_ssd_int8.param",
    "chars": 12465,
    "preview": "7767517\n92 115\nInput                    input                    0 1 data 0=300 1=300 2=3\nSplit                    split"
  },
  {
    "path": "benchmark/mobilenet_v2.param",
    "chars": 12648,
    "preview": "7767517\n77 87\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolutio"
  },
  {
    "path": "benchmark/mobilenet_v3.param",
    "chars": 14794,
    "preview": "7767517\n145 163\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/mobilenet_yolo.param",
    "chars": 5824,
    "preview": "7767517\n39 41\nInput                    data                     0 1 data -23330=4,3,416,416,3 0=416 1=416 2=3\nConvolutio"
  },
  {
    "path": "benchmark/mobilenetv2_yolov3.param",
    "chars": 11418,
    "preview": "7767517\n87 99\nInput                    data                     0 1 data -23330=4,3,352,352,3 0=352 1=352 2=3\nConvolutio"
  },
  {
    "path": "benchmark/nanodet_m.param",
    "chars": 20921,
    "preview": "7767517\n179 204\nInput                    input.1                  0 1 input.1 -23330=4,3,320,320,3 0=320 1=320 2=3\nConvo"
  },
  {
    "path": "benchmark/proxylessnasnet.param",
    "chars": 12004,
    "preview": "7767517\n91 104\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvoluti"
  },
  {
    "path": "benchmark/regnety_400m.param",
    "chars": 19703,
    "preview": "7767517\n185 217\nInput                    input.1                  0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/resnet18.param",
    "chars": 7157,
    "preview": "7767517\n50 58\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolutio"
  },
  {
    "path": "benchmark/resnet18_int8.param",
    "chars": 6152,
    "preview": "7767517\n50 58\nInput                    data                     0 1 data 0=224 1=224 2=3\nConvolution              conv1 "
  },
  {
    "path": "benchmark/resnet50.param",
    "chars": 15849,
    "preview": "7767517\n106 122\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/resnet50_int8.param",
    "chars": 13713,
    "preview": "7767517\n106 122\nInput                    data                     0 1 data 0=224 1=224 2=3\nConvolution              conv"
  },
  {
    "path": "benchmark/shufflenet.param",
    "chars": 18469,
    "preview": "7767517\n120 136\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/shufflenet_v2.param",
    "chars": 15815,
    "preview": "7767517\n109 125\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolut"
  },
  {
    "path": "benchmark/squeezenet.param",
    "chars": 8222,
    "preview": "7767517\n48 56\nInput                    data                     0 1 data -23330=4,3,227,227,3 0=227 1=227 2=3\nConvolutio"
  },
  {
    "path": "benchmark/squeezenet_int8.param",
    "chars": 7267,
    "preview": "7767517\n48 56\nInput                    data                     0 1 data 0=227 1=227 2=3\nConvolution              conv1 "
  },
  {
    "path": "benchmark/squeezenet_ssd.param",
    "chars": 19907,
    "preview": "7767517\n119 152\nInput                    data                     0 1 data -23330=4,3,300,300,3 0=300 1=300 2=3\nSplit   "
  },
  {
    "path": "benchmark/squeezenet_ssd_int8.param",
    "chars": 17275,
    "preview": "7767517\n119 152\nInput                    data                     0 1 data 0=300 1=300 2=3\nSplit                    spli"
  },
  {
    "path": "benchmark/vgg16.param",
    "chars": 2814,
    "preview": "7767517\n23 23\nInput                    data                     0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3\nConvolutio"
  },
  {
    "path": "benchmark/vgg16_int8.param",
    "chars": 2410,
    "preview": "7767517\n23 23\nInput                    data                     0 1 data 0=224 1=224 2=3\nConvolution              conv1_"
  },
  {
    "path": "benchmark/vision_transformer.param",
    "chars": 14958,
    "preview": "7767517\n144 192\nInput            input                    0 1 input\nMemoryData       backbone.cls_token       0 1 backbo"
  },
  {
    "path": "benchmark/yolo-fastest-1.1.param",
    "chars": 17909,
    "preview": "7767517\n131 154\nInput                    data                     0 1 data -23330=4,3,320,320,3 0=320 1=320 2=3\nConvolut"
  },
  {
    "path": "benchmark/yolo-fastestv2.param",
    "chars": 15139,
    "preview": "7767517\n144 166\nInput                    input.1                  0 1 input.1 -23330=4,3,352,352,3 0=352 1=352 2=3\nConvo"
  },
  {
    "path": "benchmark/yolov4-tiny.param",
    "chars": 6544,
    "preview": "7767517\n45 53\nInput                    data                     0 1 data -23330=4,3,416,416,3 0=416 1=416 2=3\nConvolutio"
  },
  {
    "path": "build-android.cmd",
    "chars": 2325,
    "preview": ":: Set android ndk root\n@ECHO OFF\n@SETLOCAL\n@SET ANDROID_NDK=<your-ndk-root_path, such as\"E:\\android-ndk-r27\">\n\n:: Set n"
  },
  {
    "path": "build.sh",
    "chars": 3789,
    "preview": "#!/usr/bin/env bash\n\n##### android armv7 without neon\nmkdir -p build-android-armv7-without-neon\npushd build-android-armv"
  },
  {
    "path": "cmake/ncnnConfig.cmake.in",
    "chars": 1727,
    "preview": "set(NCNN_VERSION @NCNN_VERSION@)\nset(NCNN_OPENMP @NCNN_OPENMP@)\nset(NCNN_THREADS @NCNN_THREADS@)\nset(NCNN_VULKAN @NCNN_V"
  },
  {
    "path": "cmake/ncnn_add_layer.cmake",
    "chars": 27568,
    "preview": "\nmacro(ncnn_add_arch_opt_layer class NCNN_TARGET_ARCH_OPT NCNN_TARGET_ARCH_OPT_CFLAGS)\n    set(NCNN_${NCNN_TARGET_ARCH}_"
  },
  {
    "path": "cmake/ncnn_add_param.cmake",
    "chars": 2007,
    "preview": "\nmacro(ncnn_add_param NCNN_PARAM_SRC)\n    # Get the file name with extension\n    get_filename_component(NCNN_PARAM_SRC_N"
  },
  {
    "path": "cmake/ncnn_add_shader.cmake",
    "chars": 1467,
    "preview": "\nmacro(ncnn_add_shader NCNN_SHADER_SRC)\n    get_filename_component(NCNN_SHADER_SRC_NAME_WE ${NCNN_SHADER_SRC} NAME_WE)\n "
  },
  {
    "path": "cmake/ncnn_generate_avx512_source.cmake",
    "chars": 517,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_avx_source.cmake",
    "chars": 508,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_fma_source.cmake",
    "chars": 508,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_lasx_source.cmake",
    "chars": 547,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_lsx_source.cmake",
    "chars": 544,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_msa_source.cmake",
    "chars": 514,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_param_header.cmake",
    "chars": 922,
    "preview": "\n# must define PARAM_HEADER PARAM_SRC PARAM_SRC_NAME_WE\n\nfile(READ ${PARAM_SRC} param_data)\n\n# remove whitespace\nstring("
  },
  {
    "path": "cmake/ncnn_generate_rvv_source.cmake",
    "chars": 520,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/ncnn_generate_shader_comp_header.cmake",
    "chars": 1227,
    "preview": "\n# must define SHADER_COMP_HEADER SHADER_SRC\n\nfile(READ ${SHADER_SRC} comp_data)\n\n# skip leading comment\nstring(FIND \"${"
  },
  {
    "path": "cmake/ncnn_generate_xtheadvector_source.cmake",
    "chars": 547,
    "preview": "\n# must define SRC DST CLASS\n\nfile(READ ${SRC} source_data)\n\n# replace\nstring(TOUPPER ${CLASS} CLASS_UPPER)\nstring(TOLOW"
  },
  {
    "path": "cmake/run_test.cmake",
    "chars": 275,
    "preview": "\r\nexecute_process(COMMAND $ENV{TESTS_EXECUTABLE_LOADER} $ENV{TESTS_EXECUTABLE_LOADER_ARGUMENTS} ${TEST_EXECUTABLE} $ENV{"
  },
  {
    "path": "codeformat.sh",
    "chars": 775,
    "preview": "#!/usr/bin/env bash\n\n# we run clang-format and astyle twice to get stable format output\n\nformat_code() {\n    find src/ t"
  },
  {
    "path": "docs/Home.md",
    "chars": 3398,
    "preview": "### input data and extract output\n```cpp\n#include <opencv2/core/core.hpp>\n#include <opencv2/highgui/highgui.hpp>\n#includ"
  },
  {
    "path": "docs/application-with-ncnn-inside.md",
    "chars": 2766,
    "preview": "![](https://github.com/nihui/ncnn-assets/raw/master/20180626/com.azarlive.android.png) Azar-视频交友与聊天 June 20, 2018\n\n![](h"
  },
  {
    "path": "docs/benchmark/the-benchmark-of-caffe-android-lib,-mini-caffe,-and-ncnn.md",
    "chars": 3314,
    "preview": "caffe-android-lib https://github.com/sh1r0/caffe-android-lib\n\nmini-caffe https://github.com/luoyetx/mini-caffe\n\nopenblas"
  },
  {
    "path": "docs/benchmark/vulkan-conformance-test.md",
    "chars": 2395,
    "preview": "\n|device|gpu|api version|driver version|squeezenet|mobilenetssd|yolov3|\n|---|---|---|---|---|---|---|\n|intel-i7-7700|Int"
  },
  {
    "path": "docs/developer-guide/aarch64-mix-assembly-and-intrinsic.md",
    "chars": 1077,
    "preview": "```c\n// v寄存器全部使用 %.4s\n// 128-bit vreg matches %.4s\n// a += b * c\nfloat32x4_t _a = vld1q_f32(a);\nfloat32x4_t _b = vld1q_f"
  },
  {
    "path": "docs/developer-guide/add-custom-layer.zh.md",
    "chars": 2771,
    "preview": "# NCNN增加自定义层\n\n## 举例\n\n这里举个例子添加自定义层次 如Relu6,即 std::min(6.f, std::max(0.f, val))\n\n```\nInput            input   0 1 input\nCo"
  },
  {
    "path": "docs/developer-guide/arm-a53-a55-dual-issue.md",
    "chars": 3074,
    "preview": "## natural assembly\n* no register dependency, no penalty\n```\nld1     {v0.4s}, [r0], #16\nfmla    v10.4s, v16.4s, v24.s[0]"
  },
  {
    "path": "docs/developer-guide/armv7-mix-assembly-and-intrinsic.md",
    "chars": 2561,
    "preview": "```c\n// d寄存器全部使用 %P\n// d reg matches %P\n// a += b * c\nfloat32x2_t _a = vld1_f32(a);\nfloat32x2_t _b = vld1_f32(b);\nfloat3"
  },
  {
    "path": "docs/developer-guide/binaryop-broadcasting.md",
    "chars": 1904,
    "preview": "### broadcasting rule\n\nncnn BinaryOp accepts blobs with different shape\n\nC = BinaryOp(A, B)\n\nshape notation convention i"
  },
  {
    "path": "docs/developer-guide/build-ncnn-on-windows-xp.zh.md",
    "chars": 4231,
    "preview": "# Build ncnn on Windows XP\n\n> **Contributors:** [@Sugar-Baby](https://github.com/Sugar-Baby) and [@AtomAlpaca](https://g"
  },
  {
    "path": "docs/developer-guide/custom-allocator.md",
    "chars": 2127,
    "preview": "Mat structure is now allocator-aware via an extra allocator parameter with default zero value.\n\nThe good-old ncnn::fastM"
  },
  {
    "path": "docs/developer-guide/element-packing.md",
    "chars": 3002,
    "preview": "### what is packing and why\n\npacking is the form of storing multiple short-sized values as one long-sized value.\n\nelemen"
  },
  {
    "path": "docs/developer-guide/expression.md",
    "chars": 3423,
    "preview": "### expression\n\nexpression is used in the reshape slice parameter to express the dynamic shape or subscript value based "
  },
  {
    "path": "docs/developer-guide/glsl-extension.md",
    "chars": 12963,
    "preview": "# ncnn GLSL extension\n\n## rationale\nDifferent GPUs support different features, some support fp16 as buffer storage type,"
  },
  {
    "path": "docs/developer-guide/glsl-extension.zh.md",
    "chars": 8865,
    "preview": "# ncnn GLSL 扩展\n\n## 理由\n不同的 GPU 支持不同的功能,有的支持 fp16 作为缓冲存储类型,有的支持 fp16 作为操作数变量,有的老 GPU 只支持 fp32。\n\n当 GPU 支持 `VK_KHR_16bit_sto"
  },
  {
    "path": "docs/developer-guide/how-to-be-a-contributor.zh.md",
    "chars": 1984,
    "preview": "### 如何提交代码\n\n#### 一、fork 分支\n在浏览器中打开 [ncnn](https://github.com/tencent/ncnn), `fork` 到自己的 repositories,例如\n```\nhttps://gith"
  },
  {
    "path": "docs/developer-guide/how-to-implement-custom-layer-step-by-step.md",
    "chars": 8280,
    "preview": "# step1 create a new empty class\n```cpp\n// mylayer.h\n#include \"layer.h\"\nusing namespace ncnn;\n\n// a new layer type calle"
  },
  {
    "path": "docs/developer-guide/how-to-write-a-neon-optimized-op-kernel.md",
    "chars": 445,
    "preview": "# benchmark\nop\n\n# naive C with openmp\nfor for for\n\n# unroll, first try\nh\n\n# register allocation\nkernels\n\n# unroll, secon"
  },
  {
    "path": "docs/developer-guide/how-to-write-a-sse-optimized-op-kernel.zh.md",
    "chars": 9940,
    "preview": "# 如何使用SSE来优化算子核心\n\n## 一:准备\n\n### 1.背景资料\n\n​\tSSE 全称Intel® Streaming SIMD Extensions (Intel® SSE),本质是Intel公司封装汇编语句提供的底层操作指令函数"
  },
  {
    "path": "docs/developer-guide/kvcache.md",
    "chars": 17092,
    "preview": "# high-performance transformer inference with mha kv cache in ncnn\n\nThis document details the implementation and usage o"
  },
  {
    "path": "docs/developer-guide/layer-feat-mask.md",
    "chars": 4191,
    "preview": "# layer feature mask\n\nEach ncnn layer allows a special parameter pair `31=X` to control specific bahavior.\n\nX is an unsi"
  },
  {
    "path": "docs/developer-guide/layer-support-behavior.md",
    "chars": 11542,
    "preview": "# Understanding `support_XYZ` Properties in ncnn's `Layer` Class\n\nThis document is for developers implementing new layer"
  },
  {
    "path": "docs/developer-guide/low-level-operation-api.md",
    "chars": 7350,
    "preview": "# implement elementwise addition with/without broadcast using BinaryOp operation\n\n* input must be fp32 storage without p"
  },
  {
    "path": "docs/developer-guide/ncnn-tips-and-tricks.zh.md",
    "chars": 1372,
    "preview": "### blob内存是隐含共享的\n\nncnn的blob最初直接使用opencv的cv::Mat,后发现blob最多只支持三维,因此实现了类似的Mat\nMat的data每个通道内存16字节对齐,并且有原子的引用计数,a=b不复制数据,超级快\n"
  },
  {
    "path": "docs/developer-guide/new-model-load-api.md",
    "chars": 5258,
    "preview": "## current model load api\n### Cons\n#### long and awful code\n#### two functions\n#### deal float32 float16 quantized-u8\n##"
  },
  {
    "path": "docs/developer-guide/new-param-load-api.md",
    "chars": 1546,
    "preview": "## current param load api\n### Cons\n#### long and awful code\n#### three functions\n#### not extensible\n#### no default val"
  },
  {
    "path": "docs/developer-guide/operation-param-weight-table.md",
    "chars": 6456,
    "preview": "\n|operation|param id|param phase|default value|weight order|\n|:---:|:---:|:---:|:---:|:---:|\n|AbsVal|||\n|ArgMax|0|out_ma"
  },
  {
    "path": "docs/developer-guide/operators.md",
    "chars": 76829,
    "preview": "\n* [AbsVal](#absval)\n* [ArgMax](#argmax)\n* [BatchNorm](#batchnorm)\n* [Bias](#bias)\n* [BinaryOp](#binaryop)\n* [BNLL](#bnl"
  },
  {
    "path": "docs/developer-guide/param-and-model-file-structure.md",
    "chars": 2751,
    "preview": "## net.param\n### example\n```\n7767517\n3 3\nInput         input    0 1 data 0=4 1=4 2=1\nInnerProduct  ip       1 1 data fc "
  },
  {
    "path": "docs/developer-guide/preload-practice.zh.md",
    "chars": 661,
    "preview": "## 只是实践经验,没有理论,不一定正确\n\n```\nprfm pldl1keep, [x0, #256]\n```\n* 放在 ld1 [x0] 前面 0~8 条指令\n* #256 表示把 x0+256 的内容放进 L1 cache\n* ldp"
  },
  {
    "path": "docs/developer-guide/tensorflow-op-combination.md",
    "chars": 2106,
    "preview": "## batchnorm\n```\nInput       A            0 1 A 0 0 0\nMemoryData  sub/y        0 1 sub/y 16 0 0\nBinaryOp    sub         "
  },
  {
    "path": "docs/developer-guide/vulkan-driver-loader.md",
    "chars": 3916,
    "preview": "# ncnn vulkan driver loader\n\nncnn turns on the ```NCNN_SIMPLEVK``` cmake option by default, when ```NCNN_VULKAN``` is en"
  },
  {
    "path": "docs/faq.en.md",
    "chars": 8724,
    "preview": "\n\n# How to join the technical Community Groups with QQ  ?\n\n- Open QQ -> click the group chat search-> search group numbe"
  },
  {
    "path": "docs/faq.md",
    "chars": 18685,
    "preview": "\n\n# 如何加入技术交流QQ群?\n\n- 打开QQ→点击群聊搜索→搜索群号637093648→输入问题答案:卷卷卷卷卷→进入群聊→准备接受图灵测试(bushi)\n- 前往QQ搜索Pocky群:677104663(超多大佬),问题答案:mult"
  },
  {
    "path": "docs/how-to-build/build-mlir2ncnn.md",
    "chars": 1208,
    "preview": "# mlir2ncnn\n\n## Compile\n\n**Clone LLVM**\n```bash\nhttps://github.com/llvm/llvm-project.git\ngit checkout -b mlir <a_working"
  },
  {
    "path": "docs/how-to-build/how-to-build.md",
    "chars": 37308,
    "preview": "### Git clone ncnn repo with submodule\n\n```\ngit clone https://github.com/Tencent/ncnn.git\ncd ncnn\ngit submodule update -"
  },
  {
    "path": "docs/how-to-use-and-FAQ/FAQ-ncnn-produce-wrong-result.md",
    "chars": 6341,
    "preview": "### caffemodel should be row-major\n\n`caffe2ncnn` tool assumes the caffemodel is row-major (produced by c++ caffe train c"
  },
  {
    "path": "docs/how-to-use-and-FAQ/FAQ-ncnn-protobuf-problem.zh.md",
    "chars": 1878,
    "preview": "# Protobuf 类问题解决方法\n\n## 问题分析\n\nprotobuf 有关的报错,一般都是两个原因:\n\n1. 需要的 pb 没安装/`FindProtobuf.cmake`不存在,最终 `find_package` 失败\n2. 系统不"
  },
  {
    "path": "docs/how-to-use-and-FAQ/FAQ-ncnn-throw-error.md",
    "chars": 4368,
    "preview": "### param is too old, please regenerate\n\nYour model file is being the old format converted by an old caffe2ncnn tool.\n\nC"
  },
  {
    "path": "docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md",
    "chars": 6648,
    "preview": "### how to enable ncnn vulkan capability\n\nfollow [the build and install instruction](https://github.com/Tencent/ncnn/blo"
  },
  {
    "path": "docs/how-to-use-and-FAQ/build-minimal-library.md",
    "chars": 4547,
    "preview": "For some reason, if you're not happy with the binary size of the ncnn library, then here is the cheatsheet that helps yo"
  },
  {
    "path": "docs/how-to-use-and-FAQ/efficient-roi-resize-rotate.md",
    "chars": 4679,
    "preview": "\n### image roi crop + convert to ncnn::Mat\n\n```\n+--------------+\n|   y          |           /-------/\n| x +-------+  |  "
  },
  {
    "path": "docs/how-to-use-and-FAQ/ncnn-load-model.md",
    "chars": 1546,
    "preview": "### the comprehensive model loading api table\n\n|load from|alexnet.param|alexnet.param.bin|alexnet.bin|\n|---|---|---|---|"
  },
  {
    "path": "docs/how-to-use-and-FAQ/openmp-best-practice.md",
    "chars": 4162,
    "preview": "ncnn openmp best practice\r\n\r\n### CPU loadaverage is too high with ncnn.\r\n\r\n   When inference the neural network with ncn"
  },
  {
    "path": "docs/how-to-use-and-FAQ/openmp-best-practice.zh.md",
    "chars": 1803,
    "preview": "ncnn openmp 最佳实践\r\n\r\n### ncnn占用过多cpu资源\r\n\r\n   使用ncnn推理运算,cpu占用非常高甚至所有核心占用都接近100%。\r\n\r\n   如果还有其它线程或进程需要较多的cpu资源,运行速度下降严重。\r\n\r"
  },
  {
    "path": "docs/how-to-use-and-FAQ/quantized-int8-inference.md",
    "chars": 3718,
    "preview": "# Post Training Quantization Tools\n\nTo support int8 model deployment on mobile devices,we provide the universal post tra"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md",
    "chars": 4630,
    "preview": "We use alexnet as an example\n\n### prepare caffe prototxt and model\n\nThese files will usually generated when trained with"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.zh.md",
    "chars": 3477,
    "preview": "首先,非常感谢大家对 ncnn 组件的关注\n为了方便大家使用 ncnn 组件,up主特意写了这篇使用指北,以烂大街的 alexnet 作为例子\n\n\n### 准备caffe网络和模型\n\ncaffe 的网络和模型通常是搞深度学习的研究者训练出来"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnn-with-opencv.md",
    "chars": 3649,
    "preview": "### opencv to ncnn\n\n* cv::Mat CV_8UC3 -> ncnn::Mat 3 channel + swap RGB/BGR\n\n```cpp\n// cv::Mat a(h, w, CV_8UC3);\nncnn::M"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnn-with-own-project.md",
    "chars": 1711,
    "preview": "### use ncnn with own project\n\nAfter building ncnn, there is one or more library files generated. Consider integrating n"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnn-with-pytorch-or-onnx.md",
    "chars": 5366,
    "preview": "# A Guide to Converting pytorch / onnx Models to ncnn\n\nThis guide is designed to help pytorch and onnx users use the new"
  },
  {
    "path": "docs/how-to-use-and-FAQ/use-ncnnoptimize-to-optimize-model.md",
    "chars": 599,
    "preview": "\nthe typical usage\n```\nncnnoptimize mobilenet.param mobilenet.bin mobilenet-opt.param mobilenet-opt.bin 65536 \n```\n\noper"
  },
  {
    "path": "docs/how-to-use-and-FAQ/vulkan-notes.md",
    "chars": 3701,
    "preview": "## supported platform\n\n* Y = known work\n* ? = shall work, not confirmed\n* / = not applied\n\n|    |windows|linux|android|m"
  },
  {
    "path": "examples/CMakeLists.txt",
    "chars": 3399,
    "preview": "macro(ncnn_add_example name)\n    add_executable(${name} ${name}.cpp)\n    if(OpenCV_FOUND)\n        target_include_directo"
  },
  {
    "path": "examples/arcface.cpp",
    "chars": 19195,
    "preview": "// Copyright 2025 heabeounMKTO\n// SPDX-License-Identifier: BSD-3-Clause\n/* ncnn example using yolo-face and arcface to e"
  },
  {
    "path": "examples/fasterrcnn.cpp",
    "chars": 9741,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#include <math.h>\n#if defined(USE_"
  },
  {
    "path": "examples/mobilenetssd.cpp",
    "chars": 4207,
    "preview": "// Copyright 2017 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/mobilenetv2ssdlite.cpp",
    "chars": 4373,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/mobilenetv3ssdlite.cpp",
    "chars": 4792,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n#include \"platform.h\"\n\n#if defined("
  },
  {
    "path": "examples/nanodet.cpp",
    "chars": 11956,
    "preview": "// Copyright 2020 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/nanodetplus_pnnx.cpp",
    "chars": 12173,
    "preview": "// Copyright 2020 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/p2pnet.cpp",
    "chars": 6828,
    "preview": "// Copyright 2021 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n#if defined(USE_NCNN_SIMPLEOCV)\n#in"
  },
  {
    "path": "examples/peleenetssd_seg.cpp",
    "chars": 5717,
    "preview": "// Copyright 2017 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/piper.cpp",
    "chars": 23449,
    "preview": "// Copyright 2025 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n// convert piper checkpoints to ncnn models\n//  1. c"
  },
  {
    "path": "examples/ppocrv5.cpp",
    "chars": 16192,
    "preview": "// Copyright 2025 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n// pip install paddlepaddle==3.0.0\n// pip install pa"
  },
  {
    "path": "examples/ppocrv5_dict.h",
    "chars": 165655,
    "preview": "// Copyright 2025 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\nstatic const char* character_dict[] = {\n    \" \",\n   "
  },
  {
    "path": "examples/retinaface.cpp",
    "chars": 13345,
    "preview": "// Copyright 2019 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/rfcn.cpp",
    "chars": 9512,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#include <math.h>\n#if defined(USE_"
  },
  {
    "path": "examples/rvm.cpp",
    "chars": 9940,
    "preview": "// Copyright 2025 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n// ncnn model exported from https://github.com/Peter"
  },
  {
    "path": "examples/scrfd.cpp",
    "chars": 12243,
    "preview": "// Copyright 2021 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/scrfd_crowdhuman.cpp",
    "chars": 13444,
    "preview": "// Copyright 2021 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#i"
  },
  {
    "path": "examples/shufflenetv2.cpp",
    "chars": 2785,
    "preview": "// Copyright 2018 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#include <algorithm>\n#if defined(U"
  },
  {
    "path": "examples/simplepose.cpp",
    "chars": 4130,
    "preview": "// Copyright 2019 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#include <algorithm>\n#if defined(U"
  },
  {
    "path": "examples/squeezencnn/README.md",
    "chars": 106,
    "preview": "The squeezenet android example project has been moved to https://github.com/nihui/ncnn-android-squeezenet\n"
  },
  {
    "path": "examples/squeezenet.cpp",
    "chars": 2258,
    "preview": "// Copyright 2017 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"net.h\"\n\n#include <algorithm>\n#if defined(U"
  },
  {
    "path": "examples/squeezenet_c_api.cpp",
    "chars": 2726,
    "preview": "// Copyright 2020 Tencent\n// SPDX-License-Identifier: BSD-3-Clause\n\n#include \"c_api.h\"\n\n#include <algorithm>\n#if defined"
  }
]

// ... and 3605 more files (download for full content)

About this extraction

This page contains the full source code of the Tencent/ncnn GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 3805 files (33.0 MB), approximately 4.2M tokens, and a symbol index with 2590 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!