Repository: apache/mxnet Branch: master Commit: b84609d3fc73 Files: 2643 Total size: 28.3 MB Directory structure: gitextract_zlms863u/ ├── .asf.yaml ├── .clang-format ├── .clang-tidy ├── .cmakelintrc ├── .codecov.yml ├── .git-blame-ignore-revs ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── config.yml │ │ ├── feature_request.md │ │ ├── flaky_test.md │ │ └── rfc.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── greetings.yml │ ├── license_check.yml │ ├── link_check.yml │ ├── os_x_mklbuild.yml │ └── os_x_staticbuild.yml ├── .gitignore ├── .gitmodules ├── .licenserc.yaml ├── .mxnet_root ├── 3rdparty/ │ ├── ctc_include/ │ │ ├── LICENSE │ │ ├── contrib/ │ │ │ └── moderngpu/ │ │ │ ├── LICENSE │ │ │ └── include/ │ │ │ ├── device/ │ │ │ │ ├── ctaloadbalance.cuh │ │ │ │ ├── ctamerge.cuh │ │ │ │ ├── ctascan.cuh │ │ │ │ ├── ctasearch.cuh │ │ │ │ ├── ctasegreduce.cuh │ │ │ │ ├── ctasegscan.cuh │ │ │ │ ├── ctasegsort.cuh │ │ │ │ ├── ctasortedsearch.cuh │ │ │ │ ├── devicetypes.cuh │ │ │ │ ├── deviceutil.cuh │ │ │ │ ├── intrinsics.cuh │ │ │ │ ├── loadstore.cuh │ │ │ │ ├── serialsets.cuh │ │ │ │ └── sortnetwork.cuh │ │ │ ├── mgpudevice.cuh │ │ │ ├── mgpuenums.h │ │ │ └── util/ │ │ │ └── static.h │ │ └── detail/ │ │ ├── cpu_ctc.h │ │ ├── ctc_helper.h │ │ ├── gpu_ctc.h │ │ ├── gpu_ctc_kernels.h │ │ └── hostdevice.h │ ├── miniz/ │ │ ├── miniz.c │ │ └── miniz.h │ └── mshadow/ │ ├── .gitignore │ ├── .travis.yml │ ├── CHANGES.md │ ├── CMakeLists.txt │ ├── LICENSE │ ├── README.md │ ├── cmake/ │ │ └── AutoDetectF16C.cmake │ ├── doc/ │ │ ├── Doxyfile │ │ ├── README.md │ │ └── mkdoc.sh │ ├── guide/ │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── basic.cpp │ │ ├── basic_stream.cu │ │ ├── defop.cpp │ │ ├── exp-template/ │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ └── README.md │ │ ├── mshadow-ps/ │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── dbstr.h │ │ │ ├── dist_async_sum-inl.h │ │ │ ├── dist_async_sum.cpp │ 
│ │ ├── local.sh │ │ │ ├── local_sum-inl.h │ │ │ ├── local_sum.cpp │ │ │ └── local_sum.cu │ │ └── neuralnet/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── convnet.cu │ │ ├── nnet.cu │ │ ├── nnet_ps.cu │ │ └── util.h │ ├── make/ │ │ ├── README.md │ │ └── mshadow.mk │ ├── mshadow/ │ │ ├── README.md │ │ ├── base.h │ │ ├── bfloat.h │ │ ├── cuda/ │ │ │ ├── reduce.cuh │ │ │ └── tensor_gpu-inl.cuh │ │ ├── dot_engine-inl.h │ │ ├── expr_engine-inl.h │ │ ├── expr_scalar-inl.h │ │ ├── expression.h │ │ ├── extension/ │ │ │ ├── broadcast.h │ │ │ ├── broadcast_with_axis.h │ │ │ ├── channel_pool.h │ │ │ ├── channel_unpool.h │ │ │ ├── choose.h │ │ │ ├── complex.h │ │ │ ├── concat.h │ │ │ ├── crop.h │ │ │ ├── fill.h │ │ │ ├── flip.h │ │ │ ├── implicit_gemm.h │ │ │ ├── mask.h │ │ │ ├── mirror.h │ │ │ ├── one_hot.h │ │ │ ├── pack_col2patch.h │ │ │ ├── pad.h │ │ │ ├── range.h │ │ │ ├── reduce_with_axis.h │ │ │ ├── reduceto1d.h │ │ │ ├── reshape.h │ │ │ ├── slice.h │ │ │ ├── slice_ex.h │ │ │ ├── spatial_pool.h │ │ │ ├── spatial_unpool.h │ │ │ ├── spatial_upsampling_nearest.h │ │ │ ├── swapaxis.h │ │ │ ├── take.h │ │ │ ├── take_grad.h │ │ │ ├── transpose.h │ │ │ └── unpack_patch2col.h │ │ ├── extension.h │ │ ├── half.h │ │ ├── io.h │ │ ├── packet/ │ │ │ ├── plain-inl.h │ │ │ └── sse-inl.h │ │ ├── packet-inl.h │ │ ├── random.h │ │ ├── stream_gpu-inl.h │ │ ├── tensor.h │ │ ├── tensor_container.h │ │ ├── tensor_cpu-inl.h │ │ └── tensor_gpu-inl.h │ ├── mshadow-ps/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── mshadow_ps.h │ │ ├── ps_dist-inl.h │ │ ├── ps_local-inl.h │ │ ├── ps_rabit-inl.h │ │ ├── thread.h │ │ └── thread_util.h │ ├── scripts/ │ │ └── travis_script.sh │ └── test/ │ ├── Makefile │ ├── pairtest.cu │ ├── pool.cu │ ├── reshape.cu │ ├── test.cu │ ├── test.h │ └── unpack.cu ├── CMakeLists.txt ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTORS.md ├── DNNL_README.md ├── LICENSE ├── NEWS.md ├── NOTICE ├── README.md ├── SECURITY.md ├── benchmark/ │ ├── __init__.py │ ├── opperf/ │ │ ├── 
README.md │ │ ├── __init__.py │ │ ├── custom_operations/ │ │ │ ├── __init__.py │ │ │ └── custom_operations.py │ │ ├── nd_operations/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── array_manipulation_operators.py │ │ │ ├── array_rearrange.py │ │ │ ├── binary_operators.py │ │ │ ├── gemm_operators.py │ │ │ ├── indexing_routines.py │ │ │ ├── linalg_operators.py │ │ │ ├── misc_operators.py │ │ │ ├── nn_activation_operators.py │ │ │ ├── nn_basic_operators.py │ │ │ ├── nn_conv_operators.py │ │ │ ├── nn_loss_operators.py │ │ │ ├── nn_optimizer_operators.py │ │ │ ├── random_sampling_operators.py │ │ │ ├── reduction_operators.py │ │ │ ├── sorting_searching_operators.py │ │ │ └── unary_operators.py │ │ ├── opperf.py │ │ ├── results/ │ │ │ ├── mxnet_operator_benchmark_results_cpu.md │ │ │ └── mxnet_operator_benchmark_results_gpu.md │ │ ├── rules/ │ │ │ ├── __init__.py │ │ │ └── default_params.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── benchmark_operators_pytest.py │ │ ├── benchmark_utils.py │ │ ├── common_utils.py │ │ ├── ndarray_utils.py │ │ ├── op_registry_utils.py │ │ └── profiler_utils.py │ └── python/ │ ├── control_flow/ │ │ └── rnn.py │ ├── dnnl/ │ │ ├── fc_add.py │ │ ├── run.sh │ │ └── run_per_thread.sh │ ├── einsum/ │ │ └── benchmark_einsum.py │ ├── ffi/ │ │ └── benchmark_ffi.py │ ├── metric/ │ │ └── benchmark_metric.py │ ├── quantization/ │ │ └── benchmark_op.py │ ├── sparse/ │ │ ├── cast_storage.py │ │ ├── dot.py │ │ ├── memory_benchmark.py │ │ ├── sparse_op.py │ │ ├── updater.py │ │ └── util.py │ └── tvmop/ │ └── benchmark_tvmop.py ├── cd/ │ ├── Jenkinsfile_cd_pipeline │ ├── Jenkinsfile_release_job │ ├── Jenkinsfile_utils.groovy │ ├── README.md │ ├── mxnet_lib/ │ │ ├── Jenkins_pipeline.groovy │ │ └── mxnet_lib_pipeline.groovy │ ├── python/ │ │ ├── docker/ │ │ │ ├── Dockerfile │ │ │ ├── Dockerfile.test │ │ │ ├── Jenkins_pipeline.groovy │ │ │ ├── python_images.sh │ │ │ └── test_python_image.sh │ │ └── pypi/ │ │ ├── Jenkins_pipeline.groovy │ │ ├── README.md │ │ 
├── pypi_package.sh │ │ └── pypi_publish.py │ └── utils/ │ ├── artifact_repository.md │ ├── artifact_repository.py │ ├── docker_tag.sh │ ├── mxnet_base_image.sh │ └── test_artifact_repository.py ├── ci/ │ ├── Jenkinsfile_docker_cache │ ├── Jenkinsfile_utils.groovy │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── build_windows.py │ ├── dev_menu.py │ ├── docker/ │ │ ├── Dockerfile.build.android │ │ ├── Dockerfile.build.arm │ │ ├── Dockerfile.build.centos7 │ │ ├── Dockerfile.build.jetson │ │ ├── Dockerfile.build.ubuntu │ │ ├── Dockerfile.build.ubuntu_cpu_jekyll │ │ ├── Dockerfile.publish.test.centos7 │ │ ├── Dockerfile.test.arm │ │ ├── docker-compose.yml │ │ ├── install/ │ │ │ ├── deb_ubuntu_ccache.sh │ │ │ ├── docker_filepermissions.sh │ │ │ ├── requirements │ │ │ └── ubuntu_adduser.sh │ │ ├── runtime_functions.sh │ │ └── toolchains/ │ │ ├── aarch64-linux-gnu-toolchain.cmake │ │ └── arm-linux-gnueabihf-toolchain.cmake │ ├── docker_login.py │ ├── jenkins/ │ │ ├── Jenkins_steps.groovy │ │ ├── Jenkinsfile_centos_cpu │ │ ├── Jenkinsfile_centos_gpu │ │ ├── Jenkinsfile_clang │ │ ├── Jenkinsfile_edge │ │ ├── Jenkinsfile_full │ │ ├── Jenkinsfile_miscellaneous │ │ ├── Jenkinsfile_sanity │ │ ├── Jenkinsfile_tools │ │ ├── Jenkinsfile_unix_cpu │ │ ├── Jenkinsfile_unix_gpu │ │ ├── Jenkinsfile_website_beta │ │ ├── Jenkinsfile_website_full │ │ ├── Jenkinsfile_website_full_pr │ │ ├── Jenkinsfile_website_jekyll_docs │ │ ├── Jenkinsfile_website_mxnet_build │ │ ├── Jenkinsfile_website_nightly │ │ ├── Jenkinsfile_website_python_docs │ │ ├── Jenkinsfile_website_version_artifacts │ │ ├── Jenkinsfile_windows_cpu │ │ └── Jenkinsfile_windows_gpu │ ├── logging.conf │ ├── other/ │ │ └── ci_deploy_doc.sh │ ├── publish/ │ │ ├── Jenkinsfile │ │ ├── README.md │ │ ├── python/ │ │ │ └── build.sh │ │ ├── scala/ │ │ │ ├── build.sh │ │ │ ├── buildkey.py │ │ │ ├── deploy.sh │ │ │ ├── fullDeploy.sh │ │ │ └── test.sh │ │ └── website/ │ │ ├── README.md │ │ ├── beta-deploy.sh │ │ ├── deploy.sh │ │ └── 
publish_artifacts.sh │ ├── test_docker_login.py │ ├── util.py │ └── windows/ │ ├── test_py3_cpu.ps1 │ └── test_py3_gpu.ps1 ├── cmake/ │ ├── BuildCythonModules.cmake │ ├── BuildTVM.cmake │ ├── ChooseBlas.cmake │ ├── Modules/ │ │ ├── FindAccelerate.cmake │ │ ├── FindAtlas.cmake │ │ ├── FindCUDNN.cmake │ │ ├── FindCUTENSOR.cmake │ │ ├── FindGperftools.cmake │ │ ├── FindJeMalloc.cmake │ │ ├── FindNCCL.cmake │ │ ├── FindNVML.cmake │ │ ├── FindNVTX.cmake │ │ └── FindOpenBLAS.cmake │ ├── Utils.cmake │ ├── libmxnet.sym │ └── upstream/ │ ├── FindBLAS.cmake │ ├── FindCUDAToolkit.cmake │ └── select_compute_arch.cmake ├── config/ │ ├── darwin.cmake │ ├── distribution/ │ │ ├── darwin_cpu.cmake │ │ ├── darwin_cpu_mkl.cmake │ │ ├── darwin_native.cmake │ │ ├── linux_cpu.cmake │ │ ├── linux_cpu_mkl.cmake │ │ ├── linux_cu100.cmake │ │ ├── linux_cu101.cmake │ │ ├── linux_cu102.cmake │ │ ├── linux_cu110.cmake │ │ ├── linux_cu112.cmake │ │ ├── linux_cu92.cmake │ │ └── linux_native.cmake │ ├── linux.cmake │ └── linux_gpu.cmake ├── conftest.py ├── contrib/ │ └── tvmop/ │ ├── __init__.py │ ├── basic/ │ │ ├── __init__.py │ │ └── ufunc.py │ ├── compile.py │ ├── core/ │ │ ├── __init__.py │ │ ├── fromnumeric.py │ │ ├── multiarray.py │ │ └── umath.py │ ├── opdef.py │ ├── space.py │ └── utils.py ├── cpp-package/ │ ├── CMakeLists.txt │ ├── README.md │ ├── example/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── alexnet.cpp │ │ ├── charRNN.cpp │ │ ├── feature_extract/ │ │ │ ├── README.md │ │ │ ├── feature_extract.cpp │ │ │ ├── prepare_data_with_opencv.cpp │ │ │ └── run.sh │ │ ├── get_data.sh │ │ ├── googlenet.cpp │ │ ├── inception_bn.cpp │ │ ├── inference/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── imagenet_inference.cpp │ │ │ ├── multi_threaded_inference/ │ │ │ │ ├── get_model.py │ │ │ │ ├── multi_threaded_inference.cc │ │ │ │ └── unit_test_multi_threaded_inference.sh │ │ │ ├── sentiment_analysis_rnn.cpp │ │ │ ├── unit_test_imagenet_inference.sh │ │ │ └── 
unit_test_sentiment_analysis_rnn.sh │ │ ├── lenet.cpp │ │ ├── lenet_with_mxdataiter.cpp │ │ ├── mlp.cpp │ │ ├── mlp_cpu.cpp │ │ ├── mlp_csv.cpp │ │ ├── mlp_gpu.cpp │ │ ├── mnist_to_csv.py │ │ ├── resnet.cpp │ │ ├── run_lenet_with_mxdataiter.sh │ │ ├── test_kvstore.cpp │ │ ├── test_ndarray_copy.cpp │ │ ├── test_optimizer.cpp │ │ ├── test_regress_label.cpp │ │ ├── test_score.cpp │ │ ├── unittests/ │ │ │ └── unit_test_mlp_csv.sh │ │ └── utils.h │ ├── include/ │ │ └── mxnet-cpp/ │ │ ├── .gitignore │ │ ├── CPPLINT.cfg │ │ ├── MxNetCpp.h │ │ ├── base.h │ │ ├── contrib.h │ │ ├── executor.h │ │ ├── executor.hpp │ │ ├── initializer.h │ │ ├── io.h │ │ ├── io.hpp │ │ ├── kvstore.h │ │ ├── kvstore.hpp │ │ ├── lr_scheduler.h │ │ ├── metric.h │ │ ├── model.h │ │ ├── ndarray.h │ │ ├── ndarray.hpp │ │ ├── op_map.h │ │ ├── op_suppl.h │ │ ├── op_util.h │ │ ├── operator.h │ │ ├── operator.hpp │ │ ├── optimizer.h │ │ ├── optimizer.hpp │ │ ├── shape.h │ │ ├── symbol.h │ │ └── symbol.hpp │ ├── scripts/ │ │ ├── OpWrapperGenerator.py │ │ └── lint.py │ └── tests/ │ └── ci_test.sh ├── doap.rdf ├── docker/ │ ├── .gitignore │ ├── Dockerfiles/ │ │ ├── Dockerfile.in.julia │ │ ├── Dockerfile.in.lib.cpu │ │ ├── Dockerfile.in.lib.gpu │ │ ├── Dockerfile.in.perl │ │ ├── Dockerfile.in.python │ │ ├── Dockerfile.in.r-lang │ │ └── Dockerfile.in.scala │ ├── README.md │ ├── docker-python/ │ │ ├── README.md │ │ ├── build_python_dockerfile.sh │ │ └── test_mxnet.py │ ├── install/ │ │ ├── cpp.sh │ │ ├── julia.sh │ │ ├── perl.sh │ │ ├── python.sh │ │ ├── r.sh │ │ └── scala.sh │ ├── run.sh │ └── tool.sh ├── docs/ │ ├── .dockerignore │ ├── .gitignore │ ├── README.md │ ├── cpp_docs/ │ │ ├── Doxyfile │ │ └── Makefile │ ├── python_docs/ │ │ ├── README.md │ │ ├── _static/ │ │ │ ├── autodoc.js │ │ │ ├── feedback.css │ │ │ ├── matomo_analytics.js │ │ │ └── mxnet.css │ │ ├── python/ │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── Makefile_sphinx │ │ │ ├── api/ │ │ │ │ ├── autograd/ │ │ │ │ │ └── index.rst │ │ │ │ ├── 
contrib/ │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── io/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── ndarray/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── onnx/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── quantization/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── symbol/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── tensorboard/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── tensorrt/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ └── text/ │ │ │ │ │ └── index.rst │ │ │ │ ├── device/ │ │ │ │ │ └── index.rst │ │ │ │ ├── engine/ │ │ │ │ │ └── index.rst │ │ │ │ ├── executor/ │ │ │ │ │ └── index.rst │ │ │ │ ├── gluon/ │ │ │ │ │ ├── block.rst │ │ │ │ │ ├── constant.rst │ │ │ │ │ ├── contrib/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── hybrid_block.rst │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── loss/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── metric/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── model_zoo/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── nn/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── parameter.rst │ │ │ │ │ ├── rnn/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── symbol_block.rst │ │ │ │ │ ├── trainer.rst │ │ │ │ │ └── utils/ │ │ │ │ │ └── index.rst │ │ │ │ ├── index.rst │ │ │ │ ├── initializer/ │ │ │ │ │ └── index.rst │ │ │ │ ├── kvstore/ │ │ │ │ │ └── index.rst │ │ │ │ ├── kvstore_server/ │ │ │ │ │ └── index.rst │ │ │ │ ├── legacy/ │ │ │ │ │ ├── callback/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── image/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── io/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── ndarray/ │ │ │ │ │ │ ├── contrib/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── image/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── linalg/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── ndarray.rst │ │ │ │ │ │ ├── op/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── random/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── register/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── sparse/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ └── utils/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── recordio/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── 
symbol/ │ │ │ │ │ │ ├── contrib/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── image/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── linalg/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── op/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── random/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── register/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ ├── sparse/ │ │ │ │ │ │ │ └── index.rst │ │ │ │ │ │ └── symbol.rst │ │ │ │ │ └── visualization/ │ │ │ │ │ └── index.rst │ │ │ │ ├── lr_scheduler/ │ │ │ │ │ └── index.rst │ │ │ │ ├── np/ │ │ │ │ │ ├── arrays.indexing.rst │ │ │ │ │ ├── arrays.ndarray.rst │ │ │ │ │ ├── arrays.rst │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── random/ │ │ │ │ │ │ └── index.rst │ │ │ │ │ ├── routines.array-creation.rst │ │ │ │ │ ├── routines.array-manipulation.rst │ │ │ │ │ ├── routines.io.rst │ │ │ │ │ ├── routines.linalg.rst │ │ │ │ │ ├── routines.math.rst │ │ │ │ │ ├── routines.rst │ │ │ │ │ ├── routines.sort.rst │ │ │ │ │ └── routines.statistics.rst │ │ │ │ ├── npx/ │ │ │ │ │ └── index.rst │ │ │ │ ├── optimizer/ │ │ │ │ │ └── index.rst │ │ │ │ ├── profiler/ │ │ │ │ │ └── index.rst │ │ │ │ ├── rtc/ │ │ │ │ │ └── index.rst │ │ │ │ ├── runtime/ │ │ │ │ │ └── index.rst │ │ │ │ ├── test_utils/ │ │ │ │ │ └── index.rst │ │ │ │ └── util/ │ │ │ │ └── index.rst │ │ │ ├── index.rst │ │ │ ├── scripts/ │ │ │ │ ├── conf.py │ │ │ │ ├── md2ipynb.py │ │ │ │ └── process_rst.py │ │ │ └── tutorials/ │ │ │ ├── deploy/ │ │ │ │ ├── export/ │ │ │ │ │ ├── index.rst │ │ │ │ │ └── onnx.md │ │ │ │ ├── index.rst │ │ │ │ ├── inference/ │ │ │ │ │ ├── cpp.rst │ │ │ │ │ ├── image_classification_jetson.md │ │ │ │ │ └── index.rst │ │ │ │ └── run-on-aws/ │ │ │ │ ├── cloud.md │ │ │ │ ├── index.rst │ │ │ │ ├── use_ec2.rst │ │ │ │ └── use_sagemaker.rst │ │ │ ├── extend/ │ │ │ │ ├── customop.md │ │ │ │ └── index.rst │ │ │ ├── getting-started/ │ │ │ │ ├── crash-course/ │ │ │ │ │ ├── 0-introduction.md │ │ │ │ │ ├── 1-nparray.md │ │ │ │ │ ├── 2-create-nn.md │ │ │ │ │ ├── 3-autograd.md 
│ │ │ │ │ ├── 4-components.md │ │ │ │ │ ├── 5-datasets.md │ │ │ │ │ ├── 6-train-nn.md │ │ │ │ │ ├── 7-use-gpus.md │ │ │ │ │ ├── index.rst │ │ │ │ │ └── prepare_dataset.py │ │ │ │ ├── gluon_from_experiment_to_deployment.md │ │ │ │ ├── gluon_migration_guide.md │ │ │ │ ├── index.rst │ │ │ │ ├── logistic_regression_explained.md │ │ │ │ └── to-mxnet/ │ │ │ │ ├── index.rst │ │ │ │ └── pytorch.md │ │ │ ├── index.rst │ │ │ ├── packages/ │ │ │ │ ├── autograd/ │ │ │ │ │ └── index.md │ │ │ │ ├── gluon/ │ │ │ │ │ ├── blocks/ │ │ │ │ │ │ ├── activations/ │ │ │ │ │ │ │ └── activations.md │ │ │ │ │ │ ├── custom-layer.md │ │ │ │ │ │ ├── hybridize.md │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── init.md │ │ │ │ │ │ ├── naming.md │ │ │ │ │ │ ├── nn.md │ │ │ │ │ │ ├── parameters.md │ │ │ │ │ │ └── save_load_params.md │ │ │ │ │ ├── image/ │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── info_gan.md │ │ │ │ │ │ └── mnist.md │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── loss/ │ │ │ │ │ │ ├── custom-loss.md │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── kl_divergence.md │ │ │ │ │ │ └── loss.md │ │ │ │ │ ├── text/ │ │ │ │ │ │ ├── gnmt.rst │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ └── transformer.rst │ │ │ │ │ └── training/ │ │ │ │ │ ├── fit_api_tutorial.md │ │ │ │ │ ├── index.rst │ │ │ │ │ ├── learning_rates/ │ │ │ │ │ │ ├── index.rst │ │ │ │ │ │ ├── learning_rate_finder.md │ │ │ │ │ │ ├── learning_rate_schedules.md │ │ │ │ │ │ └── learning_rate_schedules_advanced.md │ │ │ │ │ ├── normalization/ │ │ │ │ │ │ └── index.md │ │ │ │ │ └── trainer.md │ │ │ │ ├── index.rst │ │ │ │ ├── kvstore/ │ │ │ │ │ ├── index.rst │ │ │ │ │ └── kvstore.md │ │ │ │ ├── legacy/ │ │ │ │ │ ├── index.rst │ │ │ │ │ └── ndarray/ │ │ │ │ │ ├── 01-ndarray-intro.md │ │ │ │ │ ├── 02-ndarray-operations.md │ │ │ │ │ ├── 03-ndarray-contexts.md │ │ │ │ │ ├── gotchas_numpy_in_mxnet.md │ │ │ │ │ ├── index.rst │ │ │ │ │ └── sparse/ │ │ │ │ │ ├── csr.md │ │ │ │ │ ├── index.rst │ │ │ │ │ └── row_sparse.md │ │ │ │ ├── np/ │ │ │ │ │ ├── cheat-sheet.md │ │ │ │ 
│ ├── index.rst │ │ │ │ │ └── np-vs-numpy.md │ │ │ │ ├── onnx/ │ │ │ │ │ ├── fine_tuning_gluon.md │ │ │ │ │ ├── index.rst │ │ │ │ │ └── inference_on_onnx_model.md │ │ │ │ ├── optimizer/ │ │ │ │ │ └── index.md │ │ │ │ └── viz/ │ │ │ │ └── index.rst │ │ │ └── performance/ │ │ │ ├── backend/ │ │ │ │ ├── amp.md │ │ │ │ ├── dnnl/ │ │ │ │ │ ├── dnnl_quantization.md │ │ │ │ │ ├── dnnl_quantization_inc.md │ │ │ │ │ ├── dnnl_readme.md │ │ │ │ │ └── index.rst │ │ │ │ ├── index.rst │ │ │ │ ├── profiler.md │ │ │ │ └── tvm.rst │ │ │ ├── compression/ │ │ │ │ ├── index.rst │ │ │ │ └── int8.rst │ │ │ └── index.rst │ │ ├── requirements │ │ └── themes/ │ │ ├── .babelrc │ │ ├── .circleci/ │ │ │ └── config.yml │ │ ├── .gitignore │ │ ├── .sassrc │ │ └── mx-theme/ │ │ ├── LICENSE │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── mxtheme/ │ │ │ ├── __init__.py │ │ │ ├── card.py │ │ │ ├── drawer.html │ │ │ ├── feedback.html │ │ │ ├── footer.html │ │ │ ├── header.html │ │ │ ├── header_search.html │ │ │ ├── header_sourcelink.html │ │ │ ├── header_top.html │ │ │ ├── layout.html │ │ │ ├── localtoc.html │ │ │ ├── relations.html │ │ │ ├── search.html │ │ │ ├── static/ │ │ │ │ ├── fontawesome/ │ │ │ │ │ └── all.css │ │ │ │ ├── fonts.css │ │ │ │ ├── sphinx_materialdesign_theme.css │ │ │ │ └── sphinx_materialdesign_theme.js │ │ │ └── theme.conf │ │ ├── setup.py │ │ └── src/ │ │ ├── js/ │ │ │ ├── adjust-height.js │ │ │ ├── feedback.js │ │ │ ├── scrollspy.js │ │ │ └── sphinx_materialdesign_theme.js │ │ └── scss/ │ │ ├── _root.scss │ │ ├── _variables.scss │ │ ├── admonitions/ │ │ │ └── _admonitions.scss │ │ ├── blockquote/ │ │ │ └── _blockquote.scss │ │ ├── card/ │ │ │ └── _card.scss │ │ ├── code/ │ │ │ └── _code.scss │ │ ├── downloadlink/ │ │ │ └── _downloadlink.scss │ │ ├── drawer/ │ │ │ └── _drawer.scss │ │ ├── fonts/ │ │ │ └── _material-icons.scss │ │ ├── footer/ │ │ │ └── _footer.scss │ │ ├── grid/ │ │ │ └── _simplegrid.scss │ │ ├── header/ │ │ │ └── _header.scss │ │ ├── headerings/ │ │ │ └── 
_headerings.scss │ │ ├── layout/ │ │ │ └── _layout.scss │ │ ├── lists/ │ │ │ └── _lists.scss │ │ ├── search/ │ │ │ └── _search.scss │ │ ├── sphinx_materialdesign_theme.scss │ │ ├── tables/ │ │ │ └── _tables.scss │ │ └── toc/ │ │ ├── _globaltoc.scss │ │ ├── _localtoc.scss │ │ └── _toctree.scss │ ├── static_site/ │ │ ├── .gitignore │ │ ├── .nojekyll │ │ ├── Makefile │ │ ├── README.md │ │ └── src/ │ │ ├── .asf.yaml │ │ ├── .gitignore │ │ ├── .htaccess │ │ ├── .nojekyll │ │ ├── 404.html │ │ ├── Gemfile │ │ ├── _config.yml │ │ ├── _config_beta.yml │ │ ├── _config_prod.yml │ │ ├── _includes/ │ │ │ ├── callout.html │ │ │ ├── disqus_comments.html │ │ │ ├── feedback.html │ │ │ ├── footer.html │ │ │ ├── get_started/ │ │ │ │ ├── cloud/ │ │ │ │ │ ├── cpu.md │ │ │ │ │ └── gpu.md │ │ │ │ ├── devices/ │ │ │ │ │ ├── nvidia-jetson.md │ │ │ │ │ └── raspberry_pi.md │ │ │ │ ├── get_started.html │ │ │ │ ├── gpu_snippet.md │ │ │ │ ├── linux/ │ │ │ │ │ ├── clojure/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ ├── cpp/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ ├── java/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ ├── julia/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ ├── perl/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ ├── python/ │ │ │ │ │ │ ├── cpu/ │ │ │ │ │ │ │ ├── build-from-source.md │ │ │ │ │ │ │ ├── docker.md │ │ │ │ │ │ │ └── pip.md │ │ │ │ │ │ └── gpu/ │ │ │ │ │ │ ├── build-from-source.md │ │ │ │ │ │ ├── docker.md │ │ │ │ │ │ └── pip.md │ │ │ │ │ ├── r/ │ │ │ │ │ │ └── build-from-source.md │ │ │ │ │ └── scala/ │ │ │ │ │ └── build-from-source.md │ │ │ │ └── pip_snippet.md │ │ │ ├── head.html │ │ │ ├── header.html │ │ │ ├── icon-github.html │ │ │ ├── icon-twitter.html │ │ │ ├── important.html │ │ │ ├── matomo-analytics.html │ │ │ ├── note.html │ │ │ ├── social.html │ │ │ ├── tip.html │ │ │ └── warning.html │ │ ├── _layouts/ │ │ │ ├── default.html │ │ │ ├── home.html │ │ │ ├── page.html │ │ │ ├── page_api.html │ │ │ ├── page_category.html │ │ │ ├── 
page_landing_tutorials.html │ │ │ └── post.html │ │ ├── _plugins/ │ │ │ └── markdowner.rb │ │ ├── _sass/ │ │ │ ├── feedback.scss │ │ │ ├── generalVersionDropdown.scss │ │ │ ├── globalSearch.scss │ │ │ ├── minima/ │ │ │ │ ├── _base.scss │ │ │ │ ├── _blog.scss │ │ │ │ ├── _docs.scss │ │ │ │ ├── _ecosystem.scss │ │ │ │ ├── _features.scss │ │ │ │ ├── _getting_started.scss │ │ │ │ ├── _home.scss │ │ │ │ ├── _layout.scss │ │ │ │ ├── _syntax-highlighting.scss │ │ │ │ ├── colorful.scss │ │ │ │ └── simple-grid.scss │ │ │ └── minima.scss │ │ ├── assets/ │ │ │ ├── js/ │ │ │ │ ├── clipboard.js │ │ │ │ ├── copycode.js │ │ │ │ ├── feedback.js │ │ │ │ ├── globalSearch.js │ │ │ │ └── options.js │ │ │ └── main.scss │ │ ├── index.html │ │ └── pages/ │ │ ├── api/ │ │ │ ├── api.html │ │ │ ├── architecture/ │ │ │ │ ├── exception_handling.md │ │ │ │ ├── note_data_loading.md │ │ │ │ ├── note_engine.md │ │ │ │ ├── note_memory.md │ │ │ │ ├── overview.md │ │ │ │ └── program_model.md │ │ │ ├── clojure/ │ │ │ │ ├── docs/ │ │ │ │ │ └── tutorials/ │ │ │ │ │ ├── index.md │ │ │ │ │ ├── kvstore.md │ │ │ │ │ ├── module.md │ │ │ │ │ ├── ndarray.md │ │ │ │ │ ├── symbol.md │ │ │ │ │ └── symbol_in_pictures.md │ │ │ │ └── index.md │ │ │ ├── cpp/ │ │ │ │ ├── docs/ │ │ │ │ │ └── tutorials/ │ │ │ │ │ ├── basics.md │ │ │ │ │ ├── index.md │ │ │ │ │ ├── multi_threaded_inference.md │ │ │ │ │ ├── mxnet_cpp_inference_tutorial.md │ │ │ │ │ └── subgraphAPI.md │ │ │ │ └── index.md │ │ │ ├── developer_guide/ │ │ │ │ ├── 1_github_contribution_and_PR_verification_tips.md │ │ │ │ ├── debugging_and_performance_optimization_tips.md │ │ │ │ ├── examine_forward_results_with_hooks.md │ │ │ │ ├── exception_handing_and_custom_error_types.md │ │ │ │ └── profiling.md │ │ │ ├── faq/ │ │ │ │ ├── add_op_in_backend.md │ │ │ │ ├── cloud.md │ │ │ │ ├── distributed_training.md │ │ │ │ ├── env_var.md │ │ │ │ ├── float16.md │ │ │ │ ├── gradient_compression.md │ │ │ │ ├── large_tensor_support.md │ │ │ │ ├── model_parallel_lstm.md │ │ │ │ 
├── new_op.md │ │ │ │ ├── perf.md │ │ │ │ ├── recordio.md │ │ │ │ ├── s3_integration.md │ │ │ │ ├── security.md │ │ │ │ ├── tensor_inspector_tutorial.md │ │ │ │ ├── using_rtc.md │ │ │ │ └── why_mxnet.md │ │ │ ├── java/ │ │ │ │ ├── docs/ │ │ │ │ │ └── tutorials/ │ │ │ │ │ ├── index.md │ │ │ │ │ └── ssd_inference.md │ │ │ │ └── index.md │ │ │ ├── julia/ │ │ │ │ └── index.md │ │ │ ├── perl/ │ │ │ │ ├── docs/ │ │ │ │ │ └── tutorials/ │ │ │ │ │ ├── index.md │ │ │ │ │ ├── io.md │ │ │ │ │ ├── kvstore.md │ │ │ │ │ ├── ndarray.md │ │ │ │ │ └── symbol.md │ │ │ │ └── index.md │ │ │ ├── python/ │ │ │ │ └── index.md │ │ │ ├── r/ │ │ │ │ ├── docs/ │ │ │ │ │ └── tutorials/ │ │ │ │ │ ├── char_rnn_model.md │ │ │ │ │ ├── classify_real_image_with_pretrained_model.md │ │ │ │ │ ├── custom_iterator.md │ │ │ │ │ ├── index.md │ │ │ │ │ ├── multi_dim_lstm.md │ │ │ │ │ ├── ndarray.md │ │ │ │ │ └── symbol.md │ │ │ │ └── index.md │ │ │ └── scala/ │ │ │ ├── docs/ │ │ │ │ └── tutorials/ │ │ │ │ ├── index.md │ │ │ │ ├── infer.md │ │ │ │ ├── io.md │ │ │ │ ├── kvstore.md │ │ │ │ ├── ndarray.md │ │ │ │ ├── symbol.md │ │ │ │ └── symbol_in_pictures.md │ │ │ └── index.md │ │ ├── community/ │ │ │ ├── clang_format_guide.md │ │ │ ├── code_guide.md │ │ │ ├── code_review.md │ │ │ ├── committer_guide.md │ │ │ ├── community.md │ │ │ ├── document.md │ │ │ ├── error_handling.md │ │ │ ├── git_howto.md │ │ │ ├── index.md │ │ │ └── pull_request.md │ │ ├── ecosystem.html │ │ ├── features.html │ │ ├── get_started/ │ │ │ ├── build_from_source.md │ │ │ ├── download.md │ │ │ ├── index.html │ │ │ ├── jetson_setup.md │ │ │ └── validate_mxnet.md │ │ └── trusted_by.html │ └── tutorial_utils/ │ └── vision/ │ └── cnn_visualization/ │ └── gradcam.py ├── example/ │ ├── MXNetTutorialTemplate.ipynb │ ├── README.md │ ├── adversary/ │ │ ├── README.md │ │ └── adversary_generation.ipynb │ ├── bi-lstm-sort/ │ │ ├── README.md │ │ └── bi-lstm-sort.ipynb │ ├── distributed_training/ │ │ ├── README.md │ │ ├── cifar10_dist.py │ │ └── 
cifar10_kvstore_hvd.py │ ├── distributed_training-horovod/ │ │ ├── README.md │ │ ├── gluon_mnist.py │ │ └── resnet50_imagenet.py │ ├── extensions/ │ │ ├── lib_api/ │ │ │ ├── Makefile │ │ │ ├── init_lib.cc │ │ │ ├── libtest.cc │ │ │ └── test_loading.py │ │ ├── lib_custom_op/ │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── gemm_lib.cc │ │ │ ├── relu_lib.cc │ │ │ ├── relu_lib.cu │ │ │ ├── relu_lib.h │ │ │ ├── test_gemm.py │ │ │ ├── test_relu.py │ │ │ ├── test_transposecsr.py │ │ │ ├── test_transposerowsp.py │ │ │ ├── transposecsr_lib.cc │ │ │ └── transposerowsp_lib.cc │ │ ├── lib_external_ops/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── init_lib.cc │ │ │ ├── min_ex-inl.h │ │ │ ├── min_ex.cc │ │ │ ├── min_ex.cu │ │ │ └── test_loading.py │ │ ├── lib_pass/ │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── pass_lib.cc │ │ │ └── test_pass.py │ │ └── lib_subgraph/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── subgraph_lib.cc │ │ └── test_subgraph.py │ ├── gluon/ │ │ ├── actor_critic/ │ │ │ ├── README.md │ │ │ └── actor_critic.py │ │ ├── data.py │ │ ├── house_prices/ │ │ │ ├── README.md │ │ │ └── kaggle_k_fold_cross_validation.py │ │ ├── image_classification.py │ │ ├── mnist/ │ │ │ ├── README.md │ │ │ └── mnist.py │ │ └── super_resolution/ │ │ ├── README.md │ │ └── super_resolution.py │ ├── multi-task/ │ │ ├── README.md │ │ └── multi-task-learning.ipynb │ ├── probability/ │ │ └── VAE/ │ │ └── VAE.md │ ├── profiler/ │ │ ├── README.md │ │ ├── profiler_imageiter.py │ │ ├── profiler_matmul.py │ │ └── profiler_ndarray.py │ ├── quantization/ │ │ ├── README.md │ │ ├── imagenet_gen_qsym_onednn.py │ │ ├── imagenet_inference.py │ │ └── launch_inference_onednn.sh │ ├── quantization_inc/ │ │ ├── custom_strategy.py │ │ ├── resnet50v2_mse.yaml │ │ ├── resnet_measurement.py │ │ ├── resnet_mse.py │ │ └── resnet_tuning.py │ └── recommenders/ │ ├── .gitignore │ ├── README.md │ ├── demo1-MF.ipynb │ ├── demo2-dssm.ipynb │ ├── matrix_fact.py │ └── movielens_data.py ├── include/ │ └── 
mxnet/ │ ├── api_registry.h │ ├── base.h │ ├── c_api.h │ ├── c_api_error.h │ ├── c_api_test.h │ ├── engine.h │ ├── executor.h │ ├── expr_operator.h │ ├── graph_attr_types.h │ ├── imperative.h │ ├── io.h │ ├── ir/ │ │ └── expr.h │ ├── kvstore.h │ ├── lib_api.h │ ├── libinfo.h │ ├── ndarray.h │ ├── node/ │ │ ├── container.h │ │ └── node.h │ ├── op_attr_types.h │ ├── operator.h │ ├── operator_util.h │ ├── random_generator.h │ ├── resource.h │ ├── rtc.h │ ├── runtime/ │ │ ├── c_runtime_api.h │ │ ├── container.h │ │ ├── container_ext.h │ │ ├── data_type.h │ │ ├── ffi_helper.h │ │ ├── memory.h │ │ ├── ndarray.h │ │ ├── ndarray_handle.h │ │ ├── object.h │ │ ├── packed_func.h │ │ ├── py_arg.h │ │ └── registry.h │ ├── storage.h │ ├── tensor_blob.h │ └── tuple.h ├── licenses/ │ ├── BOOST1_0 │ ├── BSD2 │ ├── BSD3-cmake │ ├── MIT │ └── OFL1_1 ├── plugin/ │ ├── opencv/ │ │ ├── __init__.py │ │ ├── cv_api.cc │ │ ├── cv_api.h │ │ ├── opencv.mk │ │ └── opencv.py │ ├── sframe/ │ │ ├── iter_sframe.cc │ │ └── plugin.mk │ ├── torch/ │ │ ├── torch.mk │ │ ├── torch_base.cc │ │ ├── torch_base.h │ │ ├── torch_criterion-inl.h │ │ ├── torch_criterion.cc │ │ ├── torch_criterion.cu │ │ ├── torch_function.cc │ │ ├── torch_function.h │ │ ├── torch_module-inl.h │ │ ├── torch_module.cc │ │ └── torch_module.cu │ └── warpctc/ │ ├── warpctc-inl.h │ ├── warpctc.cc │ ├── warpctc.cu │ └── warpctc.mk ├── prospector.yaml ├── pytest.ini ├── python/ │ ├── .gitignore │ ├── README.md │ ├── mxnet/ │ │ ├── __init__.py │ │ ├── _api_internal.py │ │ ├── _ctypes/ │ │ │ ├── __init__.py │ │ │ ├── _api_internal.py │ │ │ ├── cached_op.py │ │ │ ├── ndarray.py │ │ │ ├── space.py │ │ │ └── symbol.py │ │ ├── _cy3/ │ │ │ ├── README.md │ │ │ └── __init__.py │ │ ├── _deferred_compute.py │ │ ├── _ffi/ │ │ │ ├── __init__.py │ │ │ ├── _ctypes/ │ │ │ │ ├── __init__.py │ │ │ │ ├── function.py │ │ │ │ ├── object.py │ │ │ │ └── types.py │ │ │ ├── _cy3/ │ │ │ │ └── __init__.py │ │ │ ├── _cython/ │ │ │ │ ├── base.pxi │ │ │ │ ├── 
core.pyx │ │ │ │ ├── function.pxi │ │ │ │ ├── ndarray.pxi │ │ │ │ └── object.pxi │ │ │ ├── base.py │ │ │ ├── function.py │ │ │ ├── node_generic.py │ │ │ ├── object.py │ │ │ └── runtime_ctypes.py │ │ ├── _global_var.py │ │ ├── _numpy_op_doc.py │ │ ├── amp/ │ │ │ ├── __init__.py │ │ │ ├── amp.py │ │ │ ├── lists/ │ │ │ │ ├── __init__.py │ │ │ │ ├── symbol_bf16.py │ │ │ │ └── symbol_fp16.py │ │ │ └── loss_scaler.py │ │ ├── api.py │ │ ├── attribute.py │ │ ├── autograd.py │ │ ├── base.py │ │ ├── callback.py │ │ ├── container.py │ │ ├── context.py │ │ ├── contrib/ │ │ │ ├── __init__.py │ │ │ ├── io.py │ │ │ ├── ndarray.py │ │ │ ├── onnx/ │ │ │ │ └── __init__.py │ │ │ ├── quantization.py │ │ │ ├── symbol.py │ │ │ ├── tensorboard.py │ │ │ ├── tensorrt.py │ │ │ └── text/ │ │ │ ├── __init__.py │ │ │ ├── _constants.py │ │ │ ├── embedding.py │ │ │ ├── utils.py │ │ │ └── vocab.py │ │ ├── cuda/ │ │ │ ├── __init__.py │ │ │ └── nvtx.py │ │ ├── cython/ │ │ │ ├── __init__.py │ │ │ ├── base.pyi │ │ │ ├── ndarray.pyx │ │ │ └── symbol.pyx │ │ ├── device.py │ │ ├── dlpack.py │ │ ├── engine.py │ │ ├── error.py │ │ ├── executor.py │ │ ├── gluon/ │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── contrib/ │ │ │ │ ├── __init__.py │ │ │ │ ├── data/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── _constants.py │ │ │ │ │ └── vision/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── dataloader.py │ │ │ │ │ └── transforms/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── bbox/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bbox.py │ │ │ │ │ └── utils.py │ │ │ │ └── estimator/ │ │ │ │ ├── __init__.py │ │ │ │ ├── batch_processor.py │ │ │ │ ├── estimator.py │ │ │ │ ├── event_handler.py │ │ │ │ └── utils.py │ │ │ ├── data/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _internal.py │ │ │ │ ├── batchify.py │ │ │ │ ├── dataloader.py │ │ │ │ ├── dataset.py │ │ │ │ ├── sampler.py │ │ │ │ └── vision/ │ │ │ │ ├── __init__.py │ │ │ │ ├── datasets.py │ │ │ │ └── transforms/ │ │ │ │ ├── __init__.py │ │ │ │ └── image.py │ │ │ ├── 
loss.py │ │ │ ├── metric.py │ │ │ ├── model_zoo/ │ │ │ │ ├── __init__.py │ │ │ │ ├── model_store.py │ │ │ │ └── vision/ │ │ │ │ ├── __init__.py │ │ │ │ ├── alexnet.py │ │ │ │ ├── densenet.py │ │ │ │ ├── inception.py │ │ │ │ ├── mobilenet.py │ │ │ │ ├── resnet.py │ │ │ │ ├── squeezenet.py │ │ │ │ └── vgg.py │ │ │ ├── nn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── activations.py │ │ │ │ ├── basic_layers.py │ │ │ │ └── conv_layers.py │ │ │ ├── parameter.py │ │ │ ├── probability/ │ │ │ │ ├── __init__.py │ │ │ │ ├── block/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── stochastic_block.py │ │ │ │ ├── distributions/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bernoulli.py │ │ │ │ │ ├── beta.py │ │ │ │ │ ├── binomial.py │ │ │ │ │ ├── categorical.py │ │ │ │ │ ├── cauchy.py │ │ │ │ │ ├── chi2.py │ │ │ │ │ ├── constraint.py │ │ │ │ │ ├── dirichlet.py │ │ │ │ │ ├── distribution.py │ │ │ │ │ ├── divergence.py │ │ │ │ │ ├── exp_family.py │ │ │ │ │ ├── exponential.py │ │ │ │ │ ├── fishersnedecor.py │ │ │ │ │ ├── gamma.py │ │ │ │ │ ├── geometric.py │ │ │ │ │ ├── gumbel.py │ │ │ │ │ ├── half_cauchy.py │ │ │ │ │ ├── half_normal.py │ │ │ │ │ ├── independent.py │ │ │ │ │ ├── laplace.py │ │ │ │ │ ├── multinomial.py │ │ │ │ │ ├── multivariate_normal.py │ │ │ │ │ ├── negative_binomial.py │ │ │ │ │ ├── normal.py │ │ │ │ │ ├── one_hot_categorical.py │ │ │ │ │ ├── pareto.py │ │ │ │ │ ├── poisson.py │ │ │ │ │ ├── relaxed_bernoulli.py │ │ │ │ │ ├── relaxed_one_hot_categorical.py │ │ │ │ │ ├── studentT.py │ │ │ │ │ ├── transformed_distribution.py │ │ │ │ │ ├── uniform.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── weibull.py │ │ │ │ └── transformation/ │ │ │ │ ├── __init__.py │ │ │ │ ├── domain_map.py │ │ │ │ └── transformation.py │ │ │ ├── rnn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── conv_rnn_cell.py │ │ │ │ ├── rnn_cell.py │ │ │ │ └── rnn_layer.py │ │ │ ├── trainer.py │ │ │ └── utils.py │ │ ├── image/ │ │ │ ├── __init__.py │ │ │ ├── detection.py │ │ │ └── image.py │ │ ├── initializer.py │ │ ├── io/ │ │ │ ├── 
__init__.py │ │ │ ├── io.py │ │ │ └── utils.py │ │ ├── kvstore/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── byteps.py │ │ │ ├── horovod.py │ │ │ ├── kvstore.py │ │ │ └── kvstore_server.py │ │ ├── libinfo.py │ │ ├── library.py │ │ ├── log.py │ │ ├── lr_scheduler.py │ │ ├── misc.py │ │ ├── model.py │ │ ├── name.py │ │ ├── ndarray/ │ │ │ ├── __init__.py │ │ │ ├── _internal.py │ │ │ ├── contrib.py │ │ │ ├── image.py │ │ │ ├── linalg.py │ │ │ ├── ndarray.py │ │ │ ├── numpy/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _api_internal.py │ │ │ │ ├── _internal.py │ │ │ │ ├── _op.py │ │ │ │ ├── _register.py │ │ │ │ ├── linalg.py │ │ │ │ └── random.py │ │ │ ├── numpy_extension/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _api_internal.py │ │ │ │ ├── _op.py │ │ │ │ ├── _register.py │ │ │ │ ├── control_flow.py │ │ │ │ ├── image.py │ │ │ │ └── random.py │ │ │ ├── op.py │ │ │ ├── random.py │ │ │ ├── register.py │ │ │ ├── sparse.py │ │ │ └── utils.py │ │ ├── ndarray_doc.py │ │ ├── notebook/ │ │ │ ├── __init__.py │ │ │ └── callback.py │ │ ├── numpy/ │ │ │ ├── __init__.py │ │ │ ├── _op.py │ │ │ ├── _register.py │ │ │ ├── arrayprint.py │ │ │ ├── fallback.py │ │ │ ├── fallback_linalg.py │ │ │ ├── function_base.py │ │ │ ├── io.py │ │ │ ├── linalg.py │ │ │ ├── multiarray.py │ │ │ ├── random.py │ │ │ ├── set_functions.py │ │ │ ├── stride_tricks.py │ │ │ ├── type_functions.py │ │ │ └── utils.py │ │ ├── numpy_dispatch_protocol.py │ │ ├── numpy_extension/ │ │ │ ├── __init__.py │ │ │ ├── _op.py │ │ │ ├── _register.py │ │ │ ├── control_flow.py │ │ │ ├── image.py │ │ │ ├── random.py │ │ │ └── utils.py │ │ ├── numpy_op_fallback.py │ │ ├── numpy_op_signature.py │ │ ├── onnx/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── mx2onnx/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _export_helper.py │ │ │ │ ├── _export_model.py │ │ │ │ ├── _export_onnx.py │ │ │ │ └── _op_translations/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _op_translations_opset12.py │ │ │ │ └── _op_translations_opset13.py │ │ │ └── setup.py │ │ ├── 
operator.py │ │ ├── optimizer/ │ │ │ ├── __init__.py │ │ │ ├── adabelief.py │ │ │ ├── adadelta.py │ │ │ ├── adagrad.py │ │ │ ├── adam.py │ │ │ ├── adamW.py │ │ │ ├── adamax.py │ │ │ ├── contrib.py │ │ │ ├── dcasgd.py │ │ │ ├── ftml.py │ │ │ ├── ftrl.py │ │ │ ├── lamb.py │ │ │ ├── lans.py │ │ │ ├── lars.py │ │ │ ├── nadam.py │ │ │ ├── nag.py │ │ │ ├── optimizer.py │ │ │ ├── rmsprop.py │ │ │ ├── sgd.py │ │ │ ├── sgld.py │ │ │ ├── signum.py │ │ │ ├── updater.py │ │ │ └── utils.py │ │ ├── profiler.py │ │ ├── random.py │ │ ├── recordio.py │ │ ├── registry.py │ │ ├── rtc.py │ │ ├── runtime.py │ │ ├── symbol/ │ │ │ ├── __init__.py │ │ │ ├── _internal.py │ │ │ ├── contrib.py │ │ │ ├── image.py │ │ │ ├── linalg.py │ │ │ ├── numpy/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _internal.py │ │ │ │ ├── _op.py │ │ │ │ ├── _register.py │ │ │ │ ├── _symbol.py │ │ │ │ ├── linalg.py │ │ │ │ └── random.py │ │ │ ├── numpy_extension/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _op.py │ │ │ │ ├── _register.py │ │ │ │ ├── image.py │ │ │ │ └── random.py │ │ │ ├── op.py │ │ │ ├── random.py │ │ │ ├── register.py │ │ │ ├── sparse.py │ │ │ └── symbol.py │ │ ├── symbol_doc.py │ │ ├── test_utils.py │ │ ├── tvmop.py │ │ ├── util.py │ │ └── visualization.py │ └── setup.py ├── rat-excludes ├── readthedocs.yml ├── snap.python ├── src/ │ ├── api/ │ │ ├── _api_internal/ │ │ │ └── _api_internal.cc │ │ ├── cached_op_api.cc │ │ └── operator/ │ │ ├── numpy/ │ │ │ ├── linalg/ │ │ │ │ ├── np_det.cc │ │ │ │ ├── np_eig.cc │ │ │ │ ├── np_eigvals.cc │ │ │ │ ├── np_gesvd.cc │ │ │ │ ├── np_inv.cc │ │ │ │ ├── np_lstsq.cc │ │ │ │ ├── np_matrix_rank.cc │ │ │ │ ├── np_norm.cc │ │ │ │ ├── np_pinv.cc │ │ │ │ ├── np_potrf.cc │ │ │ │ ├── np_qr.cc │ │ │ │ ├── np_slogdet.cc │ │ │ │ ├── np_solve.cc │ │ │ │ ├── np_tensorinv.cc │ │ │ │ └── np_tensorsolve.cc │ │ │ ├── np_bincount_op.cc │ │ │ ├── np_broadcast_reduce_op_boolean.cc │ │ │ ├── np_broadcast_reduce_op_index.cc │ │ │ ├── np_broadcast_reduce_op_value.cc │ │ │ ├── np_cross.cc │ │ │ 
├── np_cumsum.cc │ │ │ ├── np_delete_op.cc │ │ │ ├── np_diff_op.cc │ │ │ ├── np_dot_op.cc │ │ │ ├── np_ediff1d_op.cc │ │ │ ├── np_einsum_op.cc │ │ │ ├── np_elemwise_broadcast_logic_op.cc │ │ │ ├── np_elemwise_broadcast_op.cc │ │ │ ├── np_elemwise_broadcast_op_extended_sec.cc │ │ │ ├── np_elemwise_unary_op_basic.cc │ │ │ ├── np_fill_diagonal_op.cc │ │ │ ├── np_histogram_op.cc │ │ │ ├── np_init_op.cc │ │ │ ├── np_insert_op.cc │ │ │ ├── np_interp_op.cc │ │ │ ├── np_kron.cc │ │ │ ├── np_matmul_op.cc │ │ │ ├── np_matrix_op.cc │ │ │ ├── np_memory_op.cc │ │ │ ├── np_moments_op.cc │ │ │ ├── np_nan_to_num_op.cc │ │ │ ├── np_nonzero_op.cc │ │ │ ├── np_ordering_op.cc │ │ │ ├── np_pad_op.cc │ │ │ ├── np_percentile_op.cc │ │ │ ├── np_polynomial_op.cc │ │ │ ├── np_repeat_op.cc │ │ │ ├── np_tensordot_op.cc │ │ │ ├── np_trace_op.cc │ │ │ ├── np_tri_op.cc │ │ │ ├── np_tril_op.cc │ │ │ ├── np_triu_op.cc │ │ │ ├── np_unique_op.cc │ │ │ ├── np_where_op.cc │ │ │ ├── np_window_op.cc │ │ │ └── random/ │ │ │ ├── np_choice_op.cc │ │ │ ├── np_exponential_op.cc │ │ │ ├── np_laplace_op.cc │ │ │ ├── np_location_scale_op.cc │ │ │ ├── np_multinomial_op.cc │ │ │ ├── np_pareto_op.cc │ │ │ ├── np_power_op.cc │ │ │ ├── np_rayleigh_op.cc │ │ │ └── np_weibull_op.cc │ │ ├── numpy_extension/ │ │ │ ├── npx_activation_op.cc │ │ │ ├── npx_arange_like_op.cc │ │ │ ├── npx_batch_dot_op.cc │ │ │ ├── npx_batch_norm_op.cc │ │ │ ├── npx_broadcast_like_op.cc │ │ │ ├── npx_control_flow_op.cc │ │ │ ├── npx_convolution_op.cc │ │ │ ├── npx_deconvolution_op.cc │ │ │ ├── npx_dropout_op.cc │ │ │ ├── npx_embedding_op.cc │ │ │ ├── npx_fully_connected_op.cc │ │ │ ├── npx_group_norm_op.cc │ │ │ ├── npx_layer_norm_op.cc │ │ │ ├── npx_leaky_relu_op.cc │ │ │ ├── npx_one_hot_op.cc │ │ │ ├── npx_pick_op.cc │ │ │ ├── npx_pooling_op.cc │ │ │ ├── npx_rnn_op.cc │ │ │ ├── npx_softmax_op.cc │ │ │ └── npx_topk_op.cc │ │ ├── op_utils.cc │ │ ├── op_utils.h │ │ ├── random/ │ │ │ ├── np_gamma_op.cc │ │ │ ├── np_normal_op.cc │ │ │ ├── 
np_randint_op.cc │ │ │ ├── np_uniform_op.cc │ │ │ └── shuffle_op.cc │ │ ├── tensor/ │ │ │ ├── elemwise_binary_broadcast_op_extended.cc │ │ │ ├── indexing_op.cc │ │ │ ├── matrix_op.cc │ │ │ └── unravel.cc │ │ ├── ufunc_helper.cc │ │ ├── ufunc_helper.h │ │ ├── utils.cc │ │ └── utils.h │ ├── base.cc │ ├── c_api/ │ │ ├── .clang-tidy │ │ ├── c_api.cc │ │ ├── c_api_common.h │ │ ├── c_api_function.cc │ │ ├── c_api_ndarray.cc │ │ ├── c_api_profile.cc │ │ ├── c_api_symbolic.cc │ │ └── c_api_test.cc │ ├── common/ │ │ ├── alm.cc │ │ ├── alm.h │ │ ├── cuda/ │ │ │ ├── cudnn_cxx.cc │ │ │ ├── cudnn_cxx.h │ │ │ ├── nvtx.h │ │ │ ├── rtc/ │ │ │ │ ├── backward_functions-inl.h │ │ │ │ ├── forward_functions-inl.h │ │ │ │ ├── half-inl.h │ │ │ │ ├── reducer-inl.h │ │ │ │ ├── special_functions-inl.h │ │ │ │ ├── util-inl.h │ │ │ │ └── vectorization-inl.h │ │ │ ├── rtc.cc │ │ │ ├── rtc.h │ │ │ ├── utils.cc │ │ │ └── utils.h │ │ ├── exec_utils.cc │ │ ├── exec_utils.h │ │ ├── lazy_alloc_array.h │ │ ├── object_pool.h │ │ ├── random_generator.cu │ │ ├── rtc.cc │ │ ├── static_array.h │ │ ├── tensor_inspector.h │ │ ├── utils.cc │ │ ├── utils.cu │ │ └── utils.h │ ├── engine/ │ │ ├── engine.cc │ │ ├── engine_impl.h │ │ ├── naive_engine.cc │ │ ├── openmp.cc │ │ ├── openmp.h │ │ ├── stream_manager.h │ │ ├── thread_pool.h │ │ ├── threaded_engine.cc │ │ ├── threaded_engine.h │ │ ├── threaded_engine_perdevice.cc │ │ └── threaded_engine_pooled.cc │ ├── imperative/ │ │ ├── attach_op_execs_pass.cc │ │ ├── attach_op_resource_pass.cc │ │ ├── cached_op.cc │ │ ├── cached_op.h │ │ ├── cached_op_threadsafe.cc │ │ ├── cached_op_threadsafe.h │ │ ├── cuda_graphs.h │ │ ├── eliminate_common_expr_pass.cc │ │ ├── exec_pass.h │ │ ├── imperative.cc │ │ ├── imperative_utils.cc │ │ ├── imperative_utils.h │ │ ├── infer_graph_attr_pass.cc │ │ ├── inplace_addto_detect_pass.cc │ │ ├── naive_cached_op.cc │ │ ├── naive_cached_op.h │ │ ├── pointwise_fusion_pass.cc │ │ ├── simple_partition_pass.cc │ │ └── simple_partition_pass.h │ 
├── initialize.cc │ ├── initialize.h │ ├── io/ │ │ ├── batchify.cc │ │ ├── dataloader.cc │ │ ├── dataset.cc │ │ ├── image_aug_default.cc │ │ ├── image_augmenter.h │ │ ├── image_det_aug_default.cc │ │ ├── image_io.cc │ │ ├── image_iter_common.h │ │ ├── image_recordio.h │ │ ├── inst_vector.h │ │ ├── io.cc │ │ ├── iter_batchloader.h │ │ ├── iter_csv.cc │ │ ├── iter_image_det_recordio.cc │ │ ├── iter_image_recordio.cc │ │ ├── iter_image_recordio_2.cc │ │ ├── iter_libsvm.cc │ │ ├── iter_mnist.cc │ │ ├── iter_normalize.h │ │ ├── iter_prefetcher.h │ │ ├── iter_sampler.cc │ │ ├── iter_sparse.h │ │ ├── iter_sparse_batchloader.h │ │ ├── iter_sparse_prefetcher.h │ │ └── opencv_compatibility.h │ ├── ir/ │ │ └── expr.cc │ ├── kvstore/ │ │ ├── comm.h │ │ ├── comm_tree.h │ │ ├── gpu_topology.h │ │ ├── gradient_compression-inl.h │ │ ├── gradient_compression.cc │ │ ├── gradient_compression.cu │ │ ├── gradient_compression.h │ │ ├── kvstore.cc │ │ ├── kvstore_dist.h │ │ ├── kvstore_dist_server.h │ │ ├── kvstore_local.h │ │ ├── kvstore_nccl.h │ │ ├── kvstore_utils.cc │ │ ├── kvstore_utils.cu │ │ ├── kvstore_utils.h │ │ └── p3store_dist.h │ ├── lang/ │ │ ├── expr.cc │ │ └── ir.cc │ ├── lib_api.cc │ ├── libinfo.cc │ ├── ndarray/ │ │ ├── ndarray.cc │ │ ├── ndarray_function-inl.cuh │ │ ├── ndarray_function-inl.h │ │ ├── ndarray_function.cc │ │ ├── ndarray_function.cu │ │ └── ndarray_function.h │ ├── nnvm/ │ │ ├── error.h │ │ ├── gradient.cc │ │ ├── graph_algorithm.h │ │ ├── graph_editor.cc │ │ ├── legacy_json_util.cc │ │ ├── legacy_op_util.cc │ │ ├── low_precision_pass.cc │ │ ├── node_op_util.h │ │ ├── plan_memory.cc │ │ └── tvm_bridge.cc │ ├── operator/ │ │ ├── all_finite-inl.h │ │ ├── all_finite.cc │ │ ├── all_finite.cu │ │ ├── amp_graph_pass.cc │ │ ├── bilinear_sampler-inl.h │ │ ├── bilinear_sampler.cc │ │ ├── bilinear_sampler.cu │ │ ├── c_lapack_api.cc │ │ ├── c_lapack_api.h │ │ ├── channel_op_common.h │ │ ├── contrib/ │ │ │ ├── adabelief-inl.h │ │ │ ├── adabelief.cc │ │ │ ├── 
adabelief.cu │ │ │ ├── adamw-inl.h │ │ │ ├── adamw.cc │ │ │ ├── adamw.cu │ │ │ ├── adaptive_avg_pooling-inl.h │ │ │ ├── adaptive_avg_pooling.cc │ │ │ ├── adaptive_avg_pooling.cu │ │ │ ├── allclose_op-inl.h │ │ │ ├── allclose_op.cc │ │ │ ├── allclose_op.cu │ │ │ ├── bilinear_resize-inl.cuh │ │ │ ├── bilinear_resize-inl.h │ │ │ ├── bilinear_resize.cc │ │ │ ├── bilinear_resize.cu │ │ │ ├── boolean_mask-inl.h │ │ │ ├── boolean_mask.cc │ │ │ ├── boolean_mask.cu │ │ │ ├── bounding_box-common.h │ │ │ ├── bounding_box-inl.cuh │ │ │ ├── bounding_box-inl.h │ │ │ ├── bounding_box.cc │ │ │ ├── bounding_box.cu │ │ │ ├── count_sketch-inl.h │ │ │ ├── count_sketch.cc │ │ │ ├── count_sketch.cu │ │ │ ├── deformable_psroi_pooling-inl.h │ │ │ ├── deformable_psroi_pooling.cc │ │ │ ├── deformable_psroi_pooling.cu │ │ │ ├── dgl_graph-inl.h │ │ │ ├── dgl_graph.cc │ │ │ ├── dgl_graph.cu │ │ │ ├── dynamic_shape_ops-inl.h │ │ │ ├── dynamic_shape_ops.cc │ │ │ ├── erfinv-inl.h │ │ │ ├── fft-inl.h │ │ │ ├── fft.cc │ │ │ ├── fft.cu │ │ │ ├── gradient_multiplier_op.cc │ │ │ ├── gradient_multiplier_op.cu │ │ │ ├── hawkes_ll-inl.h │ │ │ ├── hawkes_ll.cc │ │ │ ├── hawkes_ll.cu │ │ │ ├── index_array-inl.h │ │ │ ├── index_array.cc │ │ │ ├── index_array.cu │ │ │ ├── index_copy-inl.h │ │ │ ├── index_copy.cc │ │ │ ├── index_copy.cu │ │ │ ├── intgemm/ │ │ │ │ ├── intgemm_fully_connected_op.cc │ │ │ │ ├── max_absolute_op.cc │ │ │ │ ├── prepare_data_op.cc │ │ │ │ ├── prepare_weight_op.cc │ │ │ │ └── take_weight_op.cc │ │ │ ├── krprod.cc │ │ │ ├── krprod.h │ │ │ ├── mrcnn_mask_target-inl.h │ │ │ ├── mrcnn_mask_target.cu │ │ │ ├── multi_lamb-inl.h │ │ │ ├── multi_lamb.cc │ │ │ ├── multi_lamb.cu │ │ │ ├── multi_lans-inl.h │ │ │ ├── multi_lans.cc │ │ │ ├── multi_lans.cu │ │ │ ├── multi_lars-inl.h │ │ │ ├── multi_lars.cc │ │ │ ├── multi_lars.cu │ │ │ ├── multi_proposal-inl.h │ │ │ ├── multi_proposal.cc │ │ │ ├── multi_proposal.cu │ │ │ ├── multi_sum_sq-inl.h │ │ │ ├── multi_sum_sq.cc │ │ │ ├── multi_sum_sq.cu │ 
│ │ ├── multibox_detection-inl.h │ │ │ ├── multibox_detection.cc │ │ │ ├── multibox_detection.cu │ │ │ ├── multibox_prior-inl.h │ │ │ ├── multibox_prior.cc │ │ │ ├── multibox_prior.cu │ │ │ ├── multibox_target-inl.h │ │ │ ├── multibox_target.cc │ │ │ ├── multibox_target.cu │ │ │ ├── nn/ │ │ │ │ ├── deformable_im2col.cuh │ │ │ │ ├── deformable_im2col.h │ │ │ │ ├── modulated_deformable_im2col.cuh │ │ │ │ └── modulated_deformable_im2col.h │ │ │ ├── nnz.cc │ │ │ ├── optimizer_op-inl.h │ │ │ ├── optimizer_op.cc │ │ │ ├── optimizer_op.cu │ │ │ ├── preloaded_multi_sgd-inl.h │ │ │ ├── preloaded_multi_sgd.cc │ │ │ ├── preloaded_multi_sgd.cu │ │ │ ├── proposal-inl.h │ │ │ ├── proposal.cc │ │ │ ├── proposal.cu │ │ │ ├── psroi_pooling-inl.h │ │ │ ├── psroi_pooling.cc │ │ │ ├── psroi_pooling.cu │ │ │ ├── quadratic_op-inl.h │ │ │ ├── quadratic_op.cc │ │ │ ├── quadratic_op.cu │ │ │ ├── reset_arrays-inl.h │ │ │ ├── reset_arrays.cc │ │ │ ├── reset_arrays.cu │ │ │ ├── roi_align-inl.h │ │ │ ├── roi_align.cc │ │ │ ├── roi_align.cu │ │ │ ├── rroi_align-inl.h │ │ │ ├── rroi_align.cc │ │ │ ├── stes_op.cc │ │ │ ├── stes_op.cu │ │ │ ├── stes_op.h │ │ │ ├── sync_batch_norm-inl.h │ │ │ ├── sync_batch_norm.cc │ │ │ ├── sync_batch_norm.cu │ │ │ ├── transformer-inl.h │ │ │ ├── transformer.cc │ │ │ ├── transformer.cu │ │ │ └── tvmop/ │ │ │ ├── dot.cc │ │ │ └── ufunc.cc │ │ ├── control_flow.cc │ │ ├── correlation-inl.h │ │ ├── correlation.cc │ │ ├── correlation.cu │ │ ├── crop-inl.h │ │ ├── crop.cc │ │ ├── crop.cu │ │ ├── cross_device_copy.cc │ │ ├── cudnn_bilinear_sampler-inl.h │ │ ├── cudnn_lrn-inl.h │ │ ├── cudnn_ops.cc │ │ ├── cudnn_ops.h │ │ ├── cudnn_spatial_transformer-inl.h │ │ ├── custom/ │ │ │ ├── custom-inl.h │ │ │ ├── custom.cc │ │ │ ├── native_op-inl.h │ │ │ ├── native_op.cc │ │ │ ├── native_op.cu │ │ │ ├── ndarray_op-inl.h │ │ │ └── ndarray_op.cc │ │ ├── deformable_convolution-inl.h │ │ ├── deformable_convolution.cc │ │ ├── deformable_convolution.cu │ │ ├── elemwise_op_common.h │ │ 
├── fusion/ │ │ │ ├── fused_op-inl.h │ │ │ ├── fused_op.cc │ │ │ ├── fused_op.cu │ │ │ └── fused_op.h │ │ ├── grid_generator-inl.h │ │ ├── grid_generator.cc │ │ ├── grid_generator.cu │ │ ├── identity_attach_KL_sparse_reg-inl.h │ │ ├── identity_attach_KL_sparse_reg.cc │ │ ├── identity_attach_KL_sparse_reg.cu │ │ ├── image/ │ │ │ ├── crop-inl.h │ │ │ ├── crop.cc │ │ │ ├── crop.cu │ │ │ ├── image_random-inl.h │ │ │ ├── image_random.cc │ │ │ ├── image_random.cu │ │ │ ├── image_utils.h │ │ │ ├── resize-inl.h │ │ │ ├── resize.cc │ │ │ └── resize.cu │ │ ├── instance_norm-inl.h │ │ ├── instance_norm.cc │ │ ├── instance_norm.cu │ │ ├── l2_normalization-inl.h │ │ ├── l2_normalization.cc │ │ ├── l2_normalization.cu │ │ ├── leaky_relu-inl.h │ │ ├── leaky_relu.cc │ │ ├── leaky_relu.cu │ │ ├── linalg.h │ │ ├── linalg_impl.h │ │ ├── loss_binary_op-inl.h │ │ ├── loss_binary_op.cc │ │ ├── loss_binary_op.cu │ │ ├── make_loss-inl.h │ │ ├── make_loss.cc │ │ ├── make_loss.cu │ │ ├── math_functions-inl.h │ │ ├── mkl_functions-inl.h │ │ ├── modulated_deformable_convolution-inl.h │ │ ├── modulated_deformable_convolution.cc │ │ ├── modulated_deformable_convolution.cu │ │ ├── mshadow_op.h │ │ ├── mxnet_op.h │ │ ├── nn/ │ │ │ ├── activation-inl.h │ │ │ ├── activation.cc │ │ │ ├── activation.cu │ │ │ ├── batch_norm-inl.h │ │ │ ├── batch_norm.cc │ │ │ ├── batch_norm.cu │ │ │ ├── concat-inl.h │ │ │ ├── concat.cc │ │ │ ├── concat.cu │ │ │ ├── convolution-inl.h │ │ │ ├── convolution.cc │ │ │ ├── convolution.cu │ │ │ ├── ctc_loss-inl.h │ │ │ ├── ctc_loss.cc │ │ │ ├── ctc_loss.cu │ │ │ ├── cudnn/ │ │ │ │ ├── cudnn_activation-inl.h │ │ │ │ ├── cudnn_algoreg-inl.h │ │ │ │ ├── cudnn_algoreg.cc │ │ │ │ ├── cudnn_batch_norm.cu │ │ │ │ ├── cudnn_batch_norm.h │ │ │ │ ├── cudnn_convolution-inl.h │ │ │ │ ├── cudnn_deconvolution-inl.h │ │ │ │ ├── cudnn_pooling-inl.h │ │ │ │ └── cudnn_softmax_activation-inl.h │ │ │ ├── deconvolution-inl.h │ │ │ ├── deconvolution.cc │ │ │ ├── deconvolution.cu │ │ │ ├── 
depthwise_convolution-inl.h │ │ │ ├── depthwise_convolution_tf.cuh │ │ │ ├── dnnl/ │ │ │ │ ├── dnnl_act-inl.h │ │ │ │ ├── dnnl_act.cc │ │ │ │ ├── dnnl_base-inl.h │ │ │ │ ├── dnnl_base.cc │ │ │ │ ├── dnnl_batch_dot-inl.h │ │ │ │ ├── dnnl_batch_dot.cc │ │ │ │ ├── dnnl_batch_norm-inl.h │ │ │ │ ├── dnnl_batch_norm.cc │ │ │ │ ├── dnnl_binary-inl.h │ │ │ │ ├── dnnl_binary.cc │ │ │ │ ├── dnnl_concat-inl.h │ │ │ │ ├── dnnl_concat.cc │ │ │ │ ├── dnnl_convolution-inl.h │ │ │ │ ├── dnnl_convolution.cc │ │ │ │ ├── dnnl_copy-inl.h │ │ │ │ ├── dnnl_copy.cc │ │ │ │ ├── dnnl_deconvolution-inl.h │ │ │ │ ├── dnnl_deconvolution.cc │ │ │ │ ├── dnnl_dot-inl.h │ │ │ │ ├── dnnl_dot.cc │ │ │ │ ├── dnnl_eltwise-inl.h │ │ │ │ ├── dnnl_eltwise.cc │ │ │ │ ├── dnnl_fully_connected-inl.h │ │ │ │ ├── dnnl_fully_connected.cc │ │ │ │ ├── dnnl_layer_norm-inl.h │ │ │ │ ├── dnnl_layer_norm.cc │ │ │ │ ├── dnnl_log_softmax.cc │ │ │ │ ├── dnnl_lrn-inl.h │ │ │ │ ├── dnnl_masked_softmax-inl.h │ │ │ │ ├── dnnl_masked_softmax.cc │ │ │ │ ├── dnnl_pooling-inl.h │ │ │ │ ├── dnnl_pooling.cc │ │ │ │ ├── dnnl_pow_mul_scalar-inl.h │ │ │ │ ├── dnnl_pow_mul_scalar.cc │ │ │ │ ├── dnnl_reduce-inl.h │ │ │ │ ├── dnnl_reduce.cc │ │ │ │ ├── dnnl_reshape-inl.h │ │ │ │ ├── dnnl_reshape.cc │ │ │ │ ├── dnnl_rnn-inl.h │ │ │ │ ├── dnnl_rnn.cc │ │ │ │ ├── dnnl_softmax-inl.h │ │ │ │ ├── dnnl_softmax.cc │ │ │ │ ├── dnnl_softmax_output-inl.h │ │ │ │ ├── dnnl_softmax_output.cc │ │ │ │ ├── dnnl_split-inl.h │ │ │ │ ├── dnnl_split.cc │ │ │ │ ├── dnnl_stack-inl.h │ │ │ │ ├── dnnl_stack.cc │ │ │ │ ├── dnnl_sum-inl.h │ │ │ │ ├── dnnl_sum.cc │ │ │ │ ├── dnnl_transpose-inl.h │ │ │ │ ├── dnnl_transpose.cc │ │ │ │ ├── dnnl_where-inl.h │ │ │ │ └── dnnl_where.cc │ │ │ ├── dropout-inl.h │ │ │ ├── dropout.cc │ │ │ ├── dropout.cu │ │ │ ├── fully_connected-inl.h │ │ │ ├── fully_connected.cc │ │ │ ├── fully_connected.cu │ │ │ ├── group_norm-inl.h │ │ │ ├── group_norm.cc │ │ │ ├── group_norm.cu │ │ │ ├── im2col-inl.h │ │ │ ├── im2col.cc │ │ │ ├── 
im2col.cu │ │ │ ├── im2col.cuh │ │ │ ├── im2col.h │ │ │ ├── layer_norm-inl.h │ │ │ ├── layer_norm.cc │ │ │ ├── layer_norm.cu │ │ │ ├── layer_norm_cpu.h │ │ │ ├── log_softmax.cc │ │ │ ├── log_softmax.cu │ │ │ ├── lrn-inl.h │ │ │ ├── lrn.cc │ │ │ ├── lrn.cu │ │ │ ├── masked_softmax.cc │ │ │ ├── moments-inl.h │ │ │ ├── moments.cc │ │ │ ├── moments.cu │ │ │ ├── pool.cuh │ │ │ ├── pool.h │ │ │ ├── pool_utils.h │ │ │ ├── pooling-inl.h │ │ │ ├── pooling.cc │ │ │ ├── pooling.cu │ │ │ ├── sequence_mask-inl.h │ │ │ ├── softmax-inl.h │ │ │ ├── softmax.cc │ │ │ ├── softmax.cu │ │ │ ├── softmax_activation-inl.h │ │ │ ├── softmax_activation.cc │ │ │ ├── softmax_activation.cu │ │ │ ├── softmin.cc │ │ │ ├── softmin.cu │ │ │ ├── upsampling-inl.h │ │ │ ├── upsampling.cc │ │ │ └── upsampling.cu │ │ ├── npx_control_flow.cc │ │ ├── npx_control_flow.h │ │ ├── numpy/ │ │ │ ├── linalg/ │ │ │ │ ├── broadcast_reduce_customized-inl.h │ │ │ │ ├── broadcast_reduce_op_customized.h │ │ │ │ ├── np_eig-inl.h │ │ │ │ ├── np_eig.cc │ │ │ │ ├── np_eig.cu │ │ │ │ ├── np_eigvals-inl.h │ │ │ │ ├── np_eigvals.cc │ │ │ │ ├── np_eigvals.cu │ │ │ │ ├── np_gesvd-inl.h │ │ │ │ ├── np_gesvd.cc │ │ │ │ ├── np_gesvd.cu │ │ │ │ ├── np_lstsq-inl.h │ │ │ │ ├── np_lstsq.cc │ │ │ │ ├── np_lstsq.cu │ │ │ │ ├── np_matrix_rank-inl.h │ │ │ │ ├── np_matrix_rank.cc │ │ │ │ ├── np_matrix_rank.cu │ │ │ │ ├── np_norm-inl.h │ │ │ │ ├── np_norm.cc │ │ │ │ ├── np_norm_backward.cc │ │ │ │ ├── np_norm_backward.cu │ │ │ │ ├── np_norm_forward.cc │ │ │ │ ├── np_norm_forward.cu │ │ │ │ ├── np_pinv-inl.h │ │ │ │ ├── np_pinv.cc │ │ │ │ ├── np_pinv.cu │ │ │ │ ├── np_potrf-inl.h │ │ │ │ ├── np_potrf.cc │ │ │ │ ├── np_potrf.cu │ │ │ │ ├── np_qr-inl.h │ │ │ │ ├── np_qr.cc │ │ │ │ ├── np_qr.cu │ │ │ │ ├── np_solve-inl.h │ │ │ │ ├── np_solve.cc │ │ │ │ ├── np_solve.cu │ │ │ │ ├── np_tensorinv-inl.h │ │ │ │ ├── np_tensorinv.cc │ │ │ │ ├── np_tensorinv.cu │ │ │ │ ├── np_tensorsolve-inl.h │ │ │ │ ├── np_tensorsolve.cc │ │ │ │ └── 
np_tensorsolve.cu │ │ │ ├── np_bincount_op-inl.h │ │ │ ├── np_bincount_op.cc │ │ │ ├── np_bincount_op.cu │ │ │ ├── np_boolean_mask_assign.cc │ │ │ ├── np_boolean_mask_assign.cu │ │ │ ├── np_broadcast_reduce_op.cc │ │ │ ├── np_broadcast_reduce_op.h │ │ │ ├── np_broadcast_reduce_op_boolean.cc │ │ │ ├── np_broadcast_reduce_op_boolean.cu │ │ │ ├── np_broadcast_reduce_op_index.cc │ │ │ ├── np_broadcast_reduce_op_index.cu │ │ │ ├── np_broadcast_reduce_op_value.h │ │ │ ├── np_broadcast_reduce_op_value_broadcast_to.cc │ │ │ ├── np_broadcast_reduce_op_value_broadcast_to.cu │ │ │ ├── np_broadcast_reduce_op_value_max.cc │ │ │ ├── np_broadcast_reduce_op_value_max.cu │ │ │ ├── np_broadcast_reduce_op_value_mean.cc │ │ │ ├── np_broadcast_reduce_op_value_mean.cu │ │ │ ├── np_broadcast_reduce_op_value_min.cc │ │ │ ├── np_broadcast_reduce_op_value_min.cu │ │ │ ├── np_broadcast_reduce_op_value_prod.cc │ │ │ ├── np_broadcast_reduce_op_value_prod.cu │ │ │ ├── np_broadcast_reduce_op_value_sum.cc │ │ │ ├── np_broadcast_reduce_op_value_sum.cu │ │ │ ├── np_constraint_check.cc │ │ │ ├── np_constraint_check.cu │ │ │ ├── np_constraint_check.h │ │ │ ├── np_cross-inl.h │ │ │ ├── np_cross.cc │ │ │ ├── np_cross.cu │ │ │ ├── np_cumsum-inl.h │ │ │ ├── np_cumsum.cc │ │ │ ├── np_cumsum.cu │ │ │ ├── np_delete_op-inl.h │ │ │ ├── np_delete_op.cc │ │ │ ├── np_delete_op.cu │ │ │ ├── np_diff-inl.h │ │ │ ├── np_diff.cc │ │ │ ├── np_diff.cu │ │ │ ├── np_dot-inl.h │ │ │ ├── np_dot_backward.cc │ │ │ ├── np_dot_backward.cu │ │ │ ├── np_dot_forward.cc │ │ │ ├── np_dot_forward.cu │ │ │ ├── np_ediff1d_op-inl.h │ │ │ ├── np_ediff1d_op.cc │ │ │ ├── np_ediff1d_op.cu │ │ │ ├── np_einsum_op-inl.h │ │ │ ├── np_einsum_op.cc │ │ │ ├── np_einsum_op.cu │ │ │ ├── np_einsum_path_op-inl.h │ │ │ ├── np_elemwise_broadcast_logic_op.h │ │ │ ├── np_elemwise_broadcast_logic_op_and.cc │ │ │ ├── np_elemwise_broadcast_logic_op_and.cu │ │ │ ├── np_elemwise_broadcast_logic_op_equal.cc │ │ │ ├── np_elemwise_broadcast_logic_op_equal.cu │ │ 
│ ├── np_elemwise_broadcast_logic_op_greater.cc │ │ │ ├── np_elemwise_broadcast_logic_op_greater.cu │ │ │ ├── np_elemwise_broadcast_logic_op_greater_equal.cc │ │ │ ├── np_elemwise_broadcast_logic_op_greater_equal.cu │ │ │ ├── np_elemwise_broadcast_logic_op_less.cc │ │ │ ├── np_elemwise_broadcast_logic_op_less.cu │ │ │ ├── np_elemwise_broadcast_logic_op_less_equal.cc │ │ │ ├── np_elemwise_broadcast_logic_op_less_equal.cu │ │ │ ├── np_elemwise_broadcast_logic_op_not_equal.cc │ │ │ ├── np_elemwise_broadcast_logic_op_not_equal.cu │ │ │ ├── np_elemwise_broadcast_logic_op_or.cc │ │ │ ├── np_elemwise_broadcast_logic_op_or.cu │ │ │ ├── np_elemwise_broadcast_logic_op_xor.cc │ │ │ ├── np_elemwise_broadcast_logic_op_xor.cu │ │ │ ├── np_elemwise_broadcast_op.h │ │ │ ├── np_elemwise_broadcast_op_add.cc │ │ │ ├── np_elemwise_broadcast_op_add.cu │ │ │ ├── np_elemwise_broadcast_op_extended.cc │ │ │ ├── np_elemwise_broadcast_op_extended.cu │ │ │ ├── np_elemwise_broadcast_op_extended_sec.cc │ │ │ ├── np_elemwise_broadcast_op_extended_sec.cu │ │ │ ├── np_elemwise_broadcast_op_extended_thi.cc │ │ │ ├── np_elemwise_broadcast_op_extended_thi.cu │ │ │ ├── np_elemwise_broadcast_op_lae.cc │ │ │ ├── np_elemwise_broadcast_op_lae.cu │ │ │ ├── np_elemwise_broadcast_op_mod.cc │ │ │ ├── np_elemwise_broadcast_op_mod.cu │ │ │ ├── np_elemwise_broadcast_op_mul.cc │ │ │ ├── np_elemwise_broadcast_op_mul.cu │ │ │ ├── np_elemwise_broadcast_op_pow.cc │ │ │ ├── np_elemwise_broadcast_op_pow.cu │ │ │ ├── np_elemwise_broadcast_op_scalar.cc │ │ │ ├── np_elemwise_broadcast_op_scalar.cu │ │ │ ├── np_elemwise_broadcast_op_sub.cc │ │ │ ├── np_elemwise_broadcast_op_sub.cu │ │ │ ├── np_elemwise_unary_op_basic.cc │ │ │ ├── np_elemwise_unary_op_basic.cu │ │ │ ├── np_fill_diagonal_op-inl.h │ │ │ ├── np_fill_diagonal_op.cc │ │ │ ├── np_fill_diagonal_op.cu │ │ │ ├── np_floor_divide.cc │ │ │ ├── np_floor_divide.cu │ │ │ ├── np_indexing_op.cc │ │ │ ├── np_indexing_op.cu │ │ │ ├── np_indexing_op.h │ │ │ ├── np_init_op.cc │ 
│ │ ├── np_init_op.cu │ │ │ ├── np_init_op.h │ │ │ ├── np_insert_op-inl.h │ │ │ ├── np_insert_op_scalar-inl.h │ │ │ ├── np_insert_op_scalar.cc │ │ │ ├── np_insert_op_scalar.cu │ │ │ ├── np_insert_op_slice-inl.h │ │ │ ├── np_insert_op_slice.cc │ │ │ ├── np_insert_op_slice.cu │ │ │ ├── np_insert_op_tensor-inl.h │ │ │ ├── np_insert_op_tensor.cc │ │ │ ├── np_insert_op_tensor.cu │ │ │ ├── np_interp_op-inl.h │ │ │ ├── np_interp_op.cc │ │ │ ├── np_interp_op.cu │ │ │ ├── np_kron-inl.h │ │ │ ├── np_kron_backward.cc │ │ │ ├── np_kron_backward.cu │ │ │ ├── np_kron_forward.cc │ │ │ ├── np_kron_forward.cu │ │ │ ├── np_matmul_op-inl.h │ │ │ ├── np_matmul_op.cc │ │ │ ├── np_matmul_op.cu │ │ │ ├── np_matrix_op-inl.h │ │ │ ├── np_matrix_op.cc │ │ │ ├── np_matrix_op.cu │ │ │ ├── np_memory_op.cc │ │ │ ├── np_memory_op.cu │ │ │ ├── np_memory_op.h │ │ │ ├── np_moments_op.cc │ │ │ ├── np_moments_op.cu │ │ │ ├── np_nonzero_op-inl.h │ │ │ ├── np_nonzero_op.cc │ │ │ ├── np_nonzero_op.cu │ │ │ ├── np_pad_op-inl.h │ │ │ ├── np_pad_op.cc │ │ │ ├── np_pad_op.cu │ │ │ ├── np_percentile_op-inl.h │ │ │ ├── np_percentile_op.cc │ │ │ ├── np_percentile_op.cu │ │ │ ├── np_polynomial_op-inl.h │ │ │ ├── np_polynomial_op.cc │ │ │ ├── np_polynomial_op.cu │ │ │ ├── np_repeat_op-inl.h │ │ │ ├── np_repeat_op.cc │ │ │ ├── np_repeat_op.cu │ │ │ ├── np_tensordot_op-inl.h │ │ │ ├── np_tensordot_op.cc │ │ │ ├── np_tensordot_op.cu │ │ │ ├── np_trace_op-inl.h │ │ │ ├── np_trace_op.cc │ │ │ ├── np_trace_op.cu │ │ │ ├── np_tri_op-inl.h │ │ │ ├── np_tri_op.cc │ │ │ ├── np_tri_op.cu │ │ │ ├── np_tril_op-inl.h │ │ │ ├── np_tril_op.cc │ │ │ ├── np_tril_op.cu │ │ │ ├── np_triu_op-inl.h │ │ │ ├── np_triu_op.cc │ │ │ ├── np_triu_op.cu │ │ │ ├── np_true_divide-inl.h │ │ │ ├── np_true_divide.cc │ │ │ ├── np_true_divide.cu │ │ │ ├── np_unique_op.cc │ │ │ ├── np_unique_op.cu │ │ │ ├── np_unique_op.h │ │ │ ├── np_where_backward_op.cc │ │ │ ├── np_where_backward_op.cu │ │ │ ├── np_where_forward_op.cc │ │ │ ├── 
np_where_forward_op.cu │ │ │ ├── np_where_op-inl.h │ │ │ ├── np_window_op.cc │ │ │ ├── np_window_op.cu │ │ │ ├── np_window_op.h │ │ │ └── random/ │ │ │ ├── dist_common.cc │ │ │ ├── dist_common.cu │ │ │ ├── dist_common.h │ │ │ ├── np_bernoulli_op.cc │ │ │ ├── np_bernoulli_op.cu │ │ │ ├── np_bernoulli_op.h │ │ │ ├── np_choice_op.cc │ │ │ ├── np_choice_op.cu │ │ │ ├── np_choice_op.h │ │ │ ├── np_exponential_op.cc │ │ │ ├── np_exponential_op.cu │ │ │ ├── np_exponential_op.h │ │ │ ├── np_gamma_op.cc │ │ │ ├── np_gamma_op.cu │ │ │ ├── np_gamma_op.h │ │ │ ├── np_laplace_op.cc │ │ │ ├── np_laplace_op.cu │ │ │ ├── np_laplace_op.h │ │ │ ├── np_location_scale_op.cc │ │ │ ├── np_location_scale_op.cu │ │ │ ├── np_location_scale_op.h │ │ │ ├── np_multinomial_op.cc │ │ │ ├── np_multinomial_op.cu │ │ │ ├── np_multinomial_op.h │ │ │ ├── np_normal_op.cc │ │ │ ├── np_normal_op.cu │ │ │ ├── np_normal_op.h │ │ │ ├── np_pareto_op.cc │ │ │ ├── np_pareto_op.cu │ │ │ ├── np_pareto_op.h │ │ │ ├── np_power_op.cc │ │ │ ├── np_power_op.cu │ │ │ ├── np_power_op.h │ │ │ ├── np_rayleigh_op.cc │ │ │ ├── np_rayleigh_op.cu │ │ │ ├── np_rayleigh_op.h │ │ │ ├── np_uniform_op.cc │ │ │ ├── np_uniform_op.cu │ │ │ ├── np_uniform_op.h │ │ │ ├── np_weibull_op.cc │ │ │ ├── np_weibull_op.cu │ │ │ └── np_weibull_op.h │ │ ├── operator.cc │ │ ├── operator_common.h │ │ ├── operator_tune-inl.h │ │ ├── operator_tune.cc │ │ ├── operator_tune.h │ │ ├── operator_util.cc │ │ ├── optimizer_op-inl.h │ │ ├── optimizer_op.cc │ │ ├── optimizer_op.cu │ │ ├── pad-inl.h │ │ ├── pad.cc │ │ ├── pad.cu │ │ ├── quantization/ │ │ │ ├── calibrate-inl.h │ │ │ ├── calibrate.cc │ │ │ ├── dequantize-inl.h │ │ │ ├── dequantize.cc │ │ │ ├── dequantize.cu │ │ │ ├── dnnl/ │ │ │ │ ├── dnnl_dequantize-inl.h │ │ │ │ ├── dnnl_quantize-inl.h │ │ │ │ ├── dnnl_quantize_asym-inl.h │ │ │ │ ├── dnnl_quantize_v2-inl.h │ │ │ │ ├── dnnl_quantized_act.cc │ │ │ │ ├── dnnl_quantized_batch_norm.cc │ │ │ │ ├── dnnl_quantized_concat.cc │ │ │ │ ├── 
dnnl_quantized_conv.cc │ │ │ │ ├── dnnl_quantized_elemwise_add.cc │ │ │ │ ├── dnnl_quantized_flatten.cc │ │ │ │ ├── dnnl_quantized_fully_connected.cc │ │ │ │ ├── dnnl_quantized_ops-inl.h │ │ │ │ ├── dnnl_quantized_pooling.cc │ │ │ │ ├── dnnl_quantized_reshape.cc │ │ │ │ ├── dnnl_quantized_rnn-inl.h │ │ │ │ ├── dnnl_quantized_rnn.cc │ │ │ │ ├── dnnl_quantized_transpose.cc │ │ │ │ └── dnnl_requantize-inl.h │ │ │ ├── quantization_utils.h │ │ │ ├── quantize-inl.h │ │ │ ├── quantize.cc │ │ │ ├── quantize.cu │ │ │ ├── quantize_asym-inl.h │ │ │ ├── quantize_asym.cc │ │ │ ├── quantize_graph_pass.cc │ │ │ ├── quantize_v2-inl.h │ │ │ ├── quantize_v2.cc │ │ │ ├── quantize_v2.cu │ │ │ ├── quantized_activation.cc │ │ │ ├── quantized_batch_norm.cc │ │ │ ├── quantized_batch_norm_relu.cc │ │ │ ├── quantized_concat.cc │ │ │ ├── quantized_conv.cc │ │ │ ├── quantized_conv.cu │ │ │ ├── quantized_elemwise_add-inl.h │ │ │ ├── quantized_elemwise_add.cc │ │ │ ├── quantized_elemwise_mul-inl.h │ │ │ ├── quantized_elemwise_mul.cc │ │ │ ├── quantized_flatten-inl.h │ │ │ ├── quantized_flatten.cc │ │ │ ├── quantized_flatten.cu │ │ │ ├── quantized_fully_connected.cc │ │ │ ├── quantized_fully_connected.cu │ │ │ ├── quantized_indexing_op.cc │ │ │ ├── quantized_pooling.cc │ │ │ ├── quantized_pooling.cu │ │ │ ├── quantized_reshape-inl.h │ │ │ ├── quantized_reshape.cc │ │ │ ├── quantized_rnn-inl.h │ │ │ ├── quantized_rnn.cc │ │ │ ├── quantized_transpose.cc │ │ │ ├── requantize-inl.h │ │ │ ├── requantize.cc │ │ │ └── requantize.cu │ │ ├── random/ │ │ │ ├── multisample_op.cc │ │ │ ├── multisample_op.cu │ │ │ ├── multisample_op.h │ │ │ ├── pdf_op.cc │ │ │ ├── pdf_op.cu │ │ │ ├── pdf_op.h │ │ │ ├── sample_multinomial_op.cc │ │ │ ├── sample_multinomial_op.cu │ │ │ ├── sample_multinomial_op.h │ │ │ ├── sample_op.cc │ │ │ ├── sample_op.cu │ │ │ ├── sample_op.h │ │ │ ├── sampler.h │ │ │ ├── shuffle_op.cc │ │ │ ├── shuffle_op.cu │ │ │ ├── unique_sample_op.cc │ │ │ └── unique_sample_op.h │ │ ├── 
regression_output-inl.h │ │ ├── regression_output.cc │ │ ├── regression_output.cu │ │ ├── rnn-inl.h │ │ ├── rnn.cc │ │ ├── rnn.cu │ │ ├── rnn_impl.h │ │ ├── roi_pooling-inl.h │ │ ├── roi_pooling.cc │ │ ├── roi_pooling.cu │ │ ├── sequence_last-inl.h │ │ ├── sequence_last.cc │ │ ├── sequence_last.cu │ │ ├── sequence_mask-inl.h │ │ ├── sequence_mask.cc │ │ ├── sequence_mask.cu │ │ ├── sequence_op_common.h │ │ ├── sequence_reverse-inl.h │ │ ├── sequence_reverse.cc │ │ ├── sequence_reverse.cu │ │ ├── slice_channel-inl.h │ │ ├── slice_channel.cc │ │ ├── slice_channel.cu │ │ ├── softmax_output-inl.h │ │ ├── softmax_output.cc │ │ ├── softmax_output.cu │ │ ├── spatial_transformer-inl.h │ │ ├── spatial_transformer.cc │ │ ├── spatial_transformer.cu │ │ ├── special_functions-inl.h │ │ ├── subgraph/ │ │ │ ├── build_subgraph.cc │ │ │ ├── common.h │ │ │ ├── default_subgraph_property.cc │ │ │ ├── default_subgraph_property_v2.cc │ │ │ ├── dnnl/ │ │ │ │ ├── dnnl_batch_dot.cc │ │ │ │ ├── dnnl_batch_dot_property.h │ │ │ │ ├── dnnl_bn_relu.cc │ │ │ │ ├── dnnl_bn_relu_property.h │ │ │ │ ├── dnnl_common.h │ │ │ │ ├── dnnl_conv-inl.h │ │ │ │ ├── dnnl_conv.cc │ │ │ │ ├── dnnl_conv_property.h │ │ │ │ ├── dnnl_fc-inl.h │ │ │ │ ├── dnnl_fc.cc │ │ │ │ ├── dnnl_fc_property.h │ │ │ │ ├── dnnl_fc_sum_fuse_property.h │ │ │ │ ├── dnnl_identity_property.h │ │ │ │ ├── dnnl_post_amp_property.h │ │ │ │ ├── dnnl_post_quantize_align_scale_property.h │ │ │ │ ├── dnnl_post_quantize_property.h │ │ │ │ ├── dnnl_pow_mul_scalar.cc │ │ │ │ ├── dnnl_pow_mul_scalar_property.h │ │ │ │ ├── dnnl_remove_casts_property.h │ │ │ │ ├── dnnl_subgraph_base-inl.h │ │ │ │ ├── dnnl_subgraph_property.cc │ │ │ │ ├── dnnl_transformer-inl.h │ │ │ │ ├── dnnl_transformer.cc │ │ │ │ ├── dnnl_transformer_qk_common.h │ │ │ │ ├── dnnl_transformer_qk_property.h │ │ │ │ └── dnnl_transformer_valatt_property.h │ │ │ ├── eliminate_common_nodes_pass.cc │ │ │ ├── partitioner/ │ │ │ │ └── custom_subgraph_property.h │ │ │ ├── 
static_shape_subgraph_property.cc │ │ │ ├── subgraph_property.h │ │ │ └── tensorrt/ │ │ │ ├── nnvm_to_onnx-inl.h │ │ │ ├── nnvm_to_onnx.cc │ │ │ ├── onnx_to_tensorrt.cc │ │ │ ├── onnx_to_tensorrt.h │ │ │ ├── tensorrt-inl.h │ │ │ ├── tensorrt.cc │ │ │ └── tensorrt.cu │ │ ├── subgraph_op_common.cc │ │ ├── subgraph_op_common.h │ │ ├── svm_output-inl.h │ │ ├── svm_output.cc │ │ ├── svm_output.cu │ │ ├── swapaxis-inl.h │ │ ├── swapaxis.cc │ │ ├── swapaxis.cu │ │ ├── tensor/ │ │ │ ├── amp_cast.cc │ │ │ ├── amp_cast.cu │ │ │ ├── amp_cast.h │ │ │ ├── broadcast_reduce-inl.h │ │ │ ├── broadcast_reduce_minmax_value.cc │ │ │ ├── broadcast_reduce_minmax_value.cu │ │ │ ├── broadcast_reduce_norm_value.cc │ │ │ ├── broadcast_reduce_norm_value.cu │ │ │ ├── broadcast_reduce_op.cc │ │ │ ├── broadcast_reduce_op.h │ │ │ ├── broadcast_reduce_op_index.cc │ │ │ ├── broadcast_reduce_op_index.cu │ │ │ ├── broadcast_reduce_op_value.cc │ │ │ ├── broadcast_reduce_op_value.cu │ │ │ ├── broadcast_reduce_prod_value.cc │ │ │ ├── broadcast_reduce_prod_value.cu │ │ │ ├── broadcast_reduce_sum_value.cc │ │ │ ├── broadcast_reduce_sum_value.cu │ │ │ ├── cast_storage-inl.cuh │ │ │ ├── cast_storage-inl.h │ │ │ ├── cast_storage.cc │ │ │ ├── cast_storage.cu │ │ │ ├── control_flow_op.cc │ │ │ ├── control_flow_op.cu │ │ │ ├── control_flow_op.h │ │ │ ├── diag_op-inl.h │ │ │ ├── diag_op.cc │ │ │ ├── diag_op.cu │ │ │ ├── dot-inl.cuh │ │ │ ├── dot-inl.h │ │ │ ├── dot.cc │ │ │ ├── dot.cu │ │ │ ├── elemwise_binary_broadcast_op.cc │ │ │ ├── elemwise_binary_broadcast_op.h │ │ │ ├── elemwise_binary_broadcast_op_basic.cc │ │ │ ├── elemwise_binary_broadcast_op_basic.cu │ │ │ ├── elemwise_binary_broadcast_op_extended.cc │ │ │ ├── elemwise_binary_broadcast_op_extended.cu │ │ │ ├── elemwise_binary_broadcast_op_logic.cc │ │ │ ├── elemwise_binary_broadcast_op_logic.cu │ │ │ ├── elemwise_binary_op-inl.h │ │ │ ├── elemwise_binary_op.cc │ │ │ ├── elemwise_binary_op.h │ │ │ ├── elemwise_binary_op_basic.cc │ │ │ ├── 
elemwise_binary_op_basic.cu │ │ │ ├── elemwise_binary_op_extended.cc │ │ │ ├── elemwise_binary_op_extended.cu │ │ │ ├── elemwise_binary_op_logic.cc │ │ │ ├── elemwise_binary_op_logic.cu │ │ │ ├── elemwise_binary_scalar_op.cc │ │ │ ├── elemwise_binary_scalar_op.h │ │ │ ├── elemwise_binary_scalar_op_basic.cc │ │ │ ├── elemwise_binary_scalar_op_basic.cu │ │ │ ├── elemwise_binary_scalar_op_extended.cc │ │ │ ├── elemwise_binary_scalar_op_extended.cu │ │ │ ├── elemwise_binary_scalar_op_logic.cc │ │ │ ├── elemwise_binary_scalar_op_logic.cu │ │ │ ├── elemwise_sum.cc │ │ │ ├── elemwise_sum.cu │ │ │ ├── elemwise_sum.h │ │ │ ├── elemwise_unary_op.cc │ │ │ ├── elemwise_unary_op.h │ │ │ ├── elemwise_unary_op_basic.cc │ │ │ ├── elemwise_unary_op_basic.cu │ │ │ ├── elemwise_unary_op_logexp.cc │ │ │ ├── elemwise_unary_op_logexp.cu │ │ │ ├── elemwise_unary_op_pow.cc │ │ │ ├── elemwise_unary_op_pow.cu │ │ │ ├── elemwise_unary_op_trig.cc │ │ │ ├── elemwise_unary_op_trig.cu │ │ │ ├── histogram-inl.h │ │ │ ├── histogram.cc │ │ │ ├── histogram.cu │ │ │ ├── index_add-inl.h │ │ │ ├── index_add_backward.cc │ │ │ ├── index_add_backward.cu │ │ │ ├── index_add_forward.cc │ │ │ ├── index_add_forward.cu │ │ │ ├── index_update-inl.h │ │ │ ├── index_update.cc │ │ │ ├── index_update.cu │ │ │ ├── indexing_op-inl.cuh │ │ │ ├── indexing_op.cc │ │ │ ├── indexing_op.cu │ │ │ ├── indexing_op.h │ │ │ ├── init_op.cc │ │ │ ├── init_op.cu │ │ │ ├── init_op.h │ │ │ ├── la_op-inl.h │ │ │ ├── la_op.cc │ │ │ ├── la_op.cu │ │ │ ├── la_op.h │ │ │ ├── matrix_op-inl.h │ │ │ ├── matrix_op.cc │ │ │ ├── matrix_op.cu │ │ │ ├── ordering_op-inl.h │ │ │ ├── ordering_op.cc │ │ │ ├── ordering_op.cu │ │ │ ├── pseudo2DTranspose_op-inl.cuh │ │ │ ├── ravel.cc │ │ │ ├── ravel.cu │ │ │ ├── ravel.h │ │ │ ├── reduce_rtc.cc │ │ │ ├── slice-inl.h │ │ │ ├── sort_op-inl.cuh │ │ │ ├── sort_op.h │ │ │ ├── sparse_retain-inl.h │ │ │ ├── sparse_retain.cc │ │ │ ├── sparse_retain.cu │ │ │ ├── square_sum-inl.h │ │ │ ├── square_sum.cc │ │ │ ├── 
square_sum.cu │ │ │ └── util/ │ │ │ ├── tensor_util-inl.cuh │ │ │ └── tensor_util-inl.h │ │ └── tvmop/ │ │ ├── op_module.cc │ │ └── op_module.h │ ├── optimizer/ │ │ └── sgd-inl.h │ ├── profiler/ │ │ ├── aggregate_stats.cc │ │ ├── aggregate_stats.h │ │ ├── custom_op_profiler.h │ │ ├── profiler.cc │ │ ├── profiler.h │ │ ├── storage_profiler.cc │ │ ├── storage_profiler.h │ │ ├── vtune.cc │ │ └── vtune.h │ ├── resource.cc │ ├── runtime/ │ │ ├── c_runtime_api.cc │ │ ├── container.cc │ │ ├── ndarray_handle.cc │ │ ├── object.cc │ │ ├── object_internal.h │ │ └── registry.cc │ ├── serialization/ │ │ ├── cnpy.cc │ │ └── cnpy.h │ └── storage/ │ ├── cpu_device_storage.h │ ├── cpu_shared_storage_manager.h │ ├── gpu_device_storage.h │ ├── naive_storage_manager.h │ ├── pinned_memory_storage.h │ ├── pooled_storage_manager.h │ ├── storage.cc │ ├── storage_manager.h │ └── storage_manager_helpers.h ├── tests/ │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── cpp/ │ │ ├── .gitignore │ │ ├── engine/ │ │ │ ├── engine_shutdown_test.cc │ │ │ ├── omp_test.cc │ │ │ ├── thread_local_test.cc │ │ │ └── threaded_engine_test.cc │ │ ├── include/ │ │ │ ├── test_core_op.h │ │ │ ├── test_dnnl.h │ │ │ ├── test_legacy_op.h │ │ │ ├── test_ndarray_utils.h │ │ │ ├── test_op.h │ │ │ ├── test_op_runner.h │ │ │ ├── test_perf.h │ │ │ ├── test_tune.h │ │ │ └── test_util.h │ │ ├── kvstore/ │ │ │ └── gpu_topology_test.cc │ │ ├── misc/ │ │ │ ├── base.cc │ │ │ └── libinfo_test.cc │ │ ├── operator/ │ │ │ ├── activation_perf.cc │ │ │ ├── batchnorm_test.cc │ │ │ ├── coreop_perf.cc │ │ │ ├── dnnl_operator_test.cc │ │ │ ├── dnnl_test.cc │ │ │ ├── dropout_perf.cc │ │ │ ├── fully_conn_perf.cc │ │ │ ├── krprod_test.cc │ │ │ ├── runner/ │ │ │ │ └── core_op_runner_test.cc │ │ │ ├── slice_channel_perf.cc │ │ │ └── tune/ │ │ │ └── operator_tune_test.cc │ │ ├── storage/ │ │ │ └── storage_test.cc │ │ └── test_main.cc │ ├── nightly/ │ │ ├── .gitignore │ │ ├── Jenkinsfile │ │ ├── JenkinsfileForBinaries │ │ ├── 
README.md │ │ ├── TestDoc/ │ │ │ ├── doc_spell_checker.py │ │ │ └── doc_spell_grammar.sh │ │ ├── common.py │ │ ├── dist_async_kvstore.py │ │ ├── dist_device_sync_kvstore.py │ │ ├── dist_device_sync_kvstore_byteps.py │ │ ├── dist_device_sync_kvstore_custom.py │ │ ├── dist_device_sync_kvstore_horovod.py │ │ ├── dist_sync_kvstore.py │ │ ├── estimator/ │ │ │ ├── test_estimator_cnn.py │ │ │ └── test_sentiment_rnn.py │ │ ├── model_backwards_compatibility_check/ │ │ │ ├── JenkinsfileForMBCC │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── model_backward_compat_checker.sh │ │ │ ├── model_backwards_compat_inference.py │ │ │ ├── model_backwards_compat_train.py │ │ │ ├── train_mxnet_legacy_models.sh │ │ │ └── upload_models_to_s3.sh │ │ ├── test_distributed_training-gpu.sh │ │ ├── test_kvstore.py │ │ ├── test_large_array.py │ │ ├── test_large_vector.py │ │ ├── test_np_large_array.py │ │ ├── test_np_random.py │ │ └── test_server_profiling.py │ ├── python/ │ │ ├── README.md │ │ ├── amp/ │ │ │ └── common.py │ │ ├── array-api/ │ │ │ └── test_data_interchange.py │ │ ├── common/ │ │ │ └── models.py │ │ ├── conftest.py │ │ ├── dnnl/ │ │ │ ├── op_cfg.py │ │ │ ├── subgraphs/ │ │ │ │ ├── subgraph_common.py │ │ │ │ ├── test_amp_subgraph.py │ │ │ │ ├── test_conv_subgraph.py │ │ │ │ ├── test_fc_subgraph.py │ │ │ │ ├── test_matmul_subgraph.py │ │ │ │ └── test_pow_mul_subgraph.py │ │ │ ├── test_amp.py │ │ │ ├── test_bf16_operator.py │ │ │ ├── test_dnnl.py │ │ │ └── test_quantization_dnnl.py │ │ ├── doctest/ │ │ │ └── test_docstring.py │ │ ├── gpu/ │ │ │ ├── test_amp.py │ │ │ ├── test_amp_init.py │ │ │ ├── test_deferred_compute_gpu.py │ │ │ ├── test_device.py │ │ │ ├── test_extensions_gpu.py │ │ │ ├── test_fusion.py │ │ │ ├── test_gluon_gpu.py │ │ │ ├── test_gluon_model_zoo_gpu.py │ │ │ ├── test_gluon_transforms.py │ │ │ ├── test_kvstore_gpu.py │ │ │ ├── test_nccl.py │ │ │ ├── test_numpy_einsum.py │ │ │ ├── test_numpy_fallback.py │ │ │ ├── test_operator_gpu.py │ │ │ ├── 
test_profiler_gpu.py │ │ │ ├── test_rtc.py │ │ │ ├── test_tvm_bridge.py │ │ │ └── test_tvm_op_gpu.py │ │ ├── onnx/ │ │ │ ├── test_models.py │ │ │ └── test_operators.py │ │ ├── profiling/ │ │ │ ├── simple_forward.py │ │ │ └── test_nvtx.py │ │ ├── quantization/ │ │ │ └── test_quantization.py │ │ ├── test_quantization_gpu.py │ │ ├── train/ │ │ │ ├── common.py │ │ │ └── test_autograd.py │ │ └── unittest/ │ │ ├── common.py │ │ ├── legacy_ndarray.v0 │ │ ├── test_attr.py │ │ ├── test_autograd.py │ │ ├── test_base.py │ │ ├── test_contrib_control_flow.py │ │ ├── test_contrib_gluon_data_vision.py │ │ ├── test_contrib_hawkesll.py │ │ ├── test_contrib_intgemm.py │ │ ├── test_contrib_io.py │ │ ├── test_contrib_krprod.py │ │ ├── test_contrib_operator.py │ │ ├── test_contrib_optimizer.py │ │ ├── test_contrib_stes_op.py │ │ ├── test_deferred_compute.py │ │ ├── test_dgl_graph.py │ │ ├── test_dynamic_shape.py │ │ ├── test_engine.py │ │ ├── test_engine_import.py │ │ ├── test_exc_handling.py │ │ ├── test_executor.py │ │ ├── test_extensions.py │ │ ├── test_ffi_container.py │ │ ├── test_gluon.py │ │ ├── test_gluon_batch_processor.py │ │ ├── test_gluon_control_flow.py │ │ ├── test_gluon_data.py │ │ ├── test_gluon_estimator.py │ │ ├── test_gluon_event_handler.py │ │ ├── test_gluon_indexing.py │ │ ├── test_gluon_model_zoo.py │ │ ├── test_gluon_probability_v2.py │ │ ├── test_gluon_rnn.py │ │ ├── test_gluon_save.py │ │ ├── test_gluon_trainer.py │ │ ├── test_gluon_utils.py │ │ ├── test_higher_order_grad.py │ │ ├── test_image.py │ │ ├── test_infer_shape.py │ │ ├── test_infer_type.py │ │ ├── test_io.py │ │ ├── test_kvstore.py │ │ ├── test_kvstore_custom.py │ │ ├── test_loss.py │ │ ├── test_memory_opt.py │ │ ├── test_metric.py │ │ ├── test_ndarray.py │ │ ├── test_numpy_contrib_gluon_data_vision.py │ │ ├── test_numpy_default_dtype.py │ │ ├── test_numpy_gluon.py │ │ ├── test_numpy_gluon_data_vision.py │ │ ├── test_numpy_interoperability.py │ │ ├── test_numpy_loss.py │ │ ├── test_numpy_ndarray.py │ 
│ ├── test_numpy_op.py │ │ ├── test_operator.py │ │ ├── test_optimizer.py │ │ ├── test_profiler.py │ │ ├── test_random.py │ │ ├── test_recordio.py │ │ ├── test_runtime.py │ │ ├── test_smoke.py │ │ ├── test_sparse_ndarray.py │ │ ├── test_sparse_operator.py │ │ ├── test_subgraph.py │ │ ├── test_subgraph_op.py │ │ ├── test_symbol.py │ │ ├── test_test_utils.py │ │ ├── test_thread_local.py │ │ ├── test_tvm_op.py │ │ └── test_viz.py │ ├── tutorials/ │ │ ├── test_sanity_tutorials.py │ │ └── test_tutorials.py │ └── utils/ │ └── notebook_test/ │ └── __init__.py └── tools/ ├── bandwidth/ │ ├── .gitignore │ ├── README.md │ ├── measure.py │ └── test_measure.py ├── cfn/ │ └── Readme.md ├── create_source_archive.sh ├── dependencies/ │ ├── LICENSE.binary.dependencies │ ├── README.md │ ├── cityhash.sh │ ├── curl.sh │ ├── eigen.sh │ ├── libpng.sh │ ├── libtiff.sh │ ├── libturbojpeg.sh │ ├── libz.sh │ ├── lz4.sh │ ├── make_shared_dependencies.sh │ ├── mkl.sh │ ├── numpy_mkl.sh │ ├── openblas.sh │ ├── opencv.sh │ ├── openssl.sh │ ├── patch/ │ │ └── opencv_lapack.h │ ├── protobuf.sh │ └── zmq.sh ├── diagnose.py ├── flakiness_checker.py ├── git-pre-commit ├── im2rec.cc ├── im2rec.py ├── ipynb2md.py ├── kill-mxnet.py ├── launch.py ├── license_header.py ├── lint/ │ ├── clang_format_ci.sh │ └── git-clang-format-13 ├── parse_log.py ├── pip/ │ ├── MANIFEST.in │ ├── doc/ │ │ ├── CPU_ADDITIONAL.md │ │ ├── CU101_ADDITIONAL.md │ │ ├── CU102_ADDITIONAL.md │ │ ├── CU110_ADDITIONAL.md │ │ ├── CU112_ADDITIONAL.md │ │ ├── NATIVE_ADDITIONAL.md │ │ └── PYPI_README.md │ ├── sanity_test.py │ └── setup.py ├── profile/ │ └── tune_python.sh ├── rec2idx.py ├── staticbuild/ │ ├── README.md │ ├── build.sh │ ├── build_lib.sh │ └── build_wheel.sh └── windowsbuild/ ├── README.md ├── gen_warp.cpp └── warp_dll.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .asf.yaml 
================================================ notifications: commits: commits@mxnet.apache.org issues: issues@mxnet.apache.org pullrequests: commits@mxnet.apache.org github: features: wiki: true issues: true projects: true enabled_merge_buttons: squash: true merge: false rebase: true ================================================ FILE: .clang-format ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
--- Language: Cpp BasedOnStyle: Google ColumnLimit: 100 AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: false AlignConsecutiveMacros: true DerivePointerAlignment: false SortIncludes: true MaxEmptyLinesToKeep: 1 PointerAlignment: Left AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false SortIncludes: false BreakBeforeTernaryOperators: false --- Language: JavaScript DisableFormat: true ================================================ FILE: .clang-tidy ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # The checks defined here will be run and will display by default as warnings. 
Checks: > -*, cppcoreguidelines-*, clang-analyzer-*, modernize-*, performance-faster-string-find, performance-for-range-copy, performance-implicit-conversion-in-loop, performance-inefficient-algorithm, performance-inefficient-string-concatenation, performance-trivially-destructible, performance-inefficient-vector-operation, performance-move-const-arg, performance-move-constructor-init, performance-noexcept-move-constructor, performance-no-automatic-move, performance-unnecessary-copy-initialization, performance-type-promotion-in-math-fn # performance checks not enabled due to segmentation fault in clang-tidy v8+: # performance-unnecessary-value-param # In order to trigger an error, you must have a rule defined both in checks and in this section. WarningsAsErrors: > cppcoreguidelines-no-malloc, modernize-deprecated-headers, modernize-loop-convert, modernize-make-shared, modernize-pass-by-value, modernize-make-unique, modernize-raw-string-literal, modernize-redundant-void-arg, modernize-replace-auto-ptr, modernize-replace-random-shuffle, modernize-return-braced-init-list, modernize-shrink-to-fit, modernize-unary-static-assert, modernize-use-bool-literals, modernize-use-default-member-init, modernize-use-emplace, modernize-use-equals-default, modernize-use-equals-delete, modernize-use-noexcept, modernize-use-nullptr, modernize-use-override, modernize-use-transparent-functors, modernize-use-using, performance-faster-string-find, performance-implicit-conversion-in-loop, performance-inefficient-algorithm, performance-inefficient-string-concatenation, performance-trivially-destructible, performance-inefficient-vector-operation, performance-move-const-arg, performance-move-constructor-init, performance-noexcept-move-constructor, performance-no-automatic-move, performance-unnecessary-copy-initialization, performance-type-promotion-in-math-fn # modernize checks not enforced: # modernize-use-auto # modernize-avoid-bind # performance checks not enforced due to segmentation fault
# performance-for-range-copy # Todo: define a better regex match that includes most project headers, but excludes third party # code. HeaderFilterRegex: '^src/.*' ================================================ FILE: .cmakelintrc ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
# # build and install are separated so changes to build don't invalidate # the whole docker cache for the image # --filter= options: https://pypi.org/project/cmakelint/ # "-" disable option # "+" enable option filter=-convention/filename,-linelength,-package/consistency,-readability/logic,-readability/mixedcase,-readability/wonkycase,-syntax,-whitespace/eol,+whitespace/extra,-whitespace/indent,-whitespace/mismatch,-whitespace/newline,-whitespace/tabs ================================================ FILE: .codecov.yml ================================================ # Codecov.io configuration file # See https://docs.codecov.io/docs/codecovyml-reference codecov: notify: require_ci_to_pass: yes coverage: status: project: off patch: on precision: 2 round: down range: "70...100" parsers: gcov: branch_detection: conditional: yes loop: yes method: no macro: no ignore: - "tests/**/*" # Disable comments for now to gather data in the background comment: false # layout: "header, diff" # behavior: default # require_changes: no ================================================ FILE: .git-blame-ignore-revs ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
# Clang-formatter initial commit - /src directory is formatted e359bcd65e453d4bc86d3d8e5b1dee3916a2e426 # Clang-formatter initial commit - OneDNN files 718a860f3aa8f24acca2aec867a3b31bc60a6e79 ================================================ FILE: .gitattributes ================================================ .gitattributes export-ignore R-package/* export-ignore ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: 'Bug, needs triage' assignees: '' --- ## Description (A clear and concise description of what the bug is.) ### Error Message (Paste the complete error message. Please also include stack trace by setting environment variable `DMLC_LOG_STACK_TRACE_DEPTH=100` before running your script.) ## To Reproduce (If you developed your own code, please provide a short script that reproduces the error. For existing examples, please provide link.) ### Steps to reproduce (Paste the commands you ran that produced the error.) 1. 2. ## What have you tried to solve it? 1. 2. ## Environment ***We recommend using our script for collecting the diagnostic information with the following command*** `curl --retry 10 -s https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/diagnose.py | python3`
<details> <summary>Environment Information</summary> ``` # Paste the diagnose.py command output here ``` </details>
================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: GitHub Discussions url: https://github.com/apache/mxnet/discussions about: Use GitHub Discussions to ask and answer questions, exchange ideas, and share learning. - name: Discourse Forum url: https://discuss.mxnet.io/ about: Discuss forum for usage questions. - name: Stack Overflow url: https://stackoverflow.com/questions/tagged/mxnet about: Ask and answer usage questions on Stack Overflow ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: 'Feature request' assignees: '' --- ## Description (A clear and concise description of what the feature is.) - If the proposal is about a new model, provide a description of what the model is. - If the proposal is about an API, provide mock examples if possible. ## References - list reference and related literature - list known implementations ================================================ FILE: .github/ISSUE_TEMPLATE/flaky_test.md ================================================ --- name: Flaky test about: Report a flaky test title: '' labels: 'Flaky' assignees: '' --- ## Description (The location and name of the flaky test.) ## Occurrences (Links to the known occurrences.) ## What have you tried to solve it? 1. 2. ================================================ FILE: .github/ISSUE_TEMPLATE/rfc.md ================================================ --- name: Request for comment (RFC) about: RFC process requests for review on the design of a new feature or bug fix that involves significant effort. This thread is automatically mirrored to the dev@mxnet.apache.org mailing list.
title: '[RFC] ' labels: 'RFC' assignees: '' --- ## Problem statement (A clear and concise description of what this contribution is trying to solve.) ## Proposed solutions (Description of the approach this contribution takes to solve the problem.) ## References - list reference and related literature - list known implementations ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Description ## (Brief description of what this PR is about) ## Checklist ## ### Essentials ### - [ ] PR's title starts with a category (e.g. [BUGFIX], [MODEL], [TUTORIAL], [FEATURE], [DOC], etc) - [ ] Changes are complete (i.e. I finished coding on this PR) - [ ] All changes have test coverage - [ ] Code is well-documented ### Changes ### - [ ] Feature1, tests, (and when applicable, API doc) - [ ] Feature2, tests, (and when applicable, API doc) ## Comments ## - If this change is a backward incompatible change, why must this change be made. - Interesting edge cases to note here ================================================ FILE: .github/workflows/greetings.yml ================================================ name: Greetings on: [pull_request, issues] jobs: greeting: runs-on: ubuntu-latest steps: - uses: actions/first-interaction@v1 env: GITHUB_PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} GITHUB_PR_RUN_ID: ${{ github.run_id }} GITHUB_PR_BASE_REF: ${{ github.event.pull_request.base.ref }} with: repo-token: ${{ secrets.GITHUB_TOKEN }} issue-message: | Welcome to Apache MXNet (incubating)! We are on a mission to democratize AI, and we are glad that you are contributing to it by opening this issue. Please make sure to include all the relevant context, and one of the @apache/mxnet-committers will be here shortly. If you are interested in contributing to our project, let us know!
Also, be sure to check out our guide on [contributing to MXNet](https://mxnet.apache.org/community/contribute) and our [development guides wiki](https://cwiki.apache.org/confluence/display/MXNET/Developments). pr-message: | Welcome to Apache MXNet (incubating)! We are on a mission to democratize AI, and we are glad that you are contributing to it by opening this pull request. Please make sure that the changes are covered by tests. One of the @apache/mxnet-committers will be here shortly. If you run into any issue with the CI and tests, we recommend that you first check out our guide on [developer guides wiki](https://cwiki.apache.org/confluence/display/MXNET/Developments). Let our @apache/mxnet-committers know if you need any help! ================================================ FILE: .github/workflows/license_check.yml ================================================ name: license check on: [push, pull_request] defaults: run: shell: bash jobs: licensecheck: runs-on: ubuntu-latest strategy: fail-fast: false steps: - name: Checkout repository uses: actions/checkout@v2 - name: Update Submodules run: | git submodule update --init --recursive - name: Check License Header uses: apache/skywalking-eyes@main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/link_check.yml ================================================ name: link check on: [push, pull_request] defaults: run: shell: bash jobs: linkcheck: runs-on: ubuntu-20.04 strategy: fail-fast: false steps: - name: Checkout repository uses: actions/checkout@v2 - name: Compilation cache uses: actions/cache@v2 with: path: ~/.ccache # We include the commit sha in the cache key, as new cache entries are # only created if there is no existing entry for the key yet. 
key: ${{ runner.os }}-ccache-${{ github.sha }} # Restore any ccache cache entry, if none for # ${{ runner.os }}-ccache-${{ github.sha }} exists restore-keys: | ${{ runner.os }}-ccache - name: Setup python uses: actions/setup-python@v2 with: python-version: '3.8' architecture: x64 - name: Install Dependencies run: | sudo apt-get update sudo apt-get install -y libopenblas-dev ninja-build ccache python3-sphinx \ pandoc gcc-7 g++-7 libopencv-dev protobuf-compiler libprotobuf-dev ccache -M 500M # Limit the ccache size; Github's overall cache limit is 5GB python -m pip install pandoc-attributes==0.1.7 python -m pip install -r docs/python_docs/requirements python -m pip install docs/python_docs/themes/mx-theme shell: bash - name: Build project env: CC: gcc-7 CXX: g++-7 run: | git submodule update --init --recursive mkdir build; cd build CXXFLAGS="-Wno-error=strict-overflow" cmake \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DUSE_ONEDNN=OFF \ -DUSE_CUDA=OFF \ -G Ninja .. ninja cd .. shell: bash - name: Setup Python run: | python -m pip install --user -e python - name: Link Check env: MAX_RETRY: 3 run: | for run in {1..$MAX_RETRY} do cd docs/python_docs/python make clean timeout 10m make linkcheck EVAL=0 if [[ $? -eq 0 ]] then break else if [[ run -eq $MAX_RETRY ]] then exit 1 fi fi done ================================================ FILE: .github/workflows/os_x_mklbuild.yml ================================================ name: mkl continuous build on: [push, pull_request] jobs: macosx-x86_64: runs-on: macos-10.15 steps: - name: Checkout repository uses: actions/checkout@v2 - name: Compilation cache uses: actions/cache@v2 with: path: ~/.ccache # We include the commit sha in the cache key, as new cache entries are # only created if there is no existing entry for the key yet. 
key: ${{ runner.os }}-ccache-${{ github.sha }} # Restore any ccache cache entry, if none for # ${{ runner.os }}-ccache-${{ github.sha }} exists restore-keys: | ${{ runner.os }}-ccache - name: Setup python uses: actions/setup-python@v2 with: python-version: '3.8' architecture: x64 - name: Install Dependencies run: | brew install nasm automake ninja libtool cmake pkgconfig protobuf hdf5 zlib ccache ccache -M 500M # Limit the ccache size; Github's overall cache limit is 5GB python -m pip install -r ci/docker/install/requirements shell: bash - name: Build project run: | ./tools/staticbuild/build.sh cpu mkl - name: Setup Python run: | python -m pip install --user -e python - name: Test project run: | python -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'not test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial' MXNET_ENGINE_TYPE=NaiveEngine python -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial' python -m pytest --durations=50 --verbose tests/python/unittest/ -k 'not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'serial' python -m pytest -n 4 --durations=50 --verbose tests/python/dnnl -k 'not (test_bf16_operator or test_amp or test_amp_subgraph)' ================================================ FILE: 
.github/workflows/os_x_staticbuild.yml ================================================ name: continuous build on: [push, pull_request] jobs: macosx-x86_64: runs-on: macos-latest steps: - name: Checkout repository uses: actions/checkout@v2 - name: Compilation cache uses: actions/cache@v2 with: path: ~/.ccache # We include the commit sha in the cache key, as new cache entries are # only created if there is no existing entry for the key yet. key: ${{ runner.os }}-ccache-${{ github.sha }} # Restore any ccache cache entry, if none for # ${{ runner.os }}-ccache-${{ github.sha }} exists restore-keys: | ${{ runner.os }}-ccache - name: Setup python uses: actions/setup-python@v2 with: python-version: '3.8' architecture: x64 - name: Install Dependencies run: | brew install nasm automake ninja libtool cmake pkgconfig protobuf hdf5 zlib ccache ccache -M 500M # Limit the ccache size; Github's overall cache limit is 5GB python -m pip install -r ci/docker/install/requirements shell: bash - name: Build project run: | CMAKE_STATICBUILD=1 ./tools/staticbuild/build.sh cpu - name: Setup Python run: | python -m pip install --user -e python - name: Build with Cython run: | cd python python setup.py build_ext --inplace --with-cython - name: Test project env: MXNET_ENABLE_CYTHON: 1 run: | python3 -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'not test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial' MXNET_ENGINE_TYPE=NaiveEngine python3 -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or 
test_multi_worker_dataloader_release_pool)' -m 'not serial' python3 -m pytest --durations=50 --verbose tests/python/unittest/ -k 'not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'serial' - name: Test Array API env: MXNET_ENABLE_CYTHON: 1 run: | cd .. git clone https://github.com/data-apis/array-api-tests.git cd array-api-tests git checkout c1dba80a196a03f880d2e0a998a272fb3867b720 export ARRAY_API_TESTS_MODULE=mxnet.numpy pytest export DMLC_LOG_STACK_TRACE_DEPTH=100 python3 -m pytest --reruns 3 --durations=50 --verbose array_api_tests/test_creation_functions.py python3 -m pytest --reruns 3 --durations=50 --verbose array_api_tests/test_indexing.py python3 -m pytest --reruns 3 --durations=50 --verbose array_api_tests/test_constants.py python3 -m pytest --reruns 3 --durations=50 --verbose array_api_tests/test_elementwise_functions.py python3 -m pytest --reruns 3 --durations=50 --verbose array_api_tests/test_broadcasting.py python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_elementwise_function_two_arg_bool_type_promotion python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_elementwise_function_two_arg_promoted_type_promotion python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_elementwise_function_one_arg_bool python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_elementwise_function_one_arg_type_promotion python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_operator_one_arg_type_promotion python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_operator_two_arg_bool_promotion python3 -m 
pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_operator_two_arg_promoted_promotion python3 -m pytest --reruns 3 --durations=50 --verbose \ array_api_tests/test_type_promotion.py::test_operator_inplace_two_arg_promoted_promotion ================================================ FILE: .gitignore ================================================ # Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Dynamic libraries *.so *.dylib *.dll # Fortran module files *.mod # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app *~ # doc doc/html doc/latex doc/doc docs/web-data .jekyll-cache *.lock #dmlc config.mk config.cmake *.pyc .Rhistory *log Debug *suo tracker # vim *.swp *.swo *.swn .vimrc .ycm_extra_conf.py .ycm_extra_conf.pyc # Emacs .#* .clang_complete .dir-locals.el __pycache__ *.pkl *.params *.states *.json *.d cmake-build* data model recommonmark # R *.Rcheck *.rds *.Rproj .Rproj.user R-package/inst/* *.tar.gz *.tgz R-package/man/*.Rd R-package/R/mxnet_generated.R # data *.rec *.lst *.zip *ubyte *.bin *.txt !CMakeLists.txt # ipython notebook *_pb2.py *.ipynb_checkpoints* input.txt* # Jetbrain .idea .gradle *.iml # ctags tags # cscope cscope.out cscope.files # Eclipse project config .project .cproject .classpath .settings .pydevproject CMakeFiles cmake_install.cmake # Visual Studio Code .vscode # Mac OS X .DS_Store # Windows windows_package.7z windows_package #Notebook Automated Test !tests/nightly/test_tutorial_config.txt !tests/nightly/TestNotebook tests/nightly/tmp_notebook # pip building tools tools/pip_package/build tools/pip_package/dist tools/pip_package/mxnet.egg-info tools/pip_package/mxnet # temporary path for building dependencies when building wheel deps/ staticdeps/ tmp/ build/ lib/ bin/ model/ # VTune ./r0*hs # generated function signature for IDE auto-complete python/mxnet/symbol/gen_* python/mxnet/ndarray/gen_* python/.eggs # tests if built insource 
*CTestTestfile.cmake *DartConfiguration.tcl tests/Makefile tests/mxnet_unit_tests # Code coverage related .coverage *.gcov *.gcno coverage.xml # Local CMake build config cmake_options.yml # header file generated at compile time include/onednn/oneapi/dnnl/dnnl_version.h include/onednn/oneapi/dnnl/dnnl_config.h ================================================ FILE: .gitmodules ================================================ [submodule "3rdparty/dmlc-core"] path = 3rdparty/dmlc-core url = https://github.com/dmlc/dmlc-core.git [submodule "3rdparty/ps-lite"] path = 3rdparty/ps-lite url = https://github.com/dmlc/ps-lite [submodule "3rdparty/dlpack"] path = 3rdparty/dlpack url = https://github.com/dmlc/dlpack [submodule "3rdparty/googletest"] path = 3rdparty/googletest url = https://github.com/google/googletest.git [submodule "3rdparty/tvm"] path = 3rdparty/tvm url = https://github.com/apache/incubator-tvm.git [submodule "3rdparty/onnx-tensorrt"] path = 3rdparty/onnx-tensorrt url = https://github.com/onnx/onnx-tensorrt.git [submodule "3rdparty/nvidia_cub"] path = 3rdparty/nvidia_cub url = https://github.com/NVlabs/cub.git [submodule "3rdparty/libzip"] path = 3rdparty/libzip url = https://github.com/nih-at/libzip.git [submodule "3rdparty/intgemm"] path = 3rdparty/intgemm url = https://github.com/kpu/intgemm [submodule "3rdparty/onednn"] path = 3rdparty/onednn url = https://github.com/oneapi-src/oneDNN ================================================ FILE: .licenserc.yaml ================================================ header: license: spdx-id: Apache-2.0 copyright-owner: Apache Software Foundation paths-ignore: - 'licenses' - 'LICENSE' - 'NOTICE' - '3rdparty' - 'DISCLAIMER' - 'KEYS' - 'tools/dependencies/LICENSE.binary.dependencies' - 'tools/lint/git-clang-format-13' # files not distributed in source archive (listed in tools/source-exclude-artifacts.txt) - 'docs' - 'CODEOWNERS' - '.gitignore' - '.codecov.yml' - '.gitattributes' - '.github' - '.gitmodules' - 
'.licenserc.yaml' - '.asf.yaml' - 'CODEOWNERS' - 'python/mxnet/_cy3/README.md' - 'tools/dependencies/LICENSE.binary.dependencies' # files not distributed in source archive (listed in tools/source-exclude-artifacts.txt) - 'docs' # files licensed under apache-2.0 license but do not include full license headers recognized by skywalking-eyes - '**/*.ipynb' - 'src/operator/deformable_convolution-inl.h' - 'src/operator/deformable_convolution.cc' - 'src/operator/deformable_convolution.cu' - 'src/operator/contrib/deformable_psroi_pooling-inl.h' - 'src/operator/contrib/deformable_psroi_pooling.cc' - 'src/operator/contrib/deformable_psroi_pooling.cu' - 'src/operator/contrib/multi_proposal-inl.h' - 'src/operator/contrib/multi_proposal.cc' - 'src/operator/contrib/multi_proposal.cu' - 'src/operator/contrib/psroi_pooling.cc' - 'src/operator/contrib/psroi_pooling.cu' - 'src/operator/nn/dnnl/dnnl_base-inl.h' # files licensed under boost license - 'cmake/Modules/FindJeMalloc.cmake' # files licensed under bsd 2-clause + caffe - 'src/operator/nn/pool.cuh' - 'src/operator/nn/pool.h' - 'src/operator/nn/im2col.cuh' - 'src/operator/nn/im2col.h' - 'src/operator/contrib/nn/deformable_im2col.cuh' - 'src/operator/contrib/nn/deformable_im2col.h' - 'src/operator/contrib/nn/modulated_deformable_im2col.cuh' - 'src/operator/contrib/nn/modulated_deformable_im2col.h' # files licensed under bsd 3-clause - 'cmake/upstream/FindBLAS.cmake' - 'cmake/upstream/FindCUDAToolkit.cmake' - 'cmake/upstream/select_compute_arch.cmake' - 'python/mxnet/onnx/mx2onnx/_export_onnx.py' - 'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py' - 'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py' - 'src/operator/contrib/erfinv-inl.h' - 'src/operator/numpy/np_einsum_op-inl.h' - 'src/operator/numpy/np_einsum_op.cc' - 'src/operator/numpy/np_einsum_path_op-inl.h' # files licensed under mit license - 'src/operator/modulated_deformable_convolution-inl.h' - 
'src/operator/modulated_deformable_convolution.cc' - 'src/operator/modulated_deformable_convolution.cu' - 'src/operator/nn/layer_norm_cpu.h' # symlinks - 'include/dlpack' # symlink to 3rdparty/dlpack/include/dlpack - 'include/dmlc' # symlink to 3rdparty/dmlc-core/include/dmlc - 'include/mshadow' # symlink to 3rdparty/mshadow/mshadow - 'include/onednn' # symlinks to 3rdparty/onednn - 'include/nnvm' # symlinks to 3rdparty/tvm/nnvm/include/nnvm # test/build data - 'tests/python/dnnl/data/test_dnnl_test_dnnl_model_model1.json' comment: on-failure ================================================ FILE: .mxnet_root ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # This file marks the root directory of the Apache MXNet repository. ================================================ FILE: 3rdparty/ctc_include/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ---- Copyright 2015-2016, Baidu USA LLC. ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/LICENSE ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctaloadbalance.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "ctasearch.cuh" #include "loadstore.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // DeviceLoadBalancingSearch // Upper Bound search from A (needles) into B (haystack). The A values are // natural numbers from aBegin to aEnd. bFirst is the index of the B value at // bBegin in shared memory. 
// NOTE(review): the template parameter lists and explicit template arguments
// in this section were missing (angle-bracket spans had been stripped). They
// are restored here from how the names are used in the bodies; confirm the
// parameter order against callers before relying on it.

// Serial per-thread search step used by CTALoadBalance.
//
// Runs exactly VT steps. Each step either
//   * stores bFirst + bBegin at a_shared[aBegin] and advances aBegin, or
//   * advances bBegin and reloads bKey from b_shared.
//
// VT          - number of steps this thread performs (compile-time).
// RangeCheck  - when true, A only advances while aBegin < aEnd and either B
//               is exhausted (bBegin >= bEnd) or aBegin < bKey. When false,
//               aEnd/bEnd are ignored and only aBegin < bKey is tested.
// b_shared    - haystack values; b_shared[bBegin] is read before any range
//               test and b_shared[++bBegin] is read after every B advance, so
//               the caller must keep those slots readable.
// a_shared    - output, indexed directly by the A value (aBegin).
template<int VT, bool RangeCheck>
MGPU_DEVICE void DeviceSerialLoadBalanceSearch(const int* b_shared, int aBegin,
    int aEnd, int bFirst, int bBegin, int bEnd, int* a_shared) {

    int bKey = b_shared[bBegin];

    #pragma unroll
    for(int i = 0; i < VT; ++i) {
        bool p;
        if(RangeCheck)
            p = (aBegin < aEnd) && ((bBegin >= bEnd) || (aBegin < bKey));
        else
            p = aBegin < bKey;

        if(p)
            // Advance A (the needle).
            a_shared[aBegin++] = bFirst + bBegin;
        else
            // Advance B (the haystack).
            bKey = b_shared[++bBegin];
    }
}

////////////////////////////////////////////////////////////////////////////////
// CTALoadBalance
// Computes upper_bound(counting_iterator<int>(first), b_global) - 1.

// Unlike most other CTA* functions, CTALoadBalance loads from global memory.
// This returns the loaded B elements at the beginning or end of shared memory
// depending on the aFirst argument.
// NOTE(review): there is no parameter named aFirst in this signature; the
// flag that shifts where B is placed is loadPrecedingB. Confirm and update the
// sentence above.

// CTALoadBalance requires NT * VT + 2 slots of shared memory.
//
// Returns make_int4(a0, a1, b0, b1): the A and B ranges obtained from
// ComputeMergeRange, with b0 decremented by one when the preceding B element
// was loaded. loadPrecedingB is forced to false when b0 == 0.
//
// Layout written into indices_shared:
//   [0, aCount)                       search results (a_shared)
//   [aCount + loadPrecedingB, ...)    B values (b_shared), followed by
//                                     destCount fill values
template<int NT, int VT, typename InputIt>
MGPU_DEVICE int4 CTALoadBalance(int destCount, InputIt b_global,
    int sourceCount, int block, int tid, const int* mp_global,
    int* indices_shared, bool loadPrecedingB) {

    int4 range = ComputeMergeRange(destCount, sourceCount, block, 0, NT * VT,
        mp_global);

    int a0 = range.x;
    int a1 = range.y;
    int b0 = range.z;
    int b1 = range.w;
    // There is no element before index 0 to load.
    if(!b0) loadPrecedingB = false;

    // Load one trailing term from B. If we're already at the end, fill the
    // end of the buffer with destCount.
    int aCount = a1 - a0;
    int bCount = b1 - b0;
    int extended = b1 < sourceCount;
    int loadCount = bCount + extended;
    int fillCount = NT * VT + 1 - loadCount - aCount;

    int* a_shared = indices_shared;
    int* b_shared = indices_shared + aCount + (int)loadPrecedingB;

    // Load the B values.
//  DeviceMemToMemLoop(bCount + extended + (int)loadPrecedingB,
//      b_global + b0 - (int)loadPrecedingB, tid,
//      b_shared - (int)loadPrecedingB);

    // When loadPrecedingB is set the loop starts at i == -1 for tid 0, which
    // stores b_global[b0 - 1] into the slot reserved just before b_shared.
    for(int i = tid - (int)loadPrecedingB; i < bCount + extended; i += NT)
        b_shared[i] = b_global[b0 + i];

    // Fill the end of the array with destCount.
    for(int i = tid + extended; i < fillCount; i += NT)
        b_shared[bCount + i] = destCount;
    __syncthreads();

    // Run a merge path to find the start of the serial merge for each thread.
    int diag = VT * tid;
    int mp = MergePath<MgpuBoundsUpper>(mgpu::counting_iterator<int>(a0),
        aCount, b_shared, bCount, diag, mgpu::less<int>());

    int a0tid = a0 + mp;
    int b0tid = diag - mp;

    // Subtract 1 from b0 because we want to return upper_bound - 1.
    // a_shared is offset by -a0 because the search indexes it by A value.
    DeviceSerialLoadBalanceSearch<VT, true>(b_shared, a0tid, a1, b0 - 1,
        b0tid, bCount, a_shared - a0);
    __syncthreads();

    b0 -= (int)loadPrecedingB;
    return make_int4(a0, a1, b0, b1);
}

} // namespace mgpu
================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctamerge.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/

#pragma once

#include "ctasearch.cuh"
#include "loadstore.cuh"
#include "sortnetwork.cuh"

namespace mgpu {

////////////////////////////////////////////////////////////////////////////////
// SerialMerge
//
// Per-thread serial merge of two runs stored in the same keys_shared array.
// Emits exactly VT (key, index) pairs into results[] / indices[], then
// executes a __syncthreads() barrier.
//
// NOTE(review): the template parameter list was missing (angle-bracket span
// stripped) and is restored from how VT, RangeCheck, T and Comp are used
// below; confirm the parameter order against callers.
//
// VT          - number of outputs produced by this thread (compile-time).
// RangeCheck  - when true, aEnd/bEnd bound the two runs; when false they are
//               ignored and only comp decides which side advances.
// keys_shared - source keys; A occupies [aBegin, aEnd), B occupies
//               [bBegin, bEnd).
// comp        - ordering predicate; A is taken whenever !comp(bKey, aKey), so
//               ties go to A.
//
// The next key on the consumed side is always reloaded with
// keys_shared[++aBegin] / keys_shared[++bBegin] without a bounds test, so the
// slot one past each run must be readable.
template<int VT, bool RangeCheck, typename T, typename Comp>
MGPU_DEVICE void SerialMerge(const T* keys_shared, int aBegin, int aEnd,
    int bBegin, int bEnd, T* results, int* indices, Comp comp) {

    T aKey = keys_shared[aBegin];
    T bKey = keys_shared[bBegin];

    #pragma unroll
    for(int i = 0; i < VT; ++i) {
        bool p;
        if(RangeCheck)
            p = (bBegin >= bEnd) || ((aBegin < aEnd) && !comp(bKey, aKey));
        else
            p = !comp(bKey, aKey);

        results[i] = p ? aKey : bKey;
        // In the unchecked variant B indices are reported one lower.
        // Presumably this compensates for an extra padding element the caller
        // places between A and B in that layout - confirm against callers.
        indices[i] = p ? aBegin : bBegin - !RangeCheck;

        if(p) aKey = keys_shared[++aBegin];
        else bKey = keys_shared[++bBegin];
    }
    __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// FindMergesortFrame and FindMergesortInterval help mergesort (both CTA and
// global merge pass levels) locate lists within the single source array.

// Returns (offset of a, offset of b, length of list).
// NOTE(review): every template parameter list and explicit template argument
// list in the remainder of this file was missing (angle-bracket spans had been
// stripped). They are restored below from how the names are used in the
// bodies; confirm parameter order and the explicit arguments against callers
// and the helper declarations before relying on them.

// Computes the frame of the merge that `block` takes part in:
//   x = nv * start          (offset of list A)
//   y = nv * start + size   (offset of list B)
//   z = size                (length of each list)
// where start is `block` with the bits of (coop - 1) cleared and
// size = nv * (coop / 2). The masking assumes coop is a power of two.
MGPU_HOST_DEVICE int3 FindMergesortFrame(int coop, int block, int nv) {
    // coop is the number of CTAs or threads cooperating to merge two lists into
    // one. We round block down to the first CTA's ID that is working on this
    // merge.
    int start = ~(coop - 1) & block;
    int size = nv * (coop >> 1);
    return make_int3(nv * start, nv * start + size, size);
}

// Returns (a0, a1, b0, b1) into mergesort input lists between mp0 and mp1.
// All upper bounds (and b0) are clamped to count.
MGPU_HOST_DEVICE int4 FindMergesortInterval(int3 frame, int coop, int block,
    int nv, int count, int mp0, int mp1) {

    // Locate diag from the start of the A sublist.
    int diag = nv * block - frame.x;
    int a0 = frame.x + mp0;
    int a1 = min(count, frame.x + mp1);
    int b0 = min(count, frame.y + diag - mp0);
    int b1 = min(count, frame.y + diag + nv - mp1);

    // The end partition of the last block for each merge operation is computed
    // and stored as the begin partition for the subsequent merge. i.e. it is
    // the same partition but in the wrong coordinate system, so it's 0 when it
    // should be listSize. Correct that by checking if this is the last block
    // in this merge operation.
    if(coop - 1 == ((coop - 1) & block)) {
        a1 = min(count, frame.x + frame.z);
        b1 = min(count, frame.y + frame.z);
    }
    return make_int4(a0, a1, b0, b1);
}

////////////////////////////////////////////////////////////////////////////////
// ComputeMergeRange
//
// Returns (a0, a1, b0, b1) for `block`, using the partition points
// mp_global[block] and mp_global[block + 1].
//   coop != 0: mergesort layout - both lists live in one array of aCount
//              elements (bCount is not used on this path).
//   coop == 0: plain merge of two separate inputs; the B range is derived
//              from the block's output offset NV * block.

MGPU_HOST_DEVICE int4 ComputeMergeRange(int aCount, int bCount, int block,
    int coop, int NV, const int* mp_global) {

    // Load the merge paths computed by the partitioning kernel.
    int mp0 = mp_global[block];
    int mp1 = mp_global[block + 1];
    int gid = NV * block;

    // Compute the ranges of the sources in global memory.
    int4 range;
    if(coop) {
        int3 frame = FindMergesortFrame(coop, block, NV);
        range = FindMergesortInterval(frame, coop, block, NV, aCount, mp0,
            mp1);
    } else {
        range.x = mp0;                                          // a0
        range.y = mp1;                                          // a1
        range.z = gid - range.x;                                // b0
        range.w = min(aCount + bCount, gid + NV) - range.y;     // b1
    }
    return range;
}

////////////////////////////////////////////////////////////////////////////////
// CTA mergesort support

// One merge pass: `coop` threads cooperate to merge two adjacent sorted lists
// of VT * (coop / 2) keys each from keys_shared. Each thread finds its start
// with MergePath and then emits VT keys and their source indices into keys[]
// and indices[]. All positions are clamped to count.
// NOTE(review): MgpuBoundsLower is the restored MergePath argument; it matches
// SerialMerge resolving ties in favour of A - confirm.
template<int VT, typename T, typename Comp>
MGPU_DEVICE void CTABlocksortPass(T* keys_shared, int tid, int count,
    int coop, T* keys, int* indices, Comp comp) {

    int list = ~(coop - 1) & tid;
    int diag = min(count, VT * ((coop - 1) & tid));
    int start = VT * list;
    int a0 = min(count, start);
    int b0 = min(count, start + VT * (coop / 2));
    int b1 = min(count, start + VT * coop);

    int p = MergePath<MgpuBoundsLower>(keys_shared + a0, b0 - a0,
        keys_shared + b0, b1 - b0, diag, comp);

    SerialMerge<VT, true>(keys_shared, a0 + p, b0, b0 + diag - p, b1, keys,
        indices, comp);
}

// Repeats CTABlocksortPass with coop = 2, 4, ..., NT. After each pass the
// merged keys are written back to keys_shared; when HasValues is set the
// values are permuted through values_shared using the indices from the pass.
template<int NT, int VT, bool HasValues, typename KeyType, typename ValType,
    typename Comp>
MGPU_DEVICE void CTABlocksortLoop(ValType threadValues[VT],
    KeyType* keys_shared, ValType* values_shared, int tid, int count,
    Comp comp) {

    #pragma unroll
    for(int coop = 2; coop <= NT; coop *= 2) {
        int indices[VT];
        KeyType keys[VT];
        CTABlocksortPass<VT>(keys_shared, tid, count, coop, keys, indices,
            comp);

        if(HasValues) {
            // Exchange the values through shared memory.
            DeviceThreadToShared<VT>(threadValues, tid, values_shared);
            DeviceGather<NT, VT>(NT * VT, values_shared, indices, tid,
                threadValues);
        }

        // Store results in shared memory in sorted order.
        DeviceThreadToShared<VT>(keys, tid, keys_shared);
    }
}

////////////////////////////////////////////////////////////////////////////////
// CTAMergesort
// Caller provides the keys in shared memory. This function sorts the first
// count elements.
//
// Stable selects the per-thread sort: OddEvenTransposeSort when true,
// OddEvenMergesort otherwise. Threads whose first element index (VT * tid) is
// at or beyond count skip the per-thread sort but still take part in the
// shared-memory store and the merge loop.

template<int NT, int VT, bool Stable, bool HasValues, typename KeyType,
    typename ValType, typename Comp>
MGPU_DEVICE void CTAMergesort(KeyType threadKeys[VT],
    ValType threadValues[VT], KeyType* keys_shared, ValType* values_shared,
    int count, int tid, Comp comp) {

    // Stable sort the keys in the thread.
    if(VT * tid < count) {
        if(Stable)
            OddEvenTransposeSort<VT>(threadKeys, threadValues, comp);
        else
            OddEvenMergesort<VT>(threadKeys, threadValues, comp);
    }

    // Store the locally sorted keys into shared memory.
    DeviceThreadToShared<VT>(threadKeys, tid, keys_shared);

    // Recursively merge lists until the entire CTA is sorted.
    CTABlocksortLoop<NT, VT, HasValues>(threadValues, keys_shared,
        values_shared, tid, count, comp);
}

// Keys-only front end for CTAMergesort. A local dummy value array is passed
// and keys_shared is reused (cast to int*) as the values buffer argument.
// NOTE(review): HasValues is restored as false, so that buffer should never be
// touched - confirm.
template<int NT, int VT, bool Stable, typename KeyType, typename Comp>
MGPU_DEVICE void CTAMergesortKeys(KeyType threadKeys[VT],
    KeyType* keys_shared, int count, int tid, Comp comp) {

    int valuesTemp[VT];
    CTAMergesort<NT, VT, Stable, false>(threadKeys, valuesTemp, keys_shared,
        (int*)keys_shared, count, tid, comp);
}

// Key/value front end for CTAMergesort; forwards all arguments unchanged.
template<int NT, int VT, bool Stable, typename KeyType, typename ValType,
    typename Comp>
MGPU_DEVICE void CTAMergesortPairs(KeyType threadKeys[VT],
    ValType threadValues[VT], KeyType* keys_shared, ValType* values_shared,
    int count, int tid, Comp comp) {

    CTAMergesort<NT, VT, Stable, true>(threadKeys, threadValues, keys_shared,
        values_shared, count, tid, comp);
}

////////////////////////////////////////////////////////////////////////////////
// DeviceMergeKeysIndices
//
// Loads the A range [range.x, range.y) and B range [range.z, range.w) into
// keys_shared, finds this thread's starting point with MergePath, and serially
// merges VT keys into results[] with their shared-memory source indices in
// indices[].
//
// With LoadExtended, one extra element past the end of each input is loaded
// when both inputs have one available (`extended`); the serial merge then
// runs without range checks (its end arguments are passed as 0 and ignored).
// NOTE(review): the <NT, VT, VT + 1> / <NT, VT, VT> arguments to
// DeviceLoad2ToShared are restored from upstream usage - confirm against its
// declaration in loadstore.cuh.

template<int NT, int VT, bool LoadExtended, typename It1, typename It2,
    typename T, typename Comp>
MGPU_DEVICE void DeviceMergeKeysIndices(It1 a_global, int aCount, It2 b_global,
    int bCount, int4 range, int tid, T* keys_shared, T* results, int* indices,
    Comp comp) {

    int a0 = range.x;
    int a1 = range.y;
    int b0 = range.z;
    int b1 = range.w;

    if(LoadExtended) {
        bool extended = (a1 < aCount) && (b1 < bCount);
        aCount = a1 - a0;
        bCount = b1 - b0;
        int aCount2 = aCount + (int)extended;
        int bCount2 = bCount + (int)extended;

        // Load one element past the end of each input to avoid having to use
        // range checking in the merge loop.
        DeviceLoad2ToShared<NT, VT, VT + 1>(a_global + a0, aCount2,
            b_global + b0, bCount2, tid, keys_shared);

        // Run a Merge Path search for each thread's starting point.
        int diag = VT * tid;
        int mp = MergePath<MgpuBoundsLower>(keys_shared, aCount,
            keys_shared + aCount2, bCount, diag, comp);

        // Compute the ranges of the sources in shared memory.
        int a0tid = mp;
        int b0tid = aCount2 + diag - mp;
        if(extended) {
            // Unchecked merge: the end arguments are ignored.
            SerialMerge<VT, false>(keys_shared, a0tid, 0, b0tid, 0, results,
                indices, comp);
        } else {
            int a1tid = aCount;
            int b1tid = aCount2 + bCount;
            SerialMerge<VT, true>(keys_shared, a0tid, a1tid, b0tid, b1tid,
                results, indices, comp);
        }
    } else {
        // Use the input intervals from the ranges between the merge path
        // intersections.
        aCount = a1 - a0;
        bCount = b1 - b0;

        // Load the data into shared memory.
        DeviceLoad2ToShared<NT, VT, VT>(a_global + a0, aCount, b_global + b0,
            bCount, tid, keys_shared);

        // Run a merge path to find the start of the serial merge for each
        // thread.
        int diag = VT * tid;
        int mp = MergePath<MgpuBoundsLower>(keys_shared, aCount,
            keys_shared + aCount, bCount, diag, comp);

        // Compute the ranges of the sources in shared memory.
        int a0tid = mp;
        int a1tid = aCount;
        int b0tid = aCount + diag - mp;
        int b1tid = aCount + bCount;

        // Serial merge into register.
        SerialMerge<VT, true>(keys_shared, a0tid, a1tid, b0tid, b1tid, results,
            indices, comp);
    }
}

////////////////////////////////////////////////////////////////////////////////
// DeviceMerge
// Merge pairs from global memory into global memory. Useful factorization to
// enable calling from merge, mergesort, and locality sort.
//
// Merged keys for this block are written to keys_global + NT * VT * block.
// When HasValues is set, the merge indices are staged in indices_shared and
// used to copy the matching values to vals_global + NT * VT * block.

template<int NT, int VT, bool HasValues, bool LoadExtended, typename KeysIt1,
    typename KeysIt2, typename KeysIt3, typename ValsIt1, typename ValsIt2,
    typename KeyType, typename ValsIt3, typename Comp>
MGPU_DEVICE void DeviceMerge(KeysIt1 aKeys_global, ValsIt1 aVals_global,
    int aCount, KeysIt2 bKeys_global, ValsIt2 bVals_global, int bCount,
    int tid, int block, int4 range, KeyType* keys_shared, int* indices_shared,
    KeysIt3 keys_global, ValsIt3 vals_global, Comp comp) {

    KeyType results[VT];
    int indices[VT];
    DeviceMergeKeysIndices<NT, VT, LoadExtended>(aKeys_global, aCount,
        bKeys_global, bCount, range, tid, keys_shared, results, indices, comp);

    // Store merge results back to shared memory.
    DeviceThreadToShared<VT>(results, tid, keys_shared);

    // Store merged keys to global memory.
    // From here on aCount/bCount are the sizes of this block's sub-ranges.
    aCount = range.y - range.x;
    bCount = range.w - range.z;
    DeviceSharedToGlobal<NT, VT>(aCount + bCount, keys_shared, tid,
        keys_global + NT * VT * block);

    // Copy the values.
    if(HasValues) {
        DeviceThreadToShared<VT>(indices, tid, indices_shared);
        DeviceTransferMergeValuesShared<NT, VT>(aCount + bCount,
            aVals_global + range.x, bVals_global + range.z, aCount,
            indices_shared, tid, vals_global + NT * VT * block);
    }
}

} // namespace mgpu
================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctascan.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "../mgpuenums.h" #include "deviceutil.cuh" #include "intrinsics.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // CTAReduce template > struct CTAReduce { typedef typename Op::first_argument_type T; enum { Size = NT, Capacity = NT }; struct Storage { T shared[Capacity]; }; MGPU_DEVICE static T Reduce(int tid, T x, Storage& storage, Op op = Op()) { storage.shared[tid] = x; __syncthreads(); // Fold the data in half with each pass. #pragma unroll for(int destCount = NT / 2; destCount >= 1; destCount /= 2) { if(tid < destCount) { // Read from the right half and store to the left half. 
x = op(x, storage.shared[destCount + tid]); storage.shared[tid] = x; } __syncthreads(); } T total = storage.shared[0]; __syncthreads(); return total; } }; #if __CUDA_ARCH__ >= 300 template struct CTAReduce > { typedef mgpu::plus Op; typedef int T; enum { Size = NT, Capacity = WARP_SIZE }; struct Storage { int shared[Capacity]; }; MGPU_DEVICE static int Reduce(int tid, int x, Storage& storage, Op op = Op()) { const int NumSections = WARP_SIZE; const int SecSize = NT / NumSections; int lane = (SecSize - 1) & tid; int sec = tid / SecSize; // In the first phase, threads cooperatively find the reduction within // their segment. The segments are SecSize threads (NT / WARP_SIZE) // wide. #pragma unroll for(int offset = 1; offset < SecSize; offset *= 2) x = shfl_add(x, offset, SecSize); // The last thread in each segment stores the local reduction to shared // memory. if(SecSize - 1 == lane) storage.shared[sec] = x; __syncthreads(); // Reduce the totals of each input segment. The spine is WARP_SIZE // threads wide. 
if(tid < NumSections) { x = storage.shared[tid]; #pragma unroll for(int offset = 1; offset < NumSections; offset *= 2) x = shfl_add(x, offset, NumSections); storage.shared[tid] = x; } __syncthreads(); int reduction = storage.shared[NumSections - 1]; __syncthreads(); return reduction; } }; template struct CTAReduce > { typedef mgpu::maximum Op; enum { Size = NT, Capacity = WARP_SIZE }; struct Storage { int shared[Capacity]; }; MGPU_DEVICE static int Reduce(int tid, int x, Storage& storage, Op op = Op()) { const int NumSections = WARP_SIZE; const int SecSize = NT / NumSections; int lane = (SecSize - 1) & tid; int sec = tid / SecSize; #pragma unroll for(int offset = 1; offset < SecSize; offset *= 2) x = shfl_max(x, offset, SecSize); if(SecSize - 1 == lane) storage.shared[sec] = x; __syncthreads(); if(tid < NumSections) { x = storage.shared[tid]; #pragma unroll for(int offset = 1; offset < NumSections; offset *= 2) x = shfl_max(x, offset, NumSections); storage.shared[tid] = x; } __syncthreads(); int reduction = storage.shared[NumSections - 1]; __syncthreads(); return reduction; } }; #endif // __CUDA_ARCH__ >= 300 //////////////////////////////////////////////////////////////////////////////// // CTAScan template > struct CTAScan { typedef typename Op::result_type T; enum { Size = NT, Capacity = 2 * NT + 1 }; struct Storage { T shared[Capacity]; }; MGPU_DEVICE static T Scan(int tid, T x, Storage& storage, T* total, MgpuScanType type = MgpuScanTypeExc, T identity = (T)0, Op op = Op()) { storage.shared[tid] = x; int first = 0; __syncthreads(); #pragma unroll for(int offset = 1; offset < NT; offset += offset) { if(tid >= offset) x = op(storage.shared[first + tid - offset], x); first = NT - first; storage.shared[first + tid] = x; __syncthreads(); } *total = storage.shared[first + NT - 1]; if(MgpuScanTypeExc == type) x = tid ? 
storage.shared[first + tid - 1] : identity; __syncthreads(); return x; } MGPU_DEVICE static T Scan(int tid, T x, Storage& storage) { T total; return Scan(tid, x, storage, &total, MgpuScanTypeExc, (T)0, Op()); } }; //////////////////////////////////////////////////////////////////////////////// // Special partial specialization for CTAScan on Kepler. // This uses the shfl intrinsic to reduce scan latency. #if __CUDA_ARCH__ >= 300 template struct CTAScan > { typedef mgpu::plus Op; enum { Size = NT, NumSegments = WARP_SIZE, SegSize = NT / NumSegments }; enum { Capacity = NumSegments + 1 }; struct Storage { int shared[Capacity + 1]; }; MGPU_DEVICE static int Scan(int tid, int x, Storage& storage, int* total, MgpuScanType type = MgpuScanTypeExc, int identity = 0, Op op = Op()) { // Define WARP_SIZE segments that are NT / WARP_SIZE large. // Each warp makes log(SegSize) shfl_add calls. // The spine makes log(WARP_SIZE) shfl_add calls. int lane = (SegSize - 1) & tid; int segment = tid / SegSize; // Scan each segment using shfl_add. int scan = x; #pragma unroll for(int offset = 1; offset < SegSize; offset *= 2) scan = shfl_add(scan, offset, SegSize); // Store the reduction (last element) of each segment into storage. if(SegSize - 1 == lane) storage.shared[segment] = scan; __syncthreads(); // Warp 0 does a full shfl warp scan on the partials. The total is // stored to shared[NumSegments]. (NumSegments = WARP_SIZE) if(tid < NumSegments) { int y = storage.shared[tid]; int scan = y; #pragma unroll for(int offset = 1; offset < NumSegments; offset *= 2) scan = shfl_add(scan, offset, NumSegments); storage.shared[tid] = scan - y; if(NumSegments - 1 == tid) storage.shared[NumSegments] = scan; } __syncthreads(); // Add the scanned partials back in and convert to exclusive scan. 
scan += storage.shared[segment]; if(MgpuScanTypeExc == type) { scan -= x; if(identity && !tid) scan = identity; } *total = storage.shared[NumSegments]; __syncthreads(); return scan; } MGPU_DEVICE static int Scan(int tid, int x, Storage& storage) { int total; return Scan(tid, x, storage, &total, MgpuScanTypeExc, 0); } }; #endif // __CUDA_ARCH__ >= 300 //////////////////////////////////////////////////////////////////////////////// // CTABinaryScan template MGPU_DEVICE int CTABinaryScan(int tid, bool x, int* shared, int* total) { const int NumWarps = NT / WARP_SIZE; int warp = tid / WARP_SIZE; int lane = (WARP_SIZE - 1); // Store the bit totals for each warp. uint bits = __ballot(x); shared[warp] = popc(bits); __syncthreads(); #if __CUDA_ARCH__ >= 300 if(tid < NumWarps) { int x = shared[tid]; int scan = x; #pragma unroll for(int offset = 1; offset < NumWarps; offset *= 2) scan = shfl_add(scan, offset, NumWarps); shared[tid] = scan - x; } __syncthreads(); #else // Thread 0 scans warp totals. if(!tid) { int scan = 0; #pragma unroll for(int i = 0; i < NumWarps; ++i) { int y = shared[i]; shared[i] = scan; scan += y; } shared[NumWarps] = scan; } __syncthreads(); #endif // __CUDA_ARCH__ >= 300 // Add the warp scan back into the partials. int scan = shared[warp] + __popc(bfe(bits, 0, lane)); *total = shared[NumWarps]; __syncthreads(); return scan; } } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctasearch.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/
/****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/

#pragma once

#include "deviceutil.cuh"
#include "../mgpudevice.cuh"

namespace mgpu {

// One step of a (possibly biased) binary search over [begin, end).
// The probe is mid = (begin + (2^shift - 1) * end) >> shift, so shift == 1
// gives the ordinary midpoint and larger shifts push the probe toward end.
// The intermediate is computed in IntT. begin and end are updated in place:
// for upper-bound searches the range moves right when !comp(key, data[mid]),
// otherwise when comp(data[mid], key).
template MGPU_HOST_DEVICE void BinarySearchIt(It data, int& begin, int& end,
    T key, int shift, Comp comp) {

    IntT scale = (1<< shift) - 1;
    int mid = (int)((begin + scale * end)>> shift);

    T key2 = data[mid];
    bool pred = (MgpuBoundsUpper == Bounds) ?
        !comp(key, key2) :
        comp(key2, key);
    if(pred) begin = mid + 1;
    else end = mid;
}

// Binary search over data[0, count) that starts with up to four probes biased
// toward the end of the range (shifts 9, 7, 5 and 4, enabled for levels >= 4,
// 3, 2 and 1 respectively) and then finishes with ordinary bisection.
// Returns the index at which the search converged.
template MGPU_HOST_DEVICE int BiasedBinarySearch(It data, int count, T key,
    int levels, Comp comp) {

    int begin = 0;
    int end = count;

    if(levels >= 4 && begin < end)
        BinarySearchIt(data, begin, end, key, 9, comp);
    if(levels >= 3 && begin < end)
        BinarySearchIt(data, begin, end, key, 7, comp);
    if(levels >= 2 && begin < end)
        BinarySearchIt(data, begin, end, key, 5, comp);
    if(levels >= 1 && begin < end)
        BinarySearchIt(data, begin, end, key, 4, comp);

    while(begin < end)
        BinarySearchIt(data, begin, end, key, 1, comp);
    return begin;
}

// Plain bisection over data[0, count); returns the index at which the search
// converged.
template MGPU_HOST_DEVICE int BinarySearch(It data, int count, T key,
    Comp comp) {

    int begin = 0;
    int end = count;
    while(begin < end)
        BinarySearchIt(data, begin, end, key, 1, comp);
    return begin;
}

////////////////////////////////////////////////////////////////////////////////
// MergePath search
// Bisects along the cross-diagonal diag of the (a, b) merge grid: a[mid] is
// compared against b[diag - 1 - mid] and the search range over a-indices is
// [max(0, diag - bCount), min(diag, aCount)]. The returned value is the
// a-index of the split; the matching b-index is diag minus that value.

template MGPU_HOST_DEVICE int MergePath(It1 a, int aCount, It2 b, int bCount,
    int diag, Comp comp) {

    typedef typename std::iterator_traits::value_type T;
    int begin = max(0, diag - bCount);
    int end = min(diag, aCount);

    while(begin < end) {
        int mid = (begin + end)>> 1;
        T aKey = a[mid];
        T bKey = b[diag - 1 - mid];
        bool pred = (MgpuBoundsUpper == Bounds) ?
            comp(aKey, bKey) :
            !comp(bKey, aKey);
        if(pred) begin = mid + 1;
        else end = mid;
    }
    return begin;
}

////////////////////////////////////////////////////////////////////////////////
// SegmentedMergePath search
// Merge Path over a single keys array in which A starts at aOffset and B at
// bOffset. Returns diag or aCount directly when the diagonal does not fall
// strictly between leftEnd and rightStart; otherwise bisects with the search
// range additionally clipped by leftEnd (for A) and rightStart (for B).

template MGPU_HOST_DEVICE int SegmentedMergePath(InputIt keys, int aOffset,
    int aCount, int bOffset, int bCount, int leftEnd, int rightStart,
    int diag, Comp comp) {

    // leftEnd and rightStart are defined from the origin, and diag is defined
    // from aOffset.
    // We only need to run a Merge Path search if the diagonal intersects the
    // segment that strides the left and right halves (i.e. is between leftEnd
    // and rightStart).
    if(aOffset + diag <= leftEnd) return diag;
    if(aOffset + diag >= rightStart) return aCount;

    bCount = min(bCount, rightStart - bOffset);
    int begin = max(max(leftEnd - aOffset, 0), diag - bCount);
    int end = min(diag, aCount);

    while(begin < end) {
        int mid = (begin + end)>> 1;
        int ai = aOffset + mid;
        int bi = bOffset + diag - 1 - mid;

        bool pred = !comp(keys[bi], keys[ai]);
        if(pred) begin = mid + 1;
        else end = mid;
    }
    return begin;
}

////////////////////////////////////////////////////////////////////////////////
// BalancedPath search
// Starts from the MergePath split on diagonal diag and adjusts the a-index
// so that a run of keys equal to b[bIndex] is shared between A and B.
// Returns int2(aIndex, star): aIndex is the adjusted split in A and star is
// set when the partition is "starred" (see the comments in the body).

template MGPU_HOST_DEVICE int2 BalancedPath(InputIt1 a, int aCount,
    InputIt2 b, int bCount, int diag, int levels, Comp comp) {

    typedef typename std::iterator_traits::value_type T;

    int p = MergePath(a, aCount, b, bCount, diag, comp);
    int aIndex = p;
    int bIndex = diag - p;

    bool star = false;
    if(bIndex < bCount) {
        if(Duplicates) {
            T x = b[bIndex];

            // Search for the beginning of the duplicate run in both A and B.
            // Only the elements in front of the merge path split are
            // searched: a[0, aIndex) and b[0, bIndex).
            int aStart = BiasedBinarySearch(a, aIndex, x, levels, comp);
            int bStart = BiasedBinarySearch(b, bIndex, x, levels, comp);

            // The distance between the merge path and the lower_bound is the
            // 'run'. We add up the a- and b- runs and evenly distribute them to
            // get a stairstep path.
            int aRun = aIndex - aStart;
            int bRun = bIndex - bStart;
            int xCount = aRun + bRun;

            // Attempt to advance b and regress a.
            int bAdvance = max(xCount>> 1, bRun);
            int bEnd = min(bCount, bStart + bAdvance + 1);
            int bRunEnd = BinarySearch(b + bIndex, bEnd - bIndex, x, comp) + bIndex;
            bRun = bRunEnd - bStart;

            bAdvance = min(bAdvance, bRun);
            int aAdvance = xCount - bAdvance;

            // A takes exactly one more element of the run than B while B
            // still has run elements left: mark the partition as starred.
            bool roundUp = (aAdvance == bAdvance + 1) && (bAdvance < bRun);
            aIndex = aStart + aAdvance;

            if(roundUp) star = true;
        } else {
            if(aIndex && aCount) {
                T aKey = a[aIndex - 1];
                T bKey = b[bIndex];

                // If the last consumed element in A (aIndex - 1) is the same as
                // the next element in B (bIndex), we're sitting at a starred
                // partition.
                if(!comp(aKey, bKey)) star = true;
            }
        }
    }
    return make_int2(aIndex, star);
}

} // namespace mgpu
================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctasegreduce.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* ******************************************************************************/
/****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/

#pragma once

#include "ctasegscan.cuh"
#include "ctasearch.cuh"

namespace mgpu {

////////////////////////////////////////////////////////////////////////////////
// Segmented reduce utility functions.

// Extract the upper-bound indices from the coded ranges. Decrement to include
// the first addressed row/segment.

// Decoded form of two packed limits (see DeviceShiftRange).
struct SegReduceRange {
    int begin;
    int end;
    int total;
    bool flushLast;
};

// Decodes a pair of packed limits. The low 31 bits of limit0 and limit1 are
// the begin and end indices, and total is their difference. flushLast is true
// when the top bit of limit1 is clear; when it is false, end is extended by
// one.
MGPU_DEVICE SegReduceRange DeviceShiftRange(int limit0, int limit1) {
    SegReduceRange range;
    range.begin = 0x7fffffff & limit0;
    range.end = 0x7fffffff & limit1;
    range.total = range.end - range.begin;
    range.flushLast = 0 == (0x80000000 & limit1);
    range.end += !range.flushLast;
    return range;
}

// Reconstitute row/segment indices from a starting row index and packed end
// flags. Used for pre-processed versions of interval reduce and interval Spmv.
// rows[0] is first; bit i of endFlags advances the row between rows[i] and
// rows[i + 1].
template MGPU_DEVICE void DeviceExpandFlagsToRows(int first, int endFlags,
    int rows[VT + 1]) {

    rows[0] = first;
    #pragma unroll
    for(int i = 0; i < VT; ++i) {
        if((1<< i) & endFlags) ++first;
        rows[i + 1] = first;
    }
}

////////////////////////////////////////////////////////////////////////////////
// After loading CSR terms into shared memory, each thread binary searches
// (upper-bound) to find its starting point. Each thread then walks forward,
// emitting the csr0-relative row indices to register.
// Returns a bit mask in which bit (i - 1) is set when the row changes at
// element tidOffset + i. end is the sentinel offset used once the row cursor
// runs past the last entry of csr_shared.

template MGPU_DEVICE int DeviceExpandCsrRows(int tidOffset, int* csr_shared,
    int numRows, int end, int rows[VT + 1], int rowStarts[VT]) {

    // Each thread binary searches for its starting row.
    int row = BinarySearch(csr_shared, numRows, tidOffset, mgpu::less()) - 1;

    // Each thread starts at row and scans forward, emitting row IDs into
    // register. Store the CTA-local row index (starts at 0) to rows and the
    // start of the row (globally) to rowStarts.
    int curOffset = csr_shared[row];
    int nextOffset = (row + 1 < numRows) ? csr_shared[row + 1] : end;

    rows[0] = row;
    rowStarts[0] = curOffset;
    int endFlags = 0;

    #pragma unroll
    for(int i = 1; i <= VT; ++i) {
        // Advance the row cursor when the iterator hits the next row offset.
        if(tidOffset + i == nextOffset) {
            // Set an end flag when the cursor advances to the next row.
            endFlags |= 1<< (i - 1);

            // Advance the cursor and load the next row offset.
            ++row;
            curOffset = nextOffset;
            nextOffset = (row + 1 < numRows) ? csr_shared[row + 1] : end;
        }
        rows[i] = row;
        // rowStarts has only VT entries; rows has VT + 1.
        if(i < VT) rowStarts[i] = curOffset;
    }
    __syncthreads();

    return endFlags;
}

////////////////////////////////////////////////////////////////////////////////
// DeviceSegReducePrepare
// Expand non-empty interval of CSR elements into row indices. Compute end-flags
// by comparing adjacent row IDs.

// DeviceSegReducePrepare may be called either by a pre-processing kernel or by
// the kernel that actually evaluates the segmented reduction if no
// preprocessing is desired.

// Result of DeviceSegReducePrepare: the per-thread end-flag mask and the
// distance returned by DeviceFindSegScanDelta.
struct SegReduceTerms {
    int endFlags;
    int tidDelta;
};

template MGPU_DEVICE SegReduceTerms DeviceSegReducePrepare(int* csr_shared,
    int numRows, int tid, int gid, bool flushLast, int rows[VT + 1],
    int rowStarts[VT]) {

    // Pass a sentinel (end) to point to the next segment start. If we flush,
    // this is the end of this tile. Otherwise it is INT_MAX.
    int endFlags = DeviceExpandCsrRows(gid + VT * tid, csr_shared, numRows, flushLast ? (gid + NT * VT) : INT_MAX, rows, rowStarts);

    // Find the distance to scan to compute carry-in for each thread. Use the
    // existence of an end flag anywhere in the thread to determine if
    // carry-out values from the left should propagate through to the right.
    // csr_shared is reused as the scratch buffer for this search.
    int tidDelta = DeviceFindSegScanDelta(tid, rows[0] != rows[VT], csr_shared);

    SegReduceTerms terms = { endFlags, tidDelta };
    return terms;
}

////////////////////////////////////////////////////////////////////////////////
// CTASegReduce
// Core segmented reduction code. Supports fast-path and slow-path for intra-CTA
// segmented reduction. Stores partials to global memory.
// Callers feed CTASegReduce::ReduceToGlobal values in thread order.

template struct CTASegReduce {
    typedef CTASegScan SegScan;

    enum {
        NV = NT * VT,
        Capacity = HalfCapacity ? (NV / 2) : NV
    };

    // The scan scratch and the staging buffer for finished reductions share
    // the same storage.
    union Storage {
        typename SegScan::Storage segScanStorage;
        T values[Capacity];
    };

    // rows[i] != rows[i + 1] marks element i as the last of its segment.
    // Finished segment reductions go to dest_global + startRow, indexed by
    // the CTA-local row, and the CTA's carry-out is written to
    // carryOut_global[block] by thread 0.
    template MGPU_DEVICE static void ReduceToGlobal(const int rows[VT + 1],
        int total, int tidDelta, int startRow, int block, int tid, T data[VT],
        DestIt dest_global, T* carryOut_global, T identity, Op op,
        Storage& storage) {

        // Run a segmented scan within the thread.
        T x, localScan[VT];
        #pragma unroll
        for(int i = 0; i < VT; ++i) {
            x = i ? op(x, data[i]) : data[i];
            localScan[i] = x;
            if(rows[i] != rows[i + 1]) x = identity;
        }

        // Run a parallel segmented scan over the carry-out values to compute
        // carry-in.
        T carryOut;
        T carryIn = SegScan::SegScanDelta(tid, tidDelta, x, storage.segScanStorage, &carryOut, identity, op);

        // Store the carry-out for the entire CTA to global memory.
        if(!tid) carryOut_global[block] = carryOut;

        dest_global += startRow;
        if(HalfCapacity && total > Capacity) {
            // Add carry-in to each thread-local scan value. Store directly
            // to global.
            #pragma unroll
            for(int i = 0; i < VT; ++i) {
                // Add the carry-in to the local scan.
                T x2 = op(carryIn, localScan[i]);

                // Store on the end flag and clear the carry-in.
                if(rows[i] != rows[i + 1]) {
                    carryIn = identity;
                    dest_global[rows[i]] = x2;
                }
            }
        } else {
            // All partials fit in shared memory. Add carry-in to each thread-
            // local scan value.
            #pragma unroll
            for(int i = 0; i < VT; ++i) {
                // Add the carry-in to the local scan.
                T x2 = op(carryIn, localScan[i]);

                // Store reduction when the segment changes and clear the
                // carry-in.
                if(rows[i] != rows[i + 1]) {
                    storage.values[rows[i]] = x2;
                    carryIn = identity;
                }
            }
            __syncthreads();

            // Cooperatively store reductions to global memory.
            for(int index = tid; index < total; index += NT)
                dest_global[index] = storage.values[index];
            __syncthreads();
        }
    }
};

} // namespace mgpu
================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctasegscan.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "ctascan.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // DeviceFindSegScanDelta // Runs an inclusive max-index scan over binary inputs. template MGPU_DEVICE int DeviceFindSegScanDelta(int tid, bool flag, int* delta_shared) { const int NumWarps = NT / 32; int warp = tid / 32; int lane = 31 & tid; uint warpMask = 0xffffffff>> (31 - lane); // inclusive search uint ctaMask = 0x7fffffff>> (31 - lane); // exclusive search uint warpBits = __ballot(flag); delta_shared[warp] = warpBits; __syncthreads(); if(tid < NumWarps) { uint ctaBits = __ballot(0 != delta_shared[tid]); int warpSegment = 31 - clz(ctaMask & ctaBits); int start = (-1 != warpSegment) ? (31 - clz(delta_shared[warpSegment]) + 32 * warpSegment) : 0; delta_shared[NumWarps + tid] = start; } __syncthreads(); // Find the closest flag to the left of this thread within the warp. // Include the flag for this thread. 
int start = 31 - clz(warpMask & warpBits); if(-1 != start) start += ~31 & tid; else start = delta_shared[NumWarps + warp]; __syncthreads(); return tid - start; } //////////////////////////////////////////////////////////////////////////////// // CTASegScan template > struct CTASegScan { typedef _Op Op; typedef typename Op::result_type T; enum { NumWarps = NT / 32, Size = NT, Capacity = 2 * NT }; union Storage { int delta[NumWarps]; T values[Capacity]; }; // Each thread passes the reduction of the LAST SEGMENT that it covers. // flag is set to true if there's at least one segment flag in the thread. // SegScan returns the reduction of values for the first segment in this // thread over the preceding threads. // Return the value init for the first thread. // When scanning single elements per thread, interpret the flag as a BEGIN // FLAG. If tid's flag is set, its value belongs to thread tid + 1, not // thread tid. // The function returns the reduction of the last segment in the CTA. MGPU_DEVICE static T SegScanDelta(int tid, int tidDelta, T x, Storage& storage, T* carryOut, T identity = (T)0, Op op = Op()) { // Run an inclusive scan int first = 0; storage.values[first + tid] = x; __syncthreads(); #pragma unroll for(int offset = 1; offset < NT; offset += offset) { if(tidDelta >= offset) x = op(storage.values[first + tid - offset], x); first = NT - first; storage.values[first + tid] = x; __syncthreads(); } // Get the exclusive scan. x = tid ? storage.values[first + tid - 1] : identity; *carryOut = storage.values[first + NT - 1]; __syncthreads(); return x; } MGPU_DEVICE static T SegScan(int tid, T x, bool flag, Storage& storage, T* carryOut, T identity = (T)0, Op op = Op()) { // Find the left-most thread that covers the first segment of this // thread. 
int tidDelta = DeviceFindSegScanDelta(tid, flag, storage.delta); return SegScanDelta(tid, tidDelta, x, storage, carryOut, identity, op); } }; } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/ctasegsort.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "ctascan.cuh" #include "ctasearch.cuh" #include "loadstore.cuh" #include "sortnetwork.cuh" namespace mgpu { template MGPU_DEVICE void SegmentedSerialMerge(const T* keys_shared, int aBegin, int aEnd, int bBegin, int bEnd, T results[VT], int indices[VT], int leftEnd, int rightStart, Comp comp, bool sync = true) { bEnd = min(rightStart, bEnd); T aKey = keys_shared[aBegin]; T bKey = keys_shared[bBegin]; #pragma unroll for(int i = 0; i < VT; ++i) { bool p; // If A has run out of inputs, emit B. if(aBegin >= aEnd) p = false; else if(bBegin >= bEnd || aBegin < leftEnd) // B has hit the end of the middle segment. // Emit A if A has inputs remaining in the middle segment. p = true; else // Emit the smaller element in the middle segment. p = !comp(bKey, aKey); results[i] = p ? aKey : bKey; indices[i] = p ? aBegin : bBegin; if(p) aKey = keys_shared[++aBegin]; else bKey = keys_shared[++bBegin]; } if(sync) { __syncthreads(); } } //////////////////////////////////////////////////////////////////////////////// // CTASegsortPass template MGPU_DEVICE void CTASegsortPass(T* keys_shared, int* ranges_shared, int tid, int pass, T results[VT], int indices[VT], int2& activeRange, Comp comp) { // Locate the intervals of the input lists. int3 frame = FindMergesortFrame(2<< pass, tid, VT); int a0 = frame.x; int b0 = frame.y; int listLen = frame.z; int list = tid>> pass; int listParity = 1 & list; int diag = VT * tid - frame.x; // Fetch the active range for the list this thread's list is merging with. 
int siblingRange = ranges_shared[1 ^ list]; int siblingStart = 0x0000ffff & siblingRange; int siblingEnd = siblingRange>> 16; // Create a new active range for the merge. int leftEnd = listParity ? siblingEnd : activeRange.y; int rightStart = listParity ? activeRange.x : siblingStart; activeRange.x = min(activeRange.x, siblingStart); activeRange.y = max(activeRange.y, siblingEnd); int p = SegmentedMergePath(keys_shared, a0, listLen, b0, listLen, leftEnd, rightStart, diag, comp); int a0tid = a0 + p; int b0tid = b0 + diag - p; SegmentedSerialMerge(keys_shared, a0tid, b0, b0tid, b0 + listLen, results, indices, leftEnd, rightStart, comp); // Store the ranges to shared memory. if(0 == diag) ranges_shared[list>> 1] = (int)bfi(activeRange.y, activeRange.x, 16, 16); } //////////////////////////////////////////////////////////////////////////////// // CTASegsortLoop template MGPU_DEVICE int2 CTASegsortLoop(KeyType threadKeys[VT], ValType threadValues[VT], KeyType* keys_shared, ValType* values_shared, int* ranges_shared, int tid, int2 activeRange, Comp comp) { const int NumPasses = sLogPow2::value; #pragma unroll for(int pass = 0; pass < NumPasses; ++pass) { int indices[VT]; CTASegsortPass(keys_shared, ranges_shared, tid, pass, threadKeys, indices, activeRange, comp); if(HasValues) { // Exchange values through shared memory. DeviceThreadToShared(threadValues, tid, values_shared); DeviceGather(NT * VT, values_shared, indices, tid, threadValues); } // Store results in shared memory in sorted order. DeviceThreadToShared(threadKeys, tid, keys_shared); } return activeRange; } //////////////////////////////////////////////////////////////////////////////// // CTASegsort // Pass keys and values in register. On return, values are returned in register // and keys returned in shared memory. 
template MGPU_DEVICE int2 CTASegsort(KeyType threadKeys[VT], ValType threadValues[VT], int tid, int headFlags, KeyType* keys_shared, ValType* values_shared, int* ranges_shared, Comp comp) { if(Stable) // Odd-even transpose sort. OddEvenTransposeSortFlags(threadKeys, threadValues, headFlags, comp); else // Batcher's odd-even mergesort. OddEvenMergesortFlags(threadKeys, threadValues, headFlags, comp); // Record the first and last occurrence of head flags in this segment. int blockEnd = 31 - clz(headFlags); if(-1 != blockEnd) blockEnd += VT * tid; int blockStart = ffs(headFlags); blockStart = blockStart ? (VT * tid - 1 + blockStart) : (NT * VT); ranges_shared[tid] = (int)bfi(blockEnd, blockStart, 16, 16); // Store back to shared mem. The values are in VT-length sorted lists. // These are merged recursively. DeviceThreadToShared(threadKeys, tid, keys_shared); int2 activeRange = CTASegsortLoop(threadKeys, threadValues, keys_shared, values_shared, ranges_shared, tid, make_int2(blockStart, blockEnd), comp); return activeRange; } template MGPU_DEVICE int2 CTASegsortKeys(KeyType threadKeys[VT], int tid, int headFlags, KeyType* keys_shared, int* ranges_shared, Comp comp) { int valuesTemp[VT]; return CTASegsort(threadKeys, valuesTemp, tid, headFlags, keys_shared, (int*)keys_shared, ranges_shared, comp); } template MGPU_DEVICE int2 CTASegsortPairs(KeyType threadKeys[VT], ValType threadValues[VT], int tid, int headFlags, KeyType* keys_shared, ValType* values_shared, int* ranges_shared, Comp comp) { return CTASegsort(threadKeys, threadValues, tid, headFlags, keys_shared, values_shared, ranges_shared, comp); } //////////////////////////////////////////////////////////////////////////////// // DeviceSegBlocksort // Load keys and values from global memory, sort in shared memory, and store // back to global memory. Store the left-most and right-most encountered // headflag locations to ranges_global to prepare for the next pass. 
// This function is factored out of the blocksort kernel to allow easier // customization of that kernel - we have two implementations currently: // sort over indices and sort over bitfield. template MGPU_DEVICE void DeviceSegBlocksort(InputIt1 keys_global, InputIt2 values_global, int count2, KeyType* keys_shared, ValType* values_shared, int* ranges_shared, int headFlags, int tid, int block, OutputIt1 keysDest_global, OutputIt2 valsDest_global, int* ranges_global, Comp comp) { // Load keys into register in thread order. int gid = NT * VT * block; KeyType threadKeys[VT]; DeviceGlobalToShared(count2, keys_global + gid, tid, keys_shared); DeviceSharedToThread(keys_shared, tid, threadKeys); // Load the values from global memory and into register in thread order. ValType threadValues[VT]; if(HasValues) { DeviceGlobalToShared(count2, values_global + gid, tid, values_shared); DeviceSharedToThread(values_shared, tid, threadValues); } // Run the CTA segmented blocksort. int2 activeRange = CTASegsort(threadKeys, threadValues, tid, headFlags, keys_shared, values_shared, ranges_shared, comp); // Store the keys to global memory. DeviceSharedToGlobal(count2, keys_shared, tid, keysDest_global + gid); if(HasValues) { // Store the values to global memory. DeviceThreadToShared(threadValues, tid, values_shared); DeviceSharedToGlobal(count2, values_shared, tid, valsDest_global + gid, false); } // Store the 16-bit packed ranges. These are used by all merge kernels and // the first level of global segmented merge path partitioning. if(!tid) ranges_global[block] = bfi(activeRange.y, activeRange.x, 16, 16); } //////////////////////////////////////////////////////////////////////////////// // DeviceIndicesToHeadFlags // Load indices from an array and cooperatively turn into a head flag bitfield // for each thread.
template MGPU_DEVICE int DeviceIndicesToHeadFlags(const int* indices_global, const int* partitions_global, int tid, int block, int count2, int* words_shared, byte* flags_shared) { const int FlagWordsPerThread = MGPU_DIV_UP(VT, 4); int gid = NT * VT * block; int p0 = partitions_global[block]; int p1 = partitions_global[block + 1]; int headFlags = 0; if(p1 > p0 || count2 < NT * VT) { // Clear the flag bytes, then loop through the indices and poke in flag // values. #pragma unroll for(int i = 0; i < FlagWordsPerThread; ++i) words_shared[NT * i + tid] = 0; __syncthreads(); for(int index = p0 + tid; index < p1; index += NT) { int headFlag = indices_global[index]; flags_shared[headFlag - gid] = 1; } __syncthreads(); // Combine all the head flags for this thread. int first = VT * tid; int offset = first / 4; int prev = words_shared[offset]; int mask = 0x3210 + 0x1111 * (3 & first); #pragma unroll for(int i = 0; i < FlagWordsPerThread; ++i) { // Gather the next four flags. int next = words_shared[offset + 1 + i]; int x = prmt(prev, next, mask); prev = next; // Set the head flag bits. if(0x00000001 & x) headFlags |= 1<< (4 * i); if(0x00000100 & x) headFlags |= 1<< (4 * i + 1); if(0x00010000 & x) headFlags |= 1<< (4 * i + 2); if(0x01000000 & x) headFlags |= 1<< (4 * i + 3); } __syncthreads(); // Set head flags for out-of-range keys. int outOfRange = min(VT, first + VT - count2); if(outOfRange > 0) headFlags = bfi(0xffffffff, headFlags, VT - outOfRange, outOfRange); // Clear head flags above VT. 
headFlags &= (1<< VT) - 1; } return headFlags; } //////////////////////////////////////////////////////////////////////////////// // SegSortSupport struct SegSortSupport { int* ranges_global; int2* ranges2_global; int4* mergeList_global; int* copyList_global; int2* queueCounters_global; int2* nextCounters_global; byte* copyStatus_global; }; //////////////////////////////////////////////////////////////////////////////// // DeviceSegSortMerge template MGPU_DEVICE void DeviceSegSortMerge(const KeyType* keys_global, const ValueType* values_global, int2 segmentRange, int tid, int block, int4 range, int pass, KeyType* keys_shared, int* indices_shared, KeyType* keysDest_global, ValueType* valsDest_global, Comp comp) { const int NV = NT * VT; int gid = NV * block; // Load the local compressed segment indices. int a0 = range.x; int aCount = range.y - range.x; int b0 = range.z; int bCount = range.w - range.z; DeviceLoad2ToShared(keys_global + a0, aCount, keys_global + b0, bCount, tid, keys_shared); //////////////////////////////////////////////////////////////////////////// // Run a merge path to find the starting point for each thread to merge. // If the entire warp fits into the already-sorted segments, we can skip // sorting it and leave its keys in shared memory. Doing this on the warp // level rather than thread level (also legal) gives slightly better // performance. int segStart = segmentRange.x; int segEnd = segmentRange.y; int listParity = 1 & (block>> pass); int warpOffset = VT * (~31 & tid); bool sortWarp = listParity ? // The spliced segment is to the left (segStart). (warpOffset < segStart) : // The spliced segment is to the right (segEnd). (warpOffset + 32 * VT > segEnd); KeyType threadKeys[VT]; int indices[VT]; if(sortWarp) { int diag = VT * tid; int mp = SegmentedMergePath(keys_shared, 0, aCount, aCount, bCount, listParity ? 0 : segEnd, listParity ? 
segStart : NV, diag, comp); int a0tid = mp; int a1tid = aCount; int b0tid = aCount + diag - mp; int b1tid = aCount + bCount; // Serial merge into register. All threads in the CTA so we hoist the // check for list parity outside the function call to simplify the // logic. Unlike in the blocksort, this does not cause warp divergence. SegmentedSerialMerge(keys_shared, a0tid, a1tid, b0tid, b1tid, threadKeys, indices, listParity ? 0 : segEnd, listParity ? segStart : NV, comp, false); } __syncthreads(); // Store sorted data in register back to shared memory. Then copy to global. if(sortWarp) DeviceThreadToShared(threadKeys, tid, keys_shared, false); __syncthreads(); DeviceSharedToGlobal(aCount + bCount, keys_shared, tid, keysDest_global + gid); //////////////////////////////////////////////////////////////////////////// // Use the merge indices to gather values from global memory. Store directly // to valsDest_global. if(HasValues) { // Transpose the gather indices to help coalesce loads. if(sortWarp) DeviceThreadToShared(indices, tid, indices_shared, false); else { #pragma unroll for(int i = 0; i < VT; ++i) indices_shared[VT * tid + i] = VT * tid + i; } __syncthreads(); DeviceTransferMergeValuesShared(aCount + bCount, values_global + a0, values_global + b0, aCount, indices_shared, tid, valsDest_global + NV * block); } } //////////////////////////////////////////////////////////////////////////////// // DeviceSegSortCopy template MGPU_DEVICE void DeviceSegSortCopy(const KeyType* keys_global, const ValueType* values_global, int tid, int block, int count, KeyType* keysDest_global, ValueType* valsDest_global) { int gid = NT * VT * block; int count2 = min(NT * VT, count - gid); DeviceGlobalToGlobal(count2, keys_global + gid, tid, keysDest_global + gid); if(HasValues) DeviceGlobalToGlobal(count2, values_global + gid, tid, valsDest_global + gid); } } // namespace mgpu ================================================ FILE: 
3rdparty/ctc_include/contrib/moderngpu/include/device/ctasortedsearch.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "../mgpudevice.cuh" #include "ctasearch.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // DeviceSerialSearch template MGPU_DEVICE int3 DeviceSerialSearch(const T* keys_shared, int aBegin, int aEnd, int bBegin, int bEnd, int aOffset, int bOffset, int* indices, Comp comp) { const int FlagA = IndexA ? 0x80000000 : 1; const int FlagB = IndexB ? 0x80000000 : 1; T aKey = keys_shared[aBegin]; T bKey = keys_shared[bBegin]; T aPrev, bPrev; if(aBegin > 0) aPrev = keys_shared[aBegin - 1]; if(bBegin > 0) bPrev = keys_shared[bBegin - 1]; int decisions = 0; int matchCountA = 0; int matchCountB = 0; #pragma unroll for(int i = 0; i < VT; ++i) { bool p; if(RangeCheck && aBegin >= aEnd) p = false; else if(RangeCheck && bBegin >= bEnd) p = true; else p = (MgpuBoundsUpper == Bounds) ? comp(aKey, bKey) : !comp(bKey, aKey); if(p) { // aKey is smaller than bKey, so it is inserted before bKey. // Save bKey's index (bBegin + first) as the result of the search // and advance to the next needle in A. bool match = false; if(MatchA) { // Test if there is an element in B that matches aKey. if(MgpuBoundsUpper == Bounds) { // Upper Bound: We're inserting aKey after bKey. If there // is a match for aKey it must be bPrev. Check that bPrev // is in range and equal to aKey. // The predicate test result !comp(aKey, bPrev) was // established on the previous A-advancing iteration (it // failed the comp(aKey, bKey) test to get us to this // point). Check the other half of the equality condition // with a second comparison. 
bool inRange = !RangeCheck || (bBegin > aEnd); match = inRange && !comp(bPrev, aKey); } else { // Lower Bound: We're inserting aKey before bKey. If there // is a match for aKey, it must be bKey. Check that bKey // is in range and equal to aKey. // The predicate test !comp(bKey, aKey) has established one // half of the equality condition. We establish the other // half with a second comparison. bool inRange = !RangeCheck || (bBegin < bEnd); match = inRange && !comp(aKey, bKey); } } int index = 0; if(IndexA) index = bOffset + bBegin; if(match) index |= FlagA; if(IndexA || MatchA) indices[i] = index; matchCountA += match; // Mark the decision bit to indicate that this iteration has // progressed A (the needles). decisions |= 1<< i; aPrev = aKey; aKey = keys_shared[++aBegin]; } else { // aKey is larger than bKey, so it is inserted after bKey (but we // don't know where yet). Advance the B index to the next element in // the haystack to continue the search for the current needle. bool match = false; if(MatchB) { if(MgpuBoundsUpper == Bounds) { // Upper Bound: aKey is not smaller than bKey. We advance to // the next haystack element in B. If there is a match in A // for bKey it must be aKey. By entering this branch we've // verified that !comp(aKey, bKey). Making the reciprocal // comparison !comp(bKey, aKey) establishes aKey == bKey. bool inRange = !RangeCheck || ((bBegin < bEnd) && (aBegin < aEnd)); match = inRange && !comp(bKey, aKey); } else { // Lower Bound: bKey is smaller than aKey. We advance to the // next element in B. If there is a match for bKey, it must // be aPrev. The previous A-advancing iteration proved that // !comp(bKey, aPrev). We test !comp(aPrev, bKey) for the // other half of the equality condition. 
bool inRange = !RangeCheck || ((bBegin < bEnd) && (aBegin > 0)); match = inRange && !comp(aPrev, bKey); } } int index = 0; if(IndexB) index = aOffset + aBegin; if(match) index |= FlagB; if(IndexB || MatchB) indices[i] = index; matchCountB += match; // Keep the decision bit cleared to indicate that this iteration // has progressed B (the haystack). bPrev = bKey; bKey = keys_shared[++bBegin]; } } return make_int3(decisions, matchCountA, matchCountB); } //////////////////////////////////////////////////////////////////////////////// // CTASortedSearch // Take keys in shared memory and return indices and b-match flags in shared // memory. // NOTE: This function doesn't do any strided-to-thread order transposes so // using an even number of values per thread will incur no additional bank // conflicts. template MGPU_DEVICE int2 CTASortedSearch(T* keys_shared, int aStart, int aCount, int aEnd, int a0, int bStart, int bCount, int bEnd, int b0, bool extended, int tid, int* indices_shared, Comp comp) { // Run a merge path to find the start of the serial search for each thread. int diag = VT * tid; int mp = MergePath(keys_shared + aStart, aCount, keys_shared + bStart, bCount, diag, comp); int a0tid = mp; int b0tid = diag - mp; // Serial search into register. int3 results; int indices[VT]; if(extended) results = DeviceSerialSearch(keys_shared, a0tid + aStart, aEnd, b0tid + bStart, bEnd, a0 - aStart, b0 - bStart, indices, comp); else results = DeviceSerialSearch(keys_shared, a0tid + aStart, aEnd, b0tid + bStart, bEnd, a0 - aStart, b0 - bStart, indices, comp); __syncthreads(); // Compact the indices into shared memory. Use the decision bits (set is A, // cleared is B) to select the destination. 
int decisions = results.x; b0tid += aCount; #pragma unroll for(int i = 0; i < VT; ++i) { if((1<< i) & decisions) { if(IndexA || MatchA) indices_shared[a0tid++] = indices[i]; } else { if(IndexB || MatchB) indices_shared[b0tid++] = indices[i]; } } __syncthreads(); // Return the match counts for A and B keys. return make_int2(results.y, results.z); } } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/devicetypes.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #if __CUDA_ARCH__ == 100 #error "COMPUTE CAPABILITY 1.0 NOT SUPPORTED BY MPGU. TRY 2.0!" #endif #include #include "../util/static.h" #ifdef _MSC_VER #define INLINESYMBOL __forceinline__ #else #define INLINESYMBOL inline #endif namespace mgpu { #define MGPU_HOST __host__ INLINESYMBOL #define MGPU_DEVICE __device__ INLINESYMBOL #define MGPU_HOST_DEVICE __host__ __device__ INLINESYMBOL const int WARP_SIZE = 32; const int LOG_WARP_SIZE = 5; //////////////////////////////////////////////////////////////////////////////// // Device-side comparison operators template struct less : public std::binary_function { MGPU_HOST_DEVICE bool operator()(T a, T b) { return a < b; } }; template struct less_equal : public std::binary_function { MGPU_HOST_DEVICE bool operator()(T a, T b) { return a <= b; } }; template struct greater : public std::binary_function { MGPU_HOST_DEVICE bool operator()(T a, T b) { return a > b; } }; template struct greater_equal : public std::binary_function { MGPU_HOST_DEVICE bool operator()(T a, T b) { return a >= b; } }; template struct equal_to : public std::binary_function { 
MGPU_HOST_DEVICE bool operator()(T a, T b) { return a == b; } }; template struct not_equal_to : public std::binary_function { MGPU_HOST_DEVICE bool operator()(T a, T b) { return a != b; } }; //////////////////////////////////////////////////////////////////////////////// // Device-side arithmetic operators template struct plus : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a + b; } }; template struct minus : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a - b; } }; template struct multiplies : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a * b; } }; template struct modulus : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a % b; } }; template struct bit_or : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a | b; } }; template struct bit_and : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a & b; } }; template struct bit_xor : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return a ^ b; } }; template struct maximum : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return max(a, b); } }; template struct minimum : public std::binary_function { MGPU_HOST_DEVICE T operator()(T a, T b) { return min(a, b); } }; //////////////////////////////////////////////////////////////////////////////// template MGPU_HOST_DEVICE void swap(T& a, T& b) { T c = a; a = b; b = c; } template struct DevicePair { T x, y; }; template MGPU_HOST_DEVICE DevicePair MakeDevicePair(T x, T y) { DevicePair p = { x, y }; return p; } template struct numeric_limits; template<> struct numeric_limits { MGPU_HOST_DEVICE static int min() { return INT_MIN; } MGPU_HOST_DEVICE static int max() { return INT_MAX; } MGPU_HOST_DEVICE static int lowest() { return INT_MIN; } MGPU_HOST_DEVICE static int AddIdent() { return 0; } MGPU_HOST_DEVICE static int MulIdent() { return 1; 
} }; template<> struct numeric_limits { MGPU_HOST_DEVICE static long long min() { return LLONG_MIN; } MGPU_HOST_DEVICE static long long max() { return LLONG_MAX; } MGPU_HOST_DEVICE static long long lowest() { return LLONG_MIN; } MGPU_HOST_DEVICE static long long AddIdent() { return 0; } MGPU_HOST_DEVICE static long long MulIdent() { return 1; } }; template<> struct numeric_limits { MGPU_HOST_DEVICE static uint min() { return 0; } MGPU_HOST_DEVICE static uint max() { return UINT_MAX; } MGPU_HOST_DEVICE static uint lowest() { return 0; } MGPU_HOST_DEVICE static uint AddIdent() { return 0; } MGPU_HOST_DEVICE static uint MulIdent() { return 1; } }; template<> struct numeric_limits { MGPU_HOST_DEVICE static unsigned long long min() { return 0; } MGPU_HOST_DEVICE static unsigned long long max() { return ULLONG_MAX; } MGPU_HOST_DEVICE static unsigned long long lowest() { return 0; } MGPU_HOST_DEVICE static unsigned long long AddIdent() { return 0; } MGPU_HOST_DEVICE static unsigned long long MulIdent() { return 1; } }; template<> struct numeric_limits { MGPU_HOST_DEVICE static float min() { return FLT_MIN; } MGPU_HOST_DEVICE static float max() { return FLT_MAX; } MGPU_HOST_DEVICE static float lowest() { return -FLT_MAX; } MGPU_HOST_DEVICE static float AddIdent() { return 0; } MGPU_HOST_DEVICE static float MulIdent() { return 1; } }; template<> struct numeric_limits { MGPU_HOST_DEVICE static double min() { return DBL_MIN; } MGPU_HOST_DEVICE static double max() { return DBL_MAX; } MGPU_HOST_DEVICE static double lowest() { return -DBL_MAX; } MGPU_HOST_DEVICE static double AddIdent() { return 0; } MGPU_HOST_DEVICE static double MulIdent() { return 1; } }; MGPU_HOST_DEVICE int2 operator+(int2 a, int2 b) { return make_int2(a.x + b.x, a.y + b.y); } MGPU_HOST_DEVICE int2& operator+=(int2& a, int2 b) { a = a + b; return a; } MGPU_HOST_DEVICE int2 operator*(int2 a, int2 b) { return make_int2(a.x * b.x, a.y * b.y); } MGPU_HOST_DEVICE int2& operator*=(int2& a, int2 b) { a = a * b; 
return a; } template MGPU_HOST_DEVICE T max(T a, T b) { #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 100) return std::max(a, b); #else return (a < b) ? b : a; #endif } template MGPU_HOST_DEVICE T min(T a, T b) { #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 100) return std::min(a, b); #else return (b < a) ? b : a; #endif } MGPU_HOST_DEVICE int2 max(int2 a, int2 b) { return make_int2(max(a.x, b.x), max(a.y, b.y)); } MGPU_HOST_DEVICE int2 min(int2 a, int2 b) { return make_int2(min(a.x, b.x), min(a.y, b.y)); } template<> struct numeric_limits { MGPU_HOST_DEVICE static int2 min() { return make_int2(INT_MIN, INT_MIN); } MGPU_HOST_DEVICE static int2 max() { return make_int2(INT_MAX, INT_MAX); } MGPU_HOST_DEVICE static int2 lowest() { return make_int2(INT_MIN, INT_MIN); } MGPU_HOST_DEVICE static int2 AddIdent() { return make_int2(0, 0); } MGPU_HOST_DEVICE static int2 MulIdent() { return make_int2(1, 1); } }; template class constant_iterator : public std::iterator_traits { public: MGPU_HOST_DEVICE constant_iterator(T value) : _value(value) { } MGPU_HOST_DEVICE T operator[](ptrdiff_t i) const { return _value; } MGPU_HOST_DEVICE T operator*() const { return _value; } MGPU_HOST_DEVICE constant_iterator operator+(ptrdiff_t diff) const { return constant_iterator(_value); } MGPU_HOST_DEVICE constant_iterator operator-(ptrdiff_t diff) const { return constant_iterator(_value); } MGPU_HOST_DEVICE constant_iterator& operator+=(ptrdiff_t diff) { return *this; } MGPU_HOST_DEVICE constant_iterator& operator-=(ptrdiff_t diff) { return *this; } private: T _value; }; template class counting_iterator : public std::iterator_traits { public: MGPU_HOST_DEVICE counting_iterator(T value) : _value(value) { } MGPU_HOST_DEVICE T operator[](ptrdiff_t i) { return _value + i; } MGPU_HOST_DEVICE T operator*() { return _value; } MGPU_HOST_DEVICE counting_iterator operator+(ptrdiff_t diff) { return counting_iterator(_value + diff); } MGPU_HOST_DEVICE counting_iterator operator-(ptrdiff_t diff) { 
return counting_iterator(_value - diff); } MGPU_HOST_DEVICE counting_iterator& operator+=(ptrdiff_t diff) { _value += diff; return *this; } MGPU_HOST_DEVICE counting_iterator& operator-=(ptrdiff_t diff) { _value -= diff; return *this; } private: T _value; }; template class step_iterator : public std::iterator_traits { public: MGPU_HOST_DEVICE step_iterator(T base, T step) : _base(base), _step(step), _offset(0) { } MGPU_HOST_DEVICE T operator[](ptrdiff_t i) { return _base + (_offset + i) * _step; } MGPU_HOST_DEVICE T operator*() { return _base + _offset * _step; } MGPU_HOST_DEVICE step_iterator operator+(ptrdiff_t diff) { step_iterator it = *this; it._offset += diff; return it; } MGPU_HOST_DEVICE step_iterator operator-(ptrdiff_t diff) { step_iterator it = *this; it._offset -= diff; return it; } MGPU_HOST_DEVICE step_iterator& operator+=(ptrdiff_t diff) { _offset += diff; return *this; } MGPU_HOST_DEVICE step_iterator& operator-=(ptrdiff_t diff) { _offset -= diff; return *this; } private: ptrdiff_t _offset; T _base, _step; }; } // namespace mgpu template MGPU_HOST_DEVICE mgpu::counting_iterator operator+(ptrdiff_t diff, mgpu::counting_iterator it) { return it + diff; } template MGPU_HOST_DEVICE mgpu::counting_iterator operator-(ptrdiff_t diff, mgpu::counting_iterator it) { return it + (-diff); } template MGPU_HOST_DEVICE mgpu::step_iterator operator+(ptrdiff_t diff, mgpu::step_iterator it) { return it + diff; } template MGPU_HOST_DEVICE mgpu::step_iterator operator-(ptrdiff_t diff, mgpu::step_iterator it) { return it + (-diff); } ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/deviceutil.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "intrinsics.cuh" namespace mgpu { // Get the difference between two pointers in bytes. 
MGPU_HOST_DEVICE ptrdiff_t PtrDiff(const void* a, const void* b) {
	// Result is (b - a) measured in bytes.
	return (const byte*)b - (const byte*)a;
}

// Offset a pointer by i bytes.
template<typename T>
MGPU_HOST_DEVICE const T* PtrOffset(const T* p, ptrdiff_t i) {
	return (const T*)((const byte*)p + i);
}
template<typename T>
MGPU_HOST_DEVICE T* PtrOffset(T* p, ptrdiff_t i) {
	return (T*)((byte*)p + i);
}

////////////////////////////////////////////////////////////////////////////////
// Task range support
// Evenly distributes variable-length arrays over a fixed number of CTAs.

// Returns (quotient, remainder) of numItems / numWorkers as an int2.
MGPU_HOST int2 DivideTaskRange(int numItems, int numWorkers) {
	div_t d = div(numItems, numWorkers);
	return make_int2(d.quot, d.rem);
}

// Returns the [x, y) task interval for `block`, where task is the
// (quotient, remainder) pair from DivideTaskRange. The first task.y blocks
// each receive one extra task.
MGPU_HOST_DEVICE int2 ComputeTaskRange(int block, int2 task) {
	int2 range;
	range.x = task.x * block;
	range.x += min(block, task.y);
	range.y = range.x + task.x + (block < task.y);
	return range;
}

// Same as above, but scales the interval by blockSize and clamps the end of
// the interval to count.
MGPU_HOST_DEVICE int2 ComputeTaskRange(int block, int2 task, int blockSize,
	int count) {
	int2 range = ComputeTaskRange(block, task);
	range.x *= blockSize;
	range.y = min(count, range.y * blockSize);
	return range;
}

////////////////////////////////////////////////////////////////////////////////
// DeviceExtractHeadFlags
// Input array flags is a bit array with 32 head flags per word.
// DeviceExtractHeadFlags returns numBits flags starting at bit index.

MGPU_HOST_DEVICE uint DeviceExtractHeadFlags(const uint* flags, int index,
	int numBits) {

	int index2 = index>> 5;
	int shift = 31 & index;
	uint headFlags = flags[index2]>> shift;
	int shifted = 32 - shift;

	if(shifted < numBits)
		// We also need to shift in the next set of bits.
		headFlags = bfi(flags[index2 + 1], headFlags, shifted, shift);

	// Shifting a 32-bit value by 32 is undefined in C++, so only mask when
	// fewer than 32 bits are requested; a request for 32 bits keeps the whole
	// word.
	if(numBits < 32)
		headFlags &= (1u<< numBits) - 1;
	return headFlags;
}

////////////////////////////////////////////////////////////////////////////////
// DevicePackHeadFlags
// Pack VT bits per thread at 32 bits/thread. Will consume an integer number of
// words, because CTA size is a multiple of 32. The first NT * VT / 32 threads
// return packed words.
template<int NT, int VT>
MGPU_DEVICE uint DevicePackHeadFlags(uint threadBits, int tid,
	uint* flags_shared) {

	// NT and VT are compile-time constants: WordCount threads each produce
	// one packed 32-bit word.
	const int WordCount = NT * VT / 32;

	// Each thread stores its thread bits to flags_shared[tid].
	flags_shared[tid] = threadBits;
	__syncthreads();

	uint packed = 0;
	if(tid < WordCount) {
		const int Items = MGPU_DIV_UP(32, VT);
		int index = 32 * tid;			// First flag bit of this word.
		int first = index / VT;			// Thread slot holding that bit.
		int rem = index - VT * first;	// Bit offset inside that slot.

		// Take the tail of the first slot, then append whole slots until 32
		// bits have been gathered.
		packed = flags_shared[first]>> rem;
		int bit = VT - rem;
		++first;

		#pragma unroll
		for(int i = 0; i < Items; ++i) {
			if(i < Items - 1 || bit < 32) {
				uint x = flags_shared[first + i];
				if(bit < 32) packed |= x<< bit;
				bit += VT;
			}
		}
	}
	__syncthreads();

	return packed;
}

} // namespace mgpu

================================================
FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
================================================
/******************************************************************************
 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * * Neither the name of the NVIDIA CORPORATION nor the
 * names of its contributors may be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/******************************************************************************
 *
 * Code and text by Sean Baxter, NVIDIA Research
 * See http://nvlabs.github.io/moderngpu for repository and documentation.
 *
 ******************************************************************************/

#include "devicetypes.cuh"

#pragma once

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"

namespace mgpu {

// Bit-pattern reinterpretation helpers between 64-bit scalars and 2x32-bit
// vector types. The strict-aliasing warning is silenced above for these casts.
MGPU_HOST_DEVICE uint2 ulonglong_as_uint2(uint64 x) {
	return *reinterpret_cast<uint2*>(&x);
}
MGPU_HOST_DEVICE uint64 uint2_as_ulonglong(uint2 x) {
	return *reinterpret_cast<uint64*>(&x);
}

MGPU_HOST_DEVICE int2 longlong_as_int2(int64 x) {
	return *reinterpret_cast<int2*>(&x);
}
MGPU_HOST_DEVICE int64 int2_as_longlong(int2 x) {
	return *reinterpret_cast<int64*>(&x);
}

MGPU_HOST_DEVICE int2 double_as_int2(double x) {
	return *reinterpret_cast<int2*>(&x);
}
MGPU_HOST_DEVICE double int2_as_double(int2 x) {
	return *reinterpret_cast<double*>(&x);
}

// Read/write the first (X) or second (Y) 32-bit word of a double in place.
MGPU_HOST_DEVICE void SetDoubleX(double& d, int x) {
	reinterpret_cast<int*>(&d)[0] = x;
}
MGPU_HOST_DEVICE int GetDoubleX(double d) {
	return double_as_int2(d).x;
}
MGPU_HOST_DEVICE void SetDoubleY(double& d, int y) {
	reinterpret_cast<int*>(&d)[1] = y;
}
MGPU_HOST_DEVICE int GetDoubleY(double d) {
	return double_as_int2(d).y;
}

////////////////////////////////////////////////////////////////////////////////
// PTX for bfe and bfi

#if __CUDA_ARCH__ >= 200

MGPU_DEVICE uint bfe_ptx(uint x, uint bit, uint numBits) {
	uint result;
	asm("bfe.u32 %0, %1, %2, %3;" :
		"=r"(result) : "r"(x), "r"(bit), "r"(numBits));
	return result;
}

MGPU_DEVICE uint bfi_ptx(uint x, uint y, uint bit, uint numBits) {
	uint result;
	asm("bfi.b32 %0, %1, %2, %3, %4;" :
		"=r"(result) : "r"(x), "r"(y), "r"(bit), "r"(numBits));
	return result;
}

MGPU_DEVICE uint prmt_ptx(uint a, uint b, uint index) {
	uint ret;
	asm("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index));
	return ret;
}

#endif // __CUDA_ARCH__ >= 200

#if CUDA_VERSION >= 9000

////////////////////////////////////////////////////////////////////////////////
// shfl_add
// Shuffle-up by `offset` lanes and, when the predicate returned by the shuffle
// is set, combine the received value with this lane's x. The result is 0 when
// __CUDA_ARCH__ < 300. This branch uses the *.sync form with an explicit
// thread mask.

MGPU_DEVICE int shfl_add(int x, int offset, int width = WARP_SIZE,
	unsigned int threadmask = 0xFFFFFFFF) {
	int result = 0;
#if __CUDA_ARCH__ >= 300
	int mask = (WARP_SIZE - width)<< 8;
	asm(
		"{.reg .s32 r0;"
		".reg .pred p;"
		"shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
		"@p add.s32 r0, r0, %5;"
		"mov.s32 %0, r0; }"
		: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(threadmask),
		"r"(x));
#endif
	return result;
}

MGPU_DEVICE int shfl_max(int x, int offset, int width = WARP_SIZE,
	unsigned int threadmask = 0xFFFFFFFF) {
	int result = 0;
#if __CUDA_ARCH__ >= 300
	int mask = (WARP_SIZE - width)<< 8;
	asm(
		"{.reg .s32 r0;"
		".reg .pred p;"
		"shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
		"@p max.s32 r0, r0, %5;"
		"mov.s32 %0, r0; }"
		: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(threadmask),
		"r"(x));
#endif
	return result;
}

#else

////////////////////////////////////////////////////////////////////////////////
// shfl_add
// Pre-CUDA-9 variant of the helpers above, using the non-sync shuffle.

MGPU_DEVICE int shfl_add(int x, int offset, int width = WARP_SIZE) {
	int result = 0;
#if __CUDA_ARCH__ >= 300
	int mask = (WARP_SIZE - width)<< 8;
	asm(
		"{.reg .s32 r0;"
		".reg .pred p;"
		"shfl.up.b32 r0|p, %1, %2, %3;"
		"@p add.s32 r0, r0, %4;"
		"mov.s32 %0, r0; }"
		: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(x));
#endif
	return result;
}

MGPU_DEVICE int shfl_max(int x, int offset, int width = WARP_SIZE) {
	int result = 0;
#if __CUDA_ARCH__ >= 300
	int mask = (WARP_SIZE - width)<< 8;
	asm(
		"{.reg .s32 r0;"
		".reg .pred p;"
		"shfl.up.b32 r0|p, %1, %2, %3;"
		"@p max.s32 r0, r0, %4;"
		"mov.s32 %0, r0; }"
		: "=r"(result) : "r"(x), "r"(offset), "r"(mask), "r"(x));
#endif
	return result;
}

#endif

////////////////////////////////////////////////////////////////////////////////
// brev, popc, clz, bfe, bfi, prmt
// Each function uses the hardware intrinsic for __CUDA_ARCH__ >= 200 and a
// portable loop otherwise.

// Reverse the bits in an integer.
MGPU_HOST_DEVICE uint brev(uint x) {
#if __CUDA_ARCH__ >= 200
	uint y = __brev(x);
#else
	uint y = 0;
	for(int i = 0; i < 32; ++i)
		y |= (1 & (x>> i))<< (31 - i);
#endif
	return y;
}

// Count number of bits in a register.
MGPU_HOST_DEVICE int popc(uint x) {
#if __CUDA_ARCH__ >= 200
	return __popc(x);
#else
	int c;
	for(c = 0; x; ++c)
		x &= x - 1;		// Clears the lowest set bit.
	return c;
#endif
}

// Count leading zeros - start from most significant bit.
MGPU_HOST_DEVICE int clz(int x) {
#if __CUDA_ARCH__ >= 200
	return __clz(x);
#else
	// Test bits on an unsigned copy so that probing bit 31 does not shift a
	// signed 1 into the sign bit.
	const uint ux = (uint)x;
	for(int i = 31; i >= 0; --i)
		if((1u<< i) & ux) return 31 - i;
	return 32;
#endif
}

// Find first set - start from least significant bit. LSB is 1. ffs(0) is 0.
MGPU_HOST_DEVICE int ffs(int x) {
#if __CUDA_ARCH__ >= 200
	return __ffs(x);
#else
	const uint ux = (uint)x;
	for(int i = 0; i < 32; ++i)
		if((1u<< i) & ux) return i + 1;
	return 0;
#endif
}

// Bit-field extract: returns numBits bits of x starting at position bit.
MGPU_HOST_DEVICE uint bfe(uint x, uint bit, uint numBits) {
#if __CUDA_ARCH__ >= 200
	return bfe_ptx(x, bit, numBits);
#else
	// Shifts by 32 or more are undefined in C++, so out-of-range positions
	// and full-width fields are handled explicitly.
	if(bit >= 32 || 0 == numBits) return 0;
	uint field = x>> bit;
	if(numBits < 32) field &= (1u<< numBits) - 1;
	return field;
#endif
}

// Bit-field insert: returns y with numBits bits starting at position bit
// replaced by the low bits of x.
MGPU_HOST_DEVICE uint bfi(uint x, uint y, uint bit, uint numBits) {
	uint result;
#if __CUDA_ARCH__ >= 200
	result = bfi_ptx(x, y, bit, numBits);
#else
	if(bit >= 32 || 0 == numBits) {
		// Nothing lands inside the 32-bit word.
		result = y;
	} else {
		// Clip the field to the word, avoiding both the unsigned wrap of
		// (bit + numBits) and a 32-bit shift when the field is the full word.
		if(numBits > 32 - bit) numBits = 32 - bit;
		uint mask = (numBits >= 32) ?
			0xffffffffu : (((1u<< numBits) - 1)<< bit);
		result = y & ~mask;
		result |= mask & (x<< bit);
	}
#endif
	return result;
}

// Byte permute: each 4-bit selector in index picks one of the eight bytes of
// (a, b); selector bit 3 requests replication of the chosen byte's top bit.
MGPU_HOST_DEVICE uint prmt(uint a, uint b, uint index) {
	uint result;
#if __CUDA_ARCH__ >= 200
	result = prmt_ptx(a, b, index);
#else
	result = 0;
	for(int i = 0; i < 4; ++i) {
		uint sel = 0xf & (index>> (4 * i));
		uint x = ((7 & sel) > 3) ? b : a;
		x = 0xff & (x>> (8 * (3 & sel)));
		if(8 & sel) x = (128 & x) ? 0xff : 0;
		result |= x<< (8 * i);
	}
#endif
	return result;
}

// Find log2(x) and optionally round up to the next integer logarithm.
MGPU_HOST_DEVICE int FindLog2(int x, bool roundUp = false) {
	int a = 31 - clz(x);
	if(roundUp) a += !MGPU_IS_POW_2(x);
	return a;
}

////////////////////////////////////////////////////////////////////////////////
// vset4

#if __CUDA_ARCH__ >= 300

// Performs four byte-wise comparisons and returns 1 for each byte that
// satisfies the conditional, and zero otherwise.
MGPU_DEVICE uint vset4_lt_add_ptx(uint a, uint b, uint c) {
	uint result;
	asm("vset4.u32.u32.lt.add %0, %1, %2, %3;" :
		"=r"(result) : "r"(a), "r"(b), "r"(c));
	return result;
}
MGPU_DEVICE uint vset4_eq_ptx(uint a, uint b) {
	uint result;
	asm("vset4.u32.u32.eq %0, %1, %2, %3;" :
		"=r"(result) : "r"(a), "r"(b), "r"(0));
	return result;
}

#endif // __CUDA_ARCH__ >= 300

// Per-byte (a < b), accumulated into c (one count per byte lane).
MGPU_HOST_DEVICE uint vset4_lt_add(uint a, uint b, uint c) {
	uint result;
#if __CUDA_ARCH__ >= 300
	result = vset4_lt_add_ptx(a, b, c);
#else
	result = c;
	if((0x000000ff & a) < (0x000000ff & b)) result += 0x00000001;
	if((0x0000ff00 & a) < (0x0000ff00 & b)) result += 0x00000100;
	if((0x00ff0000 & a) < (0x00ff0000 & b)) result += 0x00010000;
	if((0xff000000 & a) < (0xff000000 & b)) result += 0x01000000;
#endif
	return result;
}

// Per-byte (a == b); each byte lane of the result is 1 or 0.
MGPU_HOST_DEVICE uint vset4_eq(uint a, uint b) {
	uint result;
#if __CUDA_ARCH__ >= 300
	result = vset4_eq_ptx(a, b);
#else
	result = 0;
	if((0x000000ff & a) == (0x000000ff & b)) result = 0x00000001;
	if((0x0000ff00 & a) == (0x0000ff00 & b)) result += 0x00000100;
	if((0x00ff0000 & a) == (0x00ff0000 & b)) result += 0x00010000;
	if((0xff000000 & a) == (0xff000000 & b)) result += 0x01000000;
#endif
	return result;
}

////////////////////////////////////////////////////////////////////////////////
// umulhi: upper 32 bits of the 64-bit product x * y.

MGPU_HOST_DEVICE uint umulhi(uint x, uint y) {
#if __CUDA_ARCH__ >= 100
	return __umulhi(x, y);
#else
	uint64 product = (uint64)x * y;
	return (uint)(product>> 32);
#endif
}
////////////////////////////////////////////////////////////////////////////////
// ldg() function defined for all devices and all types. Only compiles to __ldg
// intrinsic for __CUDA_ARCH__ >= 320 && __CUDA_ARCH__ < 400 for types supported
// by __ldg in sm_32_intrinsics.h

// Trait: value is true only for types registered with DEFINE_LDG_TYPE.
template<typename T>
struct IsLdgType {
	enum { value = false };
};
#define DEFINE_LDG_TYPE(T) \
	template<> struct IsLdgType<T> { enum { value = true }; };

// Primary shim: an ordinary dereference.
template<typename T, bool UseLDG = IsLdgType<T>::value>
struct LdgShim {
	MGPU_DEVICE static T Ldg(const T* p) {
		return *p;
	}
};

#if __CUDA_ARCH__ >= 320 && __CUDA_ARCH__ < 400

	// List of __ldg-compatible types from sm_32_intrinsics.h.
	DEFINE_LDG_TYPE(char)
	DEFINE_LDG_TYPE(short)
	DEFINE_LDG_TYPE(int)
	DEFINE_LDG_TYPE(long long)
	DEFINE_LDG_TYPE(char2)
	DEFINE_LDG_TYPE(char4)
	DEFINE_LDG_TYPE(short2)
	DEFINE_LDG_TYPE(short4)
	DEFINE_LDG_TYPE(int2)
	DEFINE_LDG_TYPE(int4)
	DEFINE_LDG_TYPE(longlong2)

	DEFINE_LDG_TYPE(unsigned char)
	DEFINE_LDG_TYPE(unsigned short)
	DEFINE_LDG_TYPE(unsigned int)
	DEFINE_LDG_TYPE(unsigned long long)
	DEFINE_LDG_TYPE(uchar2)
	DEFINE_LDG_TYPE(uchar4)
	DEFINE_LDG_TYPE(ushort2)
	DEFINE_LDG_TYPE(ushort4)
	DEFINE_LDG_TYPE(uint2)
	DEFINE_LDG_TYPE(uint4)
	DEFINE_LDG_TYPE(ulonglong2)

	DEFINE_LDG_TYPE(float)
	DEFINE_LDG_TYPE(double)
	DEFINE_LDG_TYPE(float2)
	DEFINE_LDG_TYPE(float4)
	DEFINE_LDG_TYPE(double2)

	// Specialization selected for registered types: loads through __ldg.
	template<typename T>
	struct LdgShim<T, true> {
		MGPU_DEVICE static T Ldg(const T* p) {
			return __ldg(p);
		}
	};
#endif

// Entry point: dispatches to whichever LdgShim matches T.
template<typename T>
MGPU_DEVICE T ldg(const T* p) {
	return LdgShim<T>::Ldg(p);
}

////////////////////////////////////////////////////////////////////////////////
// Fast division for 31-bit integers.
// Uses the method in Hacker's Delight (2nd edition) page 228.
// Evaluates for denom > 1 and x < 2^31.
struct FastDivide {
	uint denom;		// Divisor supplied at construction.
	uint coef;		// Multiplier applied through umulhi.
	uint shift;		// Right shift applied after the high multiply.

	// Quotient x / denom, computed as a high multiply followed by a shift.
	MGPU_HOST_DEVICE uint Divide(uint x) {
		const uint high = umulhi(x, coef);
		return high>> shift;
	}

	// Remainder x % denom, derived from the quotient.
	MGPU_HOST_DEVICE uint Modulus(uint x) {
		const uint quotient = Divide(x);
		return x - quotient * denom;
	}

	// Precompute the multiplier and shift for denom_. The comment preceding
	// this struct states the supported range (denom > 1, x < 2^31).
	explicit FastDivide(uint denom_) {
		denom = denom_;
		const uint p = 31 + FindLog2(denom_, true);
		const unsigned long long rounded = (1ull<< p) + denom_ - 1;
		coef = (uint)(rounded / denom_);
		shift = p - 32;
	}
};

#pragma GCC diagnostic pop

} // namespace mgpu

================================================
FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/loadstore.cuh
================================================
/******************************************************************************
 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * * Neither the name of the NVIDIA CORPORATION nor the
 * names of its contributors may be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/******************************************************************************
 *
 * Code and text by Sean Baxter, NVIDIA Research
 * See http://nvlabs.github.io/moderngpu for repository and documentation.
 *
 ******************************************************************************/

#pragma once

#include "../mgpudevice.cuh"
#include "deviceutil.cuh"
#include "intrinsics.cuh"

namespace mgpu {

// Conventions used throughout this file (as visible in the loops below):
//   NT  - stride between consecutive elements handled by one thread.
//   VT  - number of elements handled per thread.
//   tid - this thread's index; strided element i lives at NT * i + tid.
//   sync - when true, the function ends with __syncthreads().

////////////////////////////////////////////////////////////////////////////////
// Cooperative load functions.

// Unpredicated strided load of VT elements into reg.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceSharedToReg(InputIt data, int tid, T* reg, bool sync) {

	#pragma unroll
	for(int i = 0; i < VT; ++i)
		reg[i] = data[NT * i + tid];

	if(sync) __syncthreads();
}

// Strided load where every element is range-checked against count.
// Out-of-range slots of reg are left untouched.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToRegPred(int count, InputIt data, int tid,
	T* reg, bool sync) {

	// TODO: Attempt to issue 4 loads at a time.
	#pragma unroll
	for(int i = 0; i < VT; ++i) {
		int index = NT * i + tid;
		if(index < count) reg[i] = data[index];
	}
	if(sync) __syncthreads();
}

// Strided load: unpredicated when a full NT * VT tile is available, otherwise
// falls back to the predicated version.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToReg(int count, InputIt data, int tid,
	T* reg, bool sync) {

	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = data[NT * i + tid];
	} else
		DeviceGlobalToRegPred<NT, VT>(count, data, tid, reg, false);
	if(sync) __syncthreads();
}

// Loads the first VT0 elements through DeviceGlobalToReg, then elements
// [VT0, VT1) with a per-element range check.
template<int NT, int VT0, int VT1, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToReg2(int count, InputIt data, int tid,
	T* reg, bool sync) {

	DeviceGlobalToReg<NT, VT0>(count, data, tid, reg, false);
	#pragma unroll
	for(int i = VT0; i < VT1; ++i) {
		int index = NT * i + tid;
		if(index < count) reg[i] = data[index];
	}
	if(sync) __syncthreads();
}

// Like DeviceGlobalToReg, but out-of-range slots are filled with init.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToRegDefault(int count, InputIt data, int tid,
	T* reg, T init, bool sync) {

	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = data[NT * i + tid];
	} else {
		#pragma unroll
		for(int i = 0; i < VT; ++i) {
			int index = NT * i + tid;
			reg[i] = init;
			if(index < count) reg[i] = data[index];
		}
	}
	if(sync) __syncthreads();
}

// Like DeviceGlobalToReg2, but out-of-range slots are filled with init.
template<int NT, int VT0, int VT1, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToRegDefault2(int count, InputIt data, int tid,
	T* reg, T init, bool sync) {

	DeviceGlobalToRegDefault<NT, VT0>(count, data, tid, reg, init, false);
	#pragma unroll
	for(int i = VT0; i < VT1; ++i) {
		int index = NT * i + tid;
		reg[i] = init;
		if(index < count) reg[i] = data[index];
	}
	if(sync) __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////

// Thread-order load: each thread reads VT consecutive elements starting at
// VT * tid, through ldg(). Out-of-range slots are left untouched.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToThread(int count, InputIt data, int tid,
	T* reg) {

	data += VT * tid;
	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = ldg(data + i);
	} else {
		count -= VT * tid;
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			if(i < count) reg[i] = ldg(data + i);
	}
}

// Thread-order load that fills out-of-range slots with init.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToThreadDefault(int count, InputIt data, int tid,
	T* reg, T init) {

	data += VT * tid;
	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = ldg(data + i);
	} else {
		count -= VT * tid;
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = (i < count) ? ldg(data + i) : init;
	}
}

////////////////////////////////////////////////////////////////////////////////
// Cooperative store functions.

// Unpredicated strided store; values are cast to dest's value type.
template<int NT, int VT, typename OutputIt, typename T>
MGPU_DEVICE void DeviceRegToShared(const T* reg, int tid,
	OutputIt dest, bool sync) {

	typedef typename std::iterator_traits<OutputIt>::value_type T2;
	#pragma unroll
	for(int i = 0; i < VT; ++i)
		dest[NT * i + tid] = (T2)reg[i];

	if(sync) __syncthreads();
}

// Predicated strided store of up to count elements.
template<int NT, int VT, typename OutputIt, typename T>
MGPU_DEVICE void DeviceRegToGlobal(int count, const T* reg, int tid,
	OutputIt dest, bool sync) {

	#pragma unroll
	for(int i = 0; i < VT; ++i) {
		int index = NT * i + tid;
		if(index < count)
			dest[index] = reg[i];
	}
	if(sync) __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// DeviceMemToMemLoop
// Transfer from shared memory to global, or global to shared, for transfers
// that are smaller than NT * VT in the average case. The goal is to reduce
// unnecessary comparison logic.

// Copies at most min(VT, 4) strided elements per thread.
template<int NT, int VT, typename InputIt, typename OutputIt>
MGPU_DEVICE void DeviceMemToMem4(int count, InputIt source, int tid,
	OutputIt dest, bool sync) {

	// NOTE(review): staging type assumed to be the input iterator's value
	// type; the template argument was not recoverable from this copy - confirm.
	typedef typename std::iterator_traits<InputIt>::value_type T;

	T x[VT];
	const int Count = (VT < 4) ? VT : 4;
	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < Count; ++i)
			x[i] = source[NT * i + tid];
		#pragma unroll
		for(int i = 0; i < Count; ++i)
			dest[NT * i + tid] = x[i];
	} else {
		#pragma unroll
		for(int i = 0; i < Count; ++i) {
			int index = NT * i + tid;
			if(index < count)
				x[i] = source[NT * i + tid];
		}
		#pragma unroll
		for(int i = 0; i < Count; ++i) {
			int index = NT * i + tid;
			if(index < count)
				dest[index] = x[i];
		}
	}
	if(sync) __syncthreads();
}

// Copies count elements in chunks of 4 * NT.
template<int NT, typename InputIt, typename OutputIt>
MGPU_DEVICE void DeviceMemToMemLoop(int count, InputIt source, int tid,
	OutputIt dest, bool sync) {

	for(int i = 0; i < count; i += 4 * NT)
		DeviceMemToMem4<NT, 4>(count - i, source + i, tid, dest + i,
			false);
	if(sync) __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// Functions to copy between shared and global memory where the average case is
// to transfer NT * VT elements.

// Predicated strided copy from source to dest with a cast to dest's value
// type.
template<int NT, int VT, typename T, typename OutputIt>
MGPU_DEVICE void DeviceSharedToGlobal(int count, const T* source, int tid,
	OutputIt dest, bool sync) {

	typedef typename std::iterator_traits<OutputIt>::value_type T2;
	#pragma unroll
	for(int i = 0; i < VT; ++i) {
		int index = NT * i + tid;
		if(index < count) dest[index] = (T2)source[index];
	}
	if(sync) __syncthreads();
}

// Load into registers, then store strided to dest.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToShared(int count, InputIt source, int tid,
	T* dest, bool sync) {

	T reg[VT];
	DeviceGlobalToReg<NT, VT>(count, source, tid, reg, false);
	DeviceRegToShared<NT, VT>(reg, tid, dest, sync);
}

template<int NT, int VT0, int VT1, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToShared2(int count, InputIt source, int tid,
	T* dest, bool sync) {

	T reg[VT1];
	DeviceGlobalToReg2<NT, VT0, VT1>(count, source, tid, reg, false);
	DeviceRegToShared<NT, VT1>(reg, tid, dest, sync);
}

template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToSharedDefault(int count, InputIt source, int tid,
	T* dest, T init, bool sync) {

	T reg[VT];
	DeviceGlobalToRegDefault<NT, VT>(count, source, tid, reg, init, false);
	DeviceRegToShared<NT, VT>(reg, tid, dest, sync);
}

template<int NT, int VT0, int VT1, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToSharedDefault2(int count, InputIt data, int tid,
	T* dest, T init, bool sync) {

	T reg[VT1];
	DeviceGlobalToRegDefault2<NT, VT0, VT1>(count, data, tid, reg, init, false);
	DeviceRegToShared<NT, VT1>(reg, tid, dest, sync);
}

////////////////////////////////////////////////////////////////////////////////

// Two-pass copy: the first pass moves Granularity = min(VT, 3) elements per
// thread, the second pass moves the remainder when count exceeds the first
// pass.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGlobalToSharedLoop(int count, InputIt source, int tid,
	T* dest, bool sync) {

	const int Granularity = MGPU_MIN(VT, 3);
	DeviceGlobalToShared<NT, Granularity>(count, source, tid, dest, false);

	int offset = Granularity * NT;
	if(count > offset)
		DeviceGlobalToShared<NT, VT - Granularity>(count - offset,
			source + offset, tid, dest + offset, false);

	if(sync) __syncthreads();

	/*
	source += tid;
	while(count > 0) {
		T reg[Granularity];
		#pragma unroll
		for(int i = 0; i < Granularity; ++i) {
			int index = NT * i + tid;
			if(index < count)
				reg[i] = source[NT * i];
		}
		DeviceRegToShared(reg, tid, dest, false);
		source += Granularity * NT;
		dest += Granularity * NT;
		count -= Granularity * NT;
	}
	if(sync) __syncthreads();*/
}

// Strided copy staged through registers.
template<int NT, int VT, typename InputIt, typename OutputIt>
MGPU_DEVICE void DeviceGlobalToGlobal(int count, InputIt source, int tid,
	OutputIt dest, bool sync) {

	// NOTE(review): staging type assumed to be the output iterator's value
	// type; the template argument was not recoverable from this copy - confirm.
	typedef typename std::iterator_traits<OutputIt>::value_type T;
	T values[VT];
	DeviceGlobalToReg<NT, VT>(count, source, tid, values, false);
	DeviceRegToGlobal<NT, VT>(count, values, tid, dest, sync);
}

////////////////////////////////////////////////////////////////////////////////
// Transpose VT elements in NT threads (x) into thread-order registers (y)
// using only NT * VT / 2 elements of shared memory.
//This function definitely has a bug, don't use!!!
// fix TODO(erich)
template<int NT, int VT, typename T>
MGPU_DEVICE void HalfSmemTranspose(const T* x, int tid, T* shared, T* y) {
	// Every call prints this warning; the function is flagged as buggy above.
	printf("HalfSmemTranspose has a bug, use WAR SmemTranpose or find bug before using in production");

	// Transpose the first half values (tid < NT / 2)
	// NOTE(review): when VT is even, the i == VT / 2 store below targets
	// NT * VT / 2 + tid, which is past a buffer of NT * VT / 2 elements - this
	// may be the known bug; confirm before relying on it.
	#pragma unroll
	for(int i = 0; i <= VT / 2; ++i)
		if(i < VT / 2 || tid < NT / 2)
			shared[NT * i + tid] = x[i];
	__syncthreads();

	if(tid < NT / 2) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			y[i] = shared[VT * tid + i];
	}
	__syncthreads();

	// Transpose the second half values (tid >= NT / 2)
	#pragma unroll
	for(int i = VT / 2; i < VT; ++i)
		if(i > VT / 2 || tid >= NT / 2)
			shared[NT * i - NT * VT / 2 + tid] = x[i];
	__syncthreads();

	if(tid >= NT / 2) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			y[i] = shared[VT * tid + i - NT * VT / 2];
	}
	__syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// Gather/scatter functions

// reg[i] = data[indices[i]]; for a partial tile only slots whose strided
// position NT * i + tid is below count are loaded.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGather(int count, InputIt data, int indices[VT],
	int tid, T* reg, bool sync) {

	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = data[indices[i]];
	} else {
		#pragma unroll
		for(int i = 0; i < VT; ++i) {
			int index = NT * i + tid;
			if(index < count)
				reg[i] = data[indices[i]];
		}
	}
	if(sync) __syncthreads();
}

// Same as DeviceGather, but out-of-range slots receive identity.
template<int NT, int VT, typename InputIt, typename T>
MGPU_DEVICE void DeviceGatherDefault(int count, InputIt data, int indices[VT],
	int tid, T* reg, T identity, bool sync) {

	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			reg[i] = data[indices[i]];
	} else {
		#pragma unroll
		for(int i = 0; i < VT; ++i) {
			int index = NT * i + tid;
			reg[i] = (index < count) ? data[indices[i]] : identity;
		}
	}
	if(sync) __syncthreads();
}

// data[indices[i]] = reg[i], predicated the same way as DeviceGather.
template<int NT, int VT, typename T, typename OutputIt>
MGPU_DEVICE void DeviceScatter(int count, const T* reg, int tid,
	int indices[VT], OutputIt data, bool sync) {

	if(count >= NT * VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			data[indices[i]] = reg[i];
	} else {
		#pragma unroll
		for(int i = 0; i < VT; ++i) {
			int index = NT * i + tid;
			if(index < count)
				data[indices[i]] = reg[i];
		}
	}
	if(sync) __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// Cooperative transpose functions (strided to thread order)

// Stores each thread's VT registers to consecutive slots starting at
// VT * tid.
template<int VT, typename T>
MGPU_DEVICE void DeviceThreadToShared(const T* threadReg, int tid, T* shared,
	bool sync) {

	if(1 & VT) {
		// Odd grain size. Store as type T.
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			shared[VT * tid + i] = threadReg[i];
	} else {
		// Even grain size. Store as DevicePair<T>. This lets us exploit the
		// 8-byte shared memory mode on Kepler.
		DevicePair<T>* dest = (DevicePair<T>*)(shared + VT * tid);
		#pragma unroll
		for(int i = 0; i < VT / 2; ++i)
			dest[i] = MakeDevicePair(threadReg[2 * i], threadReg[2 * i + 1]);
	}
	if(sync) __syncthreads();
}

// Inverse of DeviceThreadToShared: reads VT consecutive slots starting at
// VT * tid into registers.
template<int VT, typename T>
MGPU_DEVICE void DeviceSharedToThread(const T* shared, int tid, T* threadReg,
	bool sync) {

	if(1 & VT) {
		#pragma unroll
		for(int i = 0; i < VT; ++i)
			threadReg[i] = shared[VT * tid + i];
	} else {
		const DevicePair<T>* source = (const DevicePair<T>*)(shared + VT * tid);
		#pragma unroll
		for(int i = 0; i < VT / 2; ++i) {
			DevicePair<T> p = source[i];
			threadReg[2 * i] = p.x;
			threadReg[2 * i + 1] = p.y;
		}
	}
	if(sync) __syncthreads();
}

////////////////////////////////////////////////////////////////////////////////
// DeviceLoad2 - load from pointers of the same type. Optimize for a single LD
// statement.
template<int NT, int VT0, int VT1, typename T>
MGPU_DEVICE void DeviceLoad2ToReg(const T* a_global, int aCount,
	const T* b_global, int bCount, int tid, T* reg, bool sync) {

	// Treat the two arrays as one logical range [0, aCount + bCount): indices
	// at or past aCount are redirected into b_global by adding b0 to an index
	// relative to a_global. The offset is kept as ptrdiff_t because the element
	// distance between two separate allocations need not fit in an int.
	// Note: `sync` is accepted but not used by this overload.
	ptrdiff_t b0 = b_global - a_global - aCount;
	int total = aCount + bCount;
	if(total >= NT * VT0) {
		#pragma unroll
		for(int i = 0; i < VT0; ++i) {
			int index = NT * i + tid;
			reg[i] = a_global[index + ((index >= aCount) ? b0 : 0)];
		}
	} else {
		#pragma unroll
		for(int i = 0; i < VT0; ++i) {
			int index = NT * i + tid;
			if(index < total)
				reg[i] = a_global[index + ((index >= aCount) ? b0 : 0)];
		}
	}
	// Elements [VT0, VT1) are always range-checked.
	#pragma unroll
	for(int i = VT0; i < VT1; ++i) {
		int index = NT * i + tid;
		if(index < total)
			reg[i] = a_global[index + ((index >= aCount) ? b0 : 0)];
	}
}

// Load from the two arrays into registers, then store strided to shared.
template<int NT, int VT0, int VT1, typename T>
MGPU_DEVICE void DeviceLoad2ToShared(const T* a_global, int aCount,
	const T* b_global, int bCount, int tid, T* shared, bool sync) {

	T reg[VT1];
	DeviceLoad2ToReg<NT, VT0, VT1>(a_global, aCount, b_global, bCount, tid,
		reg, false);
	DeviceRegToShared<NT, VT1>(reg, tid, shared, sync);
}

////////////////////////////////////////////////////////////////////////////////
// DeviceLoad2 - load from pointers of different types. Uses two LD statements.
template<int NT, int VT0, int VT1, typename InputIt1, typename InputIt2,
	typename T>
MGPU_DEVICE void DeviceLoad2ToReg(InputIt1 a_global, int aCount,
	InputIt2 b_global, int bCount, int tid, T* reg, bool sync) {

	// Rebase b so that it can be addressed with the combined index.
	// Note: `sync` is accepted but not used by this overload.
	b_global -= aCount;
	int total = aCount + bCount;
	if(total >= NT * VT0) {
		#pragma unroll
		for(int i = 0; i < VT0; ++i) {
			int index = NT * i + tid;
			if(index < aCount) reg[i] = a_global[index];
			else reg[i] = b_global[index];
		}
	} else {
		#pragma unroll
		for(int i = 0; i < VT0; ++i) {
			int index = NT * i + tid;
			if(index < aCount) reg[i] = a_global[index];
			else if(index < total) reg[i] = b_global[index];
		}
	}
	// Elements [VT0, VT1) are always range-checked.
	#pragma unroll
	for(int i = VT0; i < VT1; ++i) {
		int index = NT * i + tid;
		if(index < aCount) reg[i] = a_global[index];
		else if(index < total) reg[i] = b_global[index];
	}
}

// Load from the two inputs into registers, then store strided to shared.
template<int NT, int VT0, int VT1, typename InputIt1, typename InputIt2,
	typename T>
MGPU_DEVICE void DeviceLoad2ToShared(InputIt1 a_global, int aCount,
	InputIt2 b_global, int bCount, int tid, T* shared, bool sync) {

	T reg[VT1];
	DeviceLoad2ToReg<NT, VT0, VT1>(a_global, aCount, b_global, bCount, tid,
		reg, false);
	DeviceRegToShared<NT, VT1>(reg, tid, shared, sync);
}

////////////////////////////////////////////////////////////////////////////////
// DeviceGatherGlobalToGlobal

// Gathers data_global[indices_shared[index]] for each strided index below
// count, then writes the values to dest_global at the same strided positions.
template<int NT, int VT, typename InputIt, typename OutputIt>
MGPU_DEVICE void DeviceGatherGlobalToGlobal(int count, InputIt data_global,
	const int* indices_shared, int tid, OutputIt dest_global, bool sync) {

	// NOTE(review): value type assumed to come from the input iterator; the
	// template argument was not recoverable from this copy - confirm.
	typedef typename std::iterator_traits<InputIt>::value_type ValType;
	ValType values[VT];
	#pragma unroll
	for(int i = 0; i < VT; ++i) {
		int index = NT * i + tid;
		if(index < count) {
			int gather = indices_shared[index];
			values[i] = data_global[gather];
		}
	}
	// The barrier (if requested) comes before the store, after the indices
	// have been read.
	if(sync) __syncthreads();
	DeviceRegToGlobal<NT, VT>(count, values, tid, dest_global, false);
}

////////////////////////////////////////////////////////////////////////////////
// DeviceTransferMergeValues
// Gather in a merge-like value from two input arrays and store to a single
// output. Like DeviceGatherGlobalToGlobal, but for two arrays at once.
template MGPU_DEVICE void DeviceTransferMergeValuesReg(int count, InputIt1 a_global, InputIt2 b_global, int bStart, const int* indices, int tid, T* reg, bool sync) { b_global -= bStart; if(count >= NT * VT) { #pragma unroll for(int i = 0; i < VT; ++i) { reg[i] = (indices[i] < bStart) ? a_global[indices[i]] : b_global[indices[i]]; } } else { #pragma unroll for(int i = 0; i < VT; ++i) { int index = NT * i + tid; if(index < count) reg[i] = (indices[i] < bStart) ? a_global[indices[i]] : b_global[indices[i]]; } } if(sync) __syncthreads(); } template MGPU_DEVICE void DeviceTransferMergeValuesShared(int count, InputIt1 a_global, InputIt2 b_global, int bStart, const int* indices_shared, int tid, OutputIt dest_global, bool sync) { int indices[VT]; DeviceSharedToReg(indices_shared, tid, indices); typedef typename std::iterator_traits::value_type ValType; ValType reg[VT]; DeviceTransferMergeValuesReg(count, a_global, b_global, bStart, indices, tid, reg, sync); DeviceRegToGlobal(count, reg, tid, dest_global, sync); } template MGPU_DEVICE void DeviceTransferMergeValuesReg(int count, const T* a_global, const T* b_global, int bStart, const int* indices, int tid, T* reg, bool sync) { int bOffset = (int)(b_global - a_global - bStart); if(count >= NT * VT) { #pragma unroll for(int i = 0; i < VT; ++i) { int gather = indices[i]; if(gather >= bStart) gather += bOffset; reg[i] = a_global[gather]; } } else { #pragma unroll for(int i = 0; i < VT; ++i) { int index = NT * i + tid; int gather = indices[i]; if(gather >= bStart) gather += bOffset; if(index < count) reg[i] = a_global[gather]; } } if(sync) __syncthreads(); } template MGPU_DEVICE void DeviceTransferMergeValuesShared(int count, const T* a_global, const T* b_global, int bStart, const int* indices_shared, int tid, OutputIt dest_global, bool sync) { int indices[VT]; DeviceSharedToReg(indices_shared, tid, indices); T reg[VT]; DeviceTransferMergeValuesReg(count, a_global, b_global, bStart, indices, tid, reg, sync); 
DeviceRegToGlobal(count, reg, tid, dest_global, sync); } } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/serialsets.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "deviceutil.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // SerialSetIntersection // Emit A if A and B are in range and equal. template MGPU_DEVICE int SerialSetIntersection(const T* data, int aBegin, int aEnd, int bBegin, int bEnd, int end, T* results, int* indices, Comp comp) { const int MinIterations = VT / 2; int commit = 0; #pragma unroll for(int i = 0; i < VT; ++i) { bool test = RangeCheck ? ((aBegin + bBegin < end) && (aBegin < aEnd) && (bBegin < bEnd)) : (i < MinIterations || (aBegin + bBegin < end)); if(test) { T aKey = data[aBegin]; T bKey = data[bBegin]; bool pA = comp(aKey, bKey); bool pB = comp(bKey, aKey); // The outputs must come from A by definition of set interection. results[i] = aKey; indices[i] = aBegin; if(!pB) ++aBegin; if(!pA) ++bBegin; if(pA == pB) commit |= 1<< i; } } return commit; } //////////////////////////////////////////////////////////////////////////////// // SerialSetUnion // Emit A if A <= B. Emit B if B < A. template MGPU_DEVICE int SerialSetUnion(const T* data, int aBegin, int aEnd, int bBegin, int bEnd, int end, T* results, int* indices, Comp comp) { const int MinIterations = VT / 2; int commit = 0; #pragma unroll for(int i = 0; i < VT; ++i) { bool test = RangeCheck ? (aBegin + bBegin < end) : (i < MinIterations || (aBegin + bBegin < end)); if(test) { T aKey = data[aBegin]; T bKey = data[bBegin]; bool pA = false, pB = false; if(RangeCheck && aBegin >= aEnd) pB = true; else if(RangeCheck && bBegin >= bEnd) pA = true; else { // Both are in range. 
pA = comp(aKey, bKey); pB = comp(bKey, aKey); } // Output A in case of a tie, so check if b < a. results[i] = pB ? bKey : aKey; indices[i] = pB ? bBegin : aBegin; if(!pB) ++aBegin; if(!pA) ++bBegin; commit |= 1<< i; } } return commit; } //////////////////////////////////////////////////////////////////////////////// // SerialSetDifference // Emit A if A < B. template MGPU_DEVICE int SerialSetDifference(const T* data, int aBegin, int aEnd, int bBegin, int bEnd, int end, T* results, int* indices, Comp comp) { const int MinIterations = VT / 2; int commit = 0; #pragma unroll for(int i = 0; i < VT; ++i) { bool test = RangeCheck ? (aBegin + bBegin < end) : (i < MinIterations || (aBegin + bBegin < end)); if(test) { T aKey = data[aBegin]; T bKey = data[bBegin]; bool pA = false, pB = false; if(RangeCheck && aBegin >= aEnd) pB = true; else if(RangeCheck && bBegin >= bEnd) pA = true; else { pA = comp(aKey, bKey); pB = comp(bKey, aKey); } // The outputs must come from A by definition of set difference. results[i] = aKey; indices[i] = aBegin; if(!pB) ++aBegin; if(!pA) ++bBegin; if(pA) commit |= 1<< i; } } return commit; } //////////////////////////////////////////////////////////////////////////////// // SerialSetSymDiff // Emit A if A < B and emit B if B < A. template MGPU_DEVICE int SerialSetSymDiff(const T* data, int aBegin, int aEnd, int bBegin, int bEnd, int end, T* results, int* indices, Comp comp) { const int MinIterations = VT / 2; int commit = 0; #pragma unroll for(int i = 0; i < VT; ++i) { bool test = RangeCheck ? (aBegin + bBegin < end) : (i < MinIterations || (aBegin + bBegin < end)); if(test) { T aKey = data[aBegin]; T bKey = data[bBegin]; bool pA = false, pB = false; if(RangeCheck && (bBegin >= bEnd)) pA = true; else if(RangeCheck && (aBegin >= aEnd)) pB = true; else { pA = comp(aKey, bKey); pB = comp(bKey, aKey); } results[i] = pA ? aKey : bKey; indices[i] = pA ? 
aBegin : bBegin; if(!pA) ++bBegin; if(!pB) ++aBegin; if(pA != pB) commit |= 1<< i; } } return commit; } //////////////////////////////////////////////////////////////////////////////// // SerialSetOp // Uses the MgpuSetOp enum to statically select one of the four serial ops // above. template MGPU_DEVICE int SerialSetOp(const T* data, int aBegin, int aEnd, int bBegin, int bEnd, int star, T* results, int* indices, Comp comp) { int end = aBegin + bBegin + VT - star; if(RangeCheck) end = min(end, aEnd + bEnd); int commit; switch(Op) { case MgpuSetOpIntersection: commit = SerialSetIntersection(data, aBegin, aEnd, bBegin, bEnd, end, results, indices, comp); break; case MgpuSetOpUnion: commit = SerialSetUnion(data, aBegin, aEnd, bBegin, bEnd, end, results, indices, comp); break; case MgpuSetOpDiff: commit = SerialSetDifference(data, aBegin, aEnd, bBegin, bEnd, end, results, indices, comp); break; case MgpuSetOpSymDiff: commit = SerialSetSymDiff(data, aBegin, aEnd, bBegin, bEnd, end, results, indices, comp); break; } __syncthreads(); return commit; } } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/device/sortnetwork.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. * ******************************************************************************/ #pragma once #include "deviceutil.cuh" namespace mgpu { //////////////////////////////////////////////////////////////////////////////// // Odd-even transposition sorting network. Sorts keys and values in-place in // register. // http://en.wikipedia.org/wiki/Odd%E2%80%93even_sort // CUDA Compiler does not currently unroll these loops correctly. Write using // template loop unrolling. 
/* template MGPU_DEVICE void OddEvenTransposeSort(T* keys, V* values, Comp comp) { #pragma unroll for(int level = 0; level < VT; ++level) { #pragma unroll for(int i = 1 & level; i < VT - 1; i += 2) { if(comp(keys[i + 1], keys[i])) { mgpu::swap(keys[i], keys[i + 1]); mgpu::swap(values[i], values[i + 1]); } } } }*/ template struct OddEvenTransposeSortT { // Sort segments marked by head flags. If the head flag between i and i + 1 // is set (so that (2<< i) & flags is true), the values belong to different // segments and are not swapped. template static MGPU_DEVICE void Sort(K* keys, V* values, int flags, Comp comp) { #pragma unroll for(int i = 1 & I; i < VT - 1; i += 2) if((0 == ((2<< i) & flags)) && comp(keys[i + 1], keys[i])) { mgpu::swap(keys[i], keys[i + 1]); mgpu::swap(values[i], values[i + 1]); } OddEvenTransposeSortT::Sort(keys, values, flags, comp); } }; template struct OddEvenTransposeSortT { template static MGPU_DEVICE void Sort(K* keys, V* values, int flags, Comp comp) { } }; template MGPU_DEVICE void OddEvenTransposeSort(K* keys, V* values, Comp comp) { OddEvenTransposeSortT<0, VT>::Sort(keys, values, 0, comp); } template MGPU_DEVICE void OddEvenTransposeSortFlags(K* keys, V* values, int flags, Comp comp) { OddEvenTransposeSortT<0, VT>::Sort(keys, values, flags, comp); } //////////////////////////////////////////////////////////////////////////////// // Batcher Odd-Even Mergesort network // Unstable but executes much faster than the transposition sort. // http://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort template struct OddEvenMergesortT { template MGPU_DEVICE static void CompareAndSwap(K* keys, V* values, int flags, int a, int b, Comp comp) { if(b < Count) { // Mask the bits between a and b. Any head flags in this interval // means the keys are in different segments and must not be swapped. 
const int Mask = ((2<< b) - 1) ^ ((2<< a) - 1); if(!(Mask & flags) && comp(keys[b], keys[a])) { mgpu::swap(keys[b], keys[a]); mgpu::swap(values[b], values[a]); } } } template struct OddEvenMerge { template MGPU_DEVICE static void Merge(K* keys, V* values, int flags, Comp comp) { // Compare and swap const int M = 2 * R; OddEvenMerge::Merge(keys, values, flags, comp); OddEvenMerge::Merge(keys, values, flags, comp); #pragma unroll for(int i = Low2 + R; i + R < Low2 + Width; i += M) CompareAndSwap(keys, values, flags, i, i + R, comp); } }; template struct OddEvenMerge { template MGPU_DEVICE static void Merge(K* keys, V* values, int flags, Comp comp) { CompareAndSwap(keys, values, flags, Low2, Low2 + R, comp); } }; template MGPU_DEVICE static void Sort(K* keys, V* values, int flags, Comp comp) { const int M = Width / 2; OddEvenMergesortT::Sort(keys, values, flags, comp); OddEvenMergesortT::Sort(keys, values, flags, comp); OddEvenMerge<1, Low>::Merge(keys, values, flags, comp); } }; template struct OddEvenMergesortT<1, Low, Count> { template MGPU_DEVICE static void Sort(K* keys, V* values, int flags, Comp comp) { } }; template MGPU_DEVICE void OddEvenMergesort(K* keys, V* values, Comp comp) { const int Width = 1<< sLogPow2::value; OddEvenMergesortT::Sort(keys, values, 0, comp); } template MGPU_DEVICE void OddEvenMergesortFlags(K* keys, V* values, int flags, Comp comp) { const int Width = 1<< sLogPow2::value; OddEvenMergesortT::Sort(keys, values, flags, comp); } } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/contrib/moderngpu/include/mgpudevice.cuh ================================================ /****************************************************************************** * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. 
*
 ******************************************************************************/

/******************************************************************************
 *
 * Code and text by Sean Baxter, NVIDIA Research
 * See http://nvlabs.github.io/moderngpu for repository and documentation.
 *
 ******************************************************************************/

#pragma once

#include "mgpuenums.h"
#include "device/deviceutil.cuh"

namespace mgpu {

// Forward declarations (with default arguments) for the block-level
// load/store helpers. The definitions are pulled in by the #include
// directives at the bottom of this file.
//
// NOTE(review): every "template" keyword below has lost its parameter list in
// this copy of the file (presumably "<int NT, int VT, ...>"), so the
// declarations are not valid C++ as written. The lists must match the
// definitions exactly and those are not all visible from here, so they are
// left untouched - restore them from the upstream header.
//
// In the comments below NT and VT are the compile-time constants used for the
// strided index "NT * i + tid", and tid is the caller's thread index.

////////////////////////////////////////////////////////////////////////////////
// device/loadstore.cuh

// For 0 <= i < VT:
//      index = NT * i + tid;
//      reg[i] = data[index];
// Synchronize after load.
template MGPU_DEVICE void DeviceSharedToReg(InputIt data, int tid, T* reg,
    bool sync = true);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count) reg[i] = data[index];
// No synchronize after load.
template MGPU_DEVICE void DeviceGlobalToReg(int count, InputIt data, int tid,
    T* reg, bool sync = false);

// Same signature as DeviceGlobalToReg plus an init value.
// NOTE(review): presumably registers with index >= count receive init -
// confirm against the definition.
template MGPU_DEVICE void DeviceGlobalToRegDefault(int count, InputIt data,
    int tid, T* reg, T init, bool sync = false);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count) reg[i] = data[index];
// No synchronize after load.
// NOTE(review): same name and function parameters as the declaration above;
// the two can only differ in the (missing) template parameter list.
template MGPU_DEVICE void DeviceGlobalToReg(int count, InputIt data, int tid,
    T* reg, bool sync = false);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count) reg[i] = data[index];
// No synchronize after load.
// NOTE(review): takes an init value like DeviceGlobalToRegDefault; how it is
// used is not visible here.
template MGPU_DEVICE void DeviceGlobalToRegDefault2(int count, InputIt data,
    int tid, T* reg, T init, bool sync = false);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count) reg[i] = data[index];
// No synchronize after load.
// No optimized code path for count < NV (smaller generated code).
template MGPU_DEVICE void DeviceGlobalToRegLoop(int count, InputIt data,
    int tid, T* reg, bool sync = false);

// For 0 <= i < VT:
//      index = VT * tid + i.
//      if(index < count) reg[i] = data[index];
// No synchronize after load (there is no sync parameter).
template MGPU_DEVICE void DeviceGlobalToThread(int count, InputIt data,
    int tid, T* reg);

// Same signature as DeviceGlobalToThread plus an init value.
// NOTE(review): presumably registers with index >= count receive init -
// confirm against the definition.
template MGPU_DEVICE void DeviceGlobalToThreadDefault(int count, InputIt data,
    int tid, T* reg, T init);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      dest[index] = reg[i];
// Synchronize after store.
// NOTE(review): the original comment guarded the store with "index < count",
// but this function has no count parameter - confirm it is unpredicated.
template MGPU_DEVICE void DeviceRegToShared(const T* reg, int tid,
    OutputIt dest, bool sync = true);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count) dest[index] = reg[i];
// No synchronize after store.
template MGPU_DEVICE void DeviceRegToGlobal(int count, const T* reg, int tid,
    OutputIt dest, bool sync = false);

// For 0 <= index < count:
//      dest[index] = source[index];
// This function is intended to replace DeviceGlobalToShared in cases where
// count is much less than NT * VT.
template MGPU_DEVICE void DeviceMemToMemLoop(int count, InputIt source,
    int tid, OutputIt dest, bool sync = true);

// For 0 <= index < count:
//      dest[index] = source[index];
// Synchronize after store.
template MGPU_DEVICE void DeviceSharedToGlobal(int count, const T* source,
    int tid, OutputIt dest, bool sync = true);

// For 0 <= index < count:
//      dest[index] = source[index];
// Synchronize after store.
template MGPU_DEVICE void DeviceGlobalToShared(int count, InputIt source,
    int tid, T* dest, bool sync = true);

// Same function parameters as DeviceGlobalToShared.
// NOTE(review): how this variant differs is not visible from here.
template MGPU_DEVICE void DeviceGlobalToShared2(int count, InputIt source,
    int tid, T* dest, bool sync = true);

// For 0 <= index < count:
//      dest[index] = source[index];
// Synchronize after store.
// No optimized code path for count < NV (smaller generated code).
template MGPU_DEVICE void DeviceGlobalToSharedLoop(int count, InputIt source,
    int tid, T* dest, bool sync = true);

// Same function parameters as DeviceGlobalToShared plus an init value.
// NOTE(review): presumably dest slots with index >= count receive init -
// confirm against the definition.
template MGPU_DEVICE void DeviceGlobalToSharedDefault(int count,
    InputIt source, int tid, T* dest, T init, bool sync = true);

// Same function parameters as DeviceGlobalToSharedDefault.
// NOTE(review): how this variant differs is not visible from here.
template MGPU_DEVICE void DeviceGlobalToSharedDefault2(int count,
    InputIt source, int tid, T* dest, T init, bool sync = true);

// For 0 <= index < count:
//      dest[index] = source[index];
// No synchronize.
template MGPU_DEVICE void DeviceGlobalToGlobal(int count, InputIt source,
    int tid, OutputIt dest, bool sync = false);

// Transpose VT elements in NT threads (x) into thread-order registers (y)
// using only NT * VT / 2 elements of shared memory.
template MGPU_DEVICE void HalfSmemTranspose(const T* x, int tid, T* shared,
    T* y);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count)
//          gather = indices[i];
//          reg[i] = data[gather];
// Synchronize after load.
// NOTE(review): the original comment read "indices[index]", but indices is
// declared with VT entries, so one entry per register is assumed - confirm.
template MGPU_DEVICE void DeviceGather(int count, InputIt data,
    int indices[VT], int tid, T* reg, bool sync = true);

// Same function parameters as DeviceGather plus an identity value.
// NOTE(review): presumably registers with index >= count receive identity -
// confirm against the definition.
template MGPU_DEVICE void DeviceGatherDefault(int count, InputIt data,
    int indices[VT], int tid, T* reg, T identity, bool sync = true);

// For 0 <= i < VT:
//      index = NT * i + tid;
//      if(index < count)
//          scatter = indices[i];
//          data[scatter] = reg[i];
// Synchronize after store.
// NOTE(review): the original comment read "indices[index]"; see DeviceGather.
template MGPU_DEVICE void DeviceScatter(int count, const T* reg, int tid,
    int indices[VT], OutputIt data, bool sync = true);

// For 0 <= i < VT:
//      shared[VT * tid + i] = threadReg[i];
// Synchronize after store.
// Note this function moves data in THREAD ORDER.
// (DeviceRegToShared moves data in STRIDED ORDER).
template MGPU_DEVICE void DeviceThreadToShared(const T* threadReg, int tid,
    T* shared, bool sync = true);

// For 0 <= i < VT:
//      threadReg[i] = shared[VT * tid + i];
// Synchronize after load.
// Note this function moves data in THREAD ORDER.
// (DeviceSharedToReg moves data in STRIDED ORDER).
template MGPU_DEVICE void DeviceSharedToThread(const T* shared, int tid,
    T* threadReg, bool sync = true);

// For 0 <= index < aCount:
//      shared[index] = a_global[index];
// For 0 <= index < bCount:
//      shared[aCount + index] = b_global[index];
// VT0 is the lower-bound for predication-free execution:
//      If count >= NT * VT0, a predication-free branch is taken.
// VT1 is the upper-bound for loads:
//      NT * VT1 must >= aCount + bCount.
// The ToReg forms leave the values in registers (strided order) instead of
// storing them to shared memory.
template MGPU_DEVICE void DeviceLoad2ToReg(const T* a_global, int aCount,
    const T* b_global, int bCount, int tid, T* reg, bool sync = false);

template MGPU_DEVICE void DeviceLoad2ToShared(const T* a_global, int aCount,
    const T* b_global, int bCount, int tid, T* shared, bool sync = true);

template MGPU_DEVICE void DeviceLoad2ToReg(InputIt1 a_global, int aCount,
    InputIt2 b_global, int bCount, int tid, T* reg, bool sync = false);

template MGPU_DEVICE void DeviceLoad2ToShared(InputIt1 a_global, int aCount,
    InputIt2 b_global, int bCount, int tid, T* shared, bool sync = true);

// For 0 <= i < VT
//      index = NT * i + tid;
//      if(index < count)
//          gather = indices_shared[index];
//          dest_global[index] = data_global[gather];
// Synchronize after load.
template MGPU_DEVICE void DeviceGatherGlobalToGlobal(int count,
    InputIt data_global, const int* indices_shared, int tid,
    OutputIt dest_global, bool sync = true);

// For 0 <= i < VT
//      index = NT * i + tid
//      if(index < count)
//          gather = (merged position for this slot);
//          if(gather < bStart) data = a_global[gather];
//          else data = b_global[gather - bStart];
//          (Reg forms)    reg[i] = data;
//          (Shared forms) dest_global[index] = data;
// The Reg forms take the positions in a per-thread array (indices[i]) and do
// not synchronize by default. The Shared forms first read them from
// indices_shared and synchronize by default.
template MGPU_DEVICE void DeviceTransferMergeValuesReg(int count,
    InputIt1 a_global, InputIt2 b_global, int bStart, const int* indices,
    int tid, T* reg, bool sync = false);

template MGPU_DEVICE void DeviceTransferMergeValuesShared(int count,
    InputIt1 a_global, InputIt2 b_global, int bStart,
    const int* indices_shared, int tid, OutputIt dest_global,
    bool sync = true);

template MGPU_DEVICE void DeviceTransferMergeValuesReg(int count,
    const T* a_global, const T* b_global, int bStart, const int* indices,
    int tid, T* reg, bool sync = false);

template MGPU_DEVICE void DeviceTransferMergeValuesShared(int count,
    const T* a_global, const T* b_global, int bStart,
    const int* indices_shared, int tid, OutputIt dest_global,
    bool sync = true);

} // namespace mgpu

// The definitions are included after the declarations above.
#include "device/loadstore.cuh"
#include "device/ctasegscan.cuh"

================================================
FILE: 3rdparty/ctc_include/contrib/moderngpu/include/mgpuenums.h
================================================
/******************************************************************************
 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
*
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/******************************************************************************
 *
 * Code and text by Sean Baxter, NVIDIA Research
 * See http://nvlabs.github.io/moderngpu for repository and documentation.
 *
 ******************************************************************************/

#pragma once

namespace mgpu {

// Selector enums shared by the mgpu headers. All enumerators use the default
// numbering (0, 1, 2, ...). Apart from MgpuSetOp their consumers are not part
// of this header; the one-line descriptions are inferred from the names only.

// Lower- versus upper-bound selection. NOTE(review): presumably for the
// search routines - confirm at the use sites.
enum MgpuBounds {
    MgpuBoundsLower,
    MgpuBoundsUpper
};

// Exclusive versus inclusive scan. NOTE(review): inferred from the names.
enum MgpuScanType {
    MgpuScanTypeExc,
    MgpuScanTypeInc
};

// What a search reports: nothing, an index, a match flag, or both.
// NOTE(review): inferred from the names.
enum MgpuSearchType {
    MgpuSearchTypeNone,
    MgpuSearchTypeIndex,
    MgpuSearchTypeMatch,
    MgpuSearchTypeIndexMatch
};

// Join flavour. NOTE(review): inferred from the names.
enum MgpuJoinKind {
    MgpuJoinKindInner,
    MgpuJoinKindLeft,
    MgpuJoinKindRight,
    MgpuJoinKindOuter
};

// Set operation selector. SerialSetOp (device/serialsets.cuh) switches on
// this value to pick the intersection, union, difference or symmetric
// difference routine.
enum MgpuSetOp {
    MgpuSetOpIntersection,
    MgpuSetOpUnion,
    MgpuSetOpDiff,
    MgpuSetOpSymDiff
};

} // namespace mgpu

================================================
FILE: 3rdparty/ctc_include/contrib/moderngpu/include/util/static.h
================================================
/******************************************************************************
 * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * * Code and text by Sean Baxter, NVIDIA Research * See http://nvlabs.github.io/moderngpu for repository and documentation. 
* ******************************************************************************/ #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MGPU_MIN #define MGPU_MIN(x, y) (((x) <= (y)) ? (x) : (y)) #define MGPU_MAX(x, y) (((x) >= (y)) ? (x) : (y)) #define MGPU_MAX0(x) (((x) >= 0) ? (x) : 0) #define MGPU_ABS(x) (((x) >= 0) ? (x) : (-x)) #define MGPU_DIV_UP(x, y) (((x) + (y) - 1) / (y)) #define MGPU_DIV_ROUND(x, y) (((x) + (y) / 2) / (y)) #define MGPU_ROUND_UP(x, y) ((y) * MGPU_DIV_UP(x, y)) #define MGPU_SHIFT_DIV_UP(x, y) (((x) + ((1<< (y)) - 1))>> y) #define MGPU_ROUND_UP_POW2(x, y) (((x) + (y) - 1) & ~((y) - 1)) #define MGPU_ROUND_DOWN_POW2(x, y) ((x) & ~((y) - 1)) #define MGPU_IS_POW_2(x) (0 == ((x) & ((x) - 1))) #endif // MGPU_MIN namespace mgpu { typedef unsigned char byte; typedef unsigned int uint; typedef signed short int16; typedef unsigned short ushort; typedef unsigned short uint16; typedef long long int64; typedef unsigned long long uint64; // IsPow2::value is true if X is a power of 2. template struct sIsPow2 { enum { value = 0 == (X & (X - 1)) }; }; // Finds the base-2 logarithm of X. value is -1 if X is not a power of 2. template struct sLogPow2 { enum { extra = sIsPow2::value ? 0 : (roundUp ? 1 : 0) }; enum { inner = sLogPow2::inner + 1 }; enum { value = inner + extra }; }; template struct sLogPow2<0, roundUp> { enum { inner = 0 }; enum { value = 0 }; }; template struct sLogPow2<1, roundUp> { enum { inner = 0 }; enum { value = 0 }; }; template struct sDivUp { enum { value = (X + Y - 1) / Y }; }; template struct sDiv2RoundUp { enum { value = sDiv2RoundUp::value, levels - 1>::value }; }; template struct sDiv2RoundUp { enum { value = count }; }; template struct sDivSafe { enum { value = X / Y }; }; template struct sDivSafe { enum { value = 0 }; }; template struct sRoundUp { enum { rem = X % Y }; enum { value = X + (rem ? 
(Y - rem) : 0) }; }; template struct sRoundDown { enum { rem = X % Y }; enum { value = X - rem }; }; // IntegerDiv is a template for avoiding divisions by zero in template // evaluation. Templates always evaluate both b and c in an expression like // a ? b : c, and will error if either rhs contains an illegal expression, // even if the ternary is explictly designed to guard against that. template struct sIntegerDiv { enum { value = X / (Y ? Y : (X + 1)) }; }; template struct sMax { enum { value = (X >= Y) ? X : Y }; }; template struct sMin { enum { value = (X <= Y) ? X : Y }; }; template struct sAbs { enum { value = (X >= 0) ? X : -X }; }; // Finds the number of powers of 2 in the prime factorization of X. template struct sNumFactorsOf2 { enum { shifted = X >> 1 }; enum { value = 1 + sNumFactorsOf2::value }; }; template struct sNumFactorsOf2 { enum { value = 0 }; }; // Returns the divisor for a conflict-free transpose. template struct sBankConflictDivisor { enum { value = (1 & X) ? 0 : (sIsPow2::value ? NumBanks : (1<< sNumFactorsOf2::value)) }; enum { log_value = sLogPow2::value }; }; template struct sConflictFreeStorage { enum { count = NT * X }; enum { divisor = sBankConflictDivisor::value }; enum { padding = sDivSafe::value }; enum { value = count + padding }; }; } // namespace mgpu ================================================ FILE: 3rdparty/ctc_include/detail/cpu_ctc.h ================================================ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #include #include #include #include #include #include #include "ctc_helper.h" namespace mxnet_warpctc { template class CpuCTC { public: // Noncopyable CpuCTC(int alphabet_size, int minibatch, void* workspace, int blank_label) : alphabet_size_(alphabet_size), minibatch_(minibatch), workspace_(workspace), blank_label_(blank_label) { }; CpuCTC(const CpuCTC&) = delete; CpuCTC& operator=(const CpuCTC&) = delete; ctcStatus_t cost_and_grad(const ProbT* const activations, ProbT *grads, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths); ctcStatus_t score_forward(const ProbT* const activations, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths); private: class CpuCTC_metadata { private: int setup_labels(const int* const labels, int blank_label, int L, int S); public: CpuCTC_metadata(int L, int S, int T, int mb, int alphabet_size, void* workspace, size_t bytes_used, int blank_label, const int* const labels); ProbT* alphas; ProbT* betas; int* labels_w_blanks; int* e_inc; int* s_inc; ProbT* output; int repeats; }; int alphabet_size_; // Number of characters plus blank int minibatch_; void* workspace_; int blank_label_; void log_softmax(const ProbT* const activations, ProbT* log_probs, const int* const input_lengths); std::tuple cost_and_grad_kernel(ProbT *grad, const ProbT* const log_probs, const int* const labels, int T, int L, int mb, size_t bytes_used); ProbT compute_alphas(const ProbT* log_probs, int repeats, int S, int T, const int* const e_inc, 
const int* const s_inc, const int* const labels, ProbT* alphas); ProbT compute_betas_and_grad(ProbT* grad, const ProbT* const log_probs, ProbT log_partition, int repeats, int S, int T, const int* const e_inc, const int* const s_inc, const int* const labels, ProbT* alphas, ProbT* betas, ProbT* output); }; template CpuCTC::CpuCTC_metadata::CpuCTC_metadata(int L, int S, int T, int mb, int alphabet_size, void* workspace, size_t bytes_used, int blank_label, const int* const labels) { alphas = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(ProbT) * S * T; std::fill(alphas, alphas + S * T, ctc_helper::neg_inf()); betas = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(ProbT) * S; std::fill(betas, betas + S, ctc_helper::neg_inf()); labels_w_blanks = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(int) * S; e_inc = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(int) * S; s_inc = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(int) * S; output = reinterpret_cast(static_cast(workspace) + bytes_used); bytes_used += sizeof(ProbT) * alphabet_size; repeats = setup_labels(labels, blank_label, L, S); } template int CpuCTC::CpuCTC_metadata::setup_labels(const int* const labels, int blank_label, int L, int S) { int e_counter = 0; int s_counter = 0; s_inc[s_counter++] = 1; int repeats = 0; for (int i = 1; i < L; ++i) { if (labels[i-1] == labels[i]) { s_inc[s_counter++] = 1; s_inc[s_counter++] = 1; e_inc[e_counter++] = 1; e_inc[e_counter++] = 1; ++repeats; } else { s_inc[s_counter++] = 2; e_inc[e_counter++] = 2; } } e_inc[e_counter++] = 1; for (int i = 0; i < L; ++i) { labels_w_blanks[2 * i] = blank_label; labels_w_blanks[2 * i + 1] = labels[i]; } labels_w_blanks[S - 1] = blank_label; return repeats; } template void CpuCTC::log_softmax(const ProbT* const activations, ProbT* log_probs, const int* const input_lengths) { #pragma omp parallel for for 
(int mb = 0; mb < minibatch_; ++mb) { for(int c = 0; c < input_lengths[mb]; ++c) { int col_offset = (mb + minibatch_ * c) * alphabet_size_; ProbT max_activation = -std::numeric_limits::infinity(); for(int r = 0; r < alphabet_size_; ++r) max_activation = std::max(max_activation, activations[r + col_offset]); ProbT denom = ProbT(0.); for(int r = 0; r < alphabet_size_; ++r) { denom += std::exp(activations[r + col_offset] - max_activation); } for(int r = 0; r < alphabet_size_; ++r) { log_probs[r + col_offset] = activations[r + col_offset] - max_activation - std::log(denom); } } } } template std::tuple CpuCTC::cost_and_grad_kernel(ProbT *grad, const ProbT* const log_probs, const int* const labels, int T, int L, int mb, size_t bytes_used) { const int S = 2*L + 1; // Number of labels with blanks CpuCTC_metadata ctcm(L, S, T, mb, alphabet_size_, workspace_, bytes_used, blank_label_, labels); bool over_threshold = false; if (L + ctcm.repeats > T) { return std::make_tuple(ProbT(0), over_threshold); // TODO, not right to return 0 } ProbT llForward = compute_alphas(log_probs, ctcm.repeats, S, T, ctcm.e_inc, ctcm.s_inc, ctcm.labels_w_blanks, ctcm.alphas); ProbT llBackward = compute_betas_and_grad(grad, log_probs, llForward, ctcm.repeats, S, T, ctcm.e_inc, ctcm.s_inc, ctcm.labels_w_blanks, ctcm.alphas, ctcm.betas, ctcm.output); ProbT diff = std::abs(llForward - llBackward); if (diff > ctc_helper::threshold) { over_threshold = true; } return std::make_tuple(-llForward, over_threshold); } // Computes forward probabilities template ProbT CpuCTC::compute_alphas(const ProbT* log_probs, int repeats, int S, int T, const int* const e_inc, const int* const s_inc, const int* const labels, ProbT* alphas) { int start = (((S /2) + repeats - T) < 0) ? 0 : 1, end = S > 1 ? 
2 : 1; for (int i = start; i < end; ++i) { alphas[i] = log_probs[labels[i]]; } for(int t = 1; t < T; ++t) { int remain = (S / 2) + repeats - (T - t); if(remain >= 0) start += s_inc[remain]; if(t <= (S / 2) + repeats) end += e_inc[t - 1]; int startloop = start; int idx1 = t * S, idx2 = (t - 1) * S, idx3 = t * (alphabet_size_ * minibatch_); if (start == 0) { alphas[idx1] = alphas[idx2] + log_probs[blank_label_ + idx3]; startloop += 1; } for(int i = startloop; i < end; ++i) { ProbT prev_sum = ctc_helper::log_plus()(alphas[i + idx2], alphas[(i-1) + idx2]); // Skip two if not on blank and not on repeat. if (labels[i] != blank_label_ && i != 1 && labels[i] != labels[i-2]) prev_sum = ctc_helper::log_plus()(prev_sum, alphas[(i-2) + idx2]); alphas[i + idx1] = prev_sum + log_probs[labels[i] + idx3]; } } ProbT loglike = ctc_helper::neg_inf(); for(int i = start; i < end; ++i) { loglike = ctc_helper::log_plus()(loglike, alphas[i + (T - 1) * S]); } return loglike; } // Starting from T, we sweep backward over the alpha array computing one column // of betas as we go. At each position we can update product alpha * beta and then // sum into the gradient associated with each label. // NOTE computes gradient w.r.t UNNORMALIZED final layer activations. // Assumed passed in grads are already zeroed! template ProbT CpuCTC::compute_betas_and_grad(ProbT* grad, const ProbT* const log_probs, ProbT log_partition, int repeats, int S, int T, const int* const e_inc, const int* const s_inc, const int* const labels, ProbT* alphas, ProbT* betas, ProbT* output) { int start = S > 1 ? (S - 2) : 0, end = (T > (S / 2) + repeats) ? 
S : S-1; std::fill(output, output + alphabet_size_, ctc_helper::neg_inf()); //set the starting values in the beta column at the very right edge for (int i = start; i < end; ++i) { betas[i] = log_probs[labels[i] + (T - 1) * (alphabet_size_ * minibatch_)]; //compute alpha * beta in log space at this position in (S, T) space alphas[i + (T - 1) * S] += betas[i]; //update the gradient associated with this label //essentially performing a reduce-by-key in a sequential manner output[labels[i]] = ctc_helper::log_plus()(alphas[i + (T - 1) * S], output[labels[i]]); } //update the gradient wrt to each unique label for (int i = 0; i < alphabet_size_; ++i) { int idx3 = (T - 1) * alphabet_size_ * minibatch_ + i; if (output[i] == 0.0 || output[i] == ctc_helper::neg_inf() || log_probs[idx3] == ctc_helper::neg_inf()) { grad[idx3] = std::exp(log_probs[idx3]); } else { grad[idx3] = std::exp(log_probs[idx3]) - std::exp(output[i] - log_probs[idx3] - log_partition); } } //loop from the second to last column all the way to the left for(int t = T - 2; t >= 0; --t) { int remain = (S / 2) + repeats - (T - t); if(remain >= -1) start -= s_inc[remain + 1]; if(t < (S / 2) + repeats) end -= e_inc[t]; int endloop = end == S ? end - 1 : end; int idx1 = t * S, idx3 = t * (alphabet_size_ * minibatch_); std::fill(output, output + alphabet_size_, ctc_helper::neg_inf()); for(int i = start; i < endloop; ++i) { ProbT next_sum = ctc_helper::log_plus()(betas[i], betas[(i+1)]); // Skip two if not on blank and not on repeat. 
if (labels[i] != blank_label_ && i != (S-2) && labels[i] != labels[i+2]){ next_sum = ctc_helper::log_plus()(next_sum, betas[(i+2)]); } betas[i] = next_sum + log_probs[labels[i] + idx3]; //compute alpha * beta in log space alphas[i + idx1] += betas[i]; //update the gradient associated with this label output[labels[i]] = ctc_helper::log_plus()(alphas[i + idx1], output[labels[i]]); } if (end == S) { betas[(S-1)] = betas[(S-1)] + log_probs[blank_label_ + idx3]; alphas[(S-1) + idx1] += betas[(S-1)]; output[labels[S-1]] = ctc_helper::log_plus()(alphas[S-1 + idx1], output[labels[S-1]]); } //go over the unique labels and compute the final grad // wrt to each one at this time step for (int i = 0; i < alphabet_size_; ++i) { if (output[i] == 0.0 || output[i] == ctc_helper::neg_inf() || log_probs[idx3] == ctc_helper::neg_inf()) { grad[idx3] = std::exp(log_probs[idx3]); } else { grad[idx3] = std::exp(log_probs[idx3]) - std::exp(output[i] - log_probs[idx3] - log_partition); } ++idx3; } } ProbT loglike = ctc_helper::neg_inf(); for(int i = start; i < end; ++i) { loglike = ctc_helper::log_plus()(loglike, betas[i]); } return loglike; } template ctcStatus_t CpuCTC::cost_and_grad(const ProbT* const activations, ProbT *grads, ProbT *costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths) { if (activations == nullptr || grads == nullptr || costs == nullptr || flat_labels == nullptr || label_lengths == nullptr || input_lengths == nullptr ) return CTC_STATUS_INVALID_VALUE; ProbT* log_probs = static_cast(workspace_); int maxT = *std::max_element(input_lengths, input_lengths + minibatch_); size_t bytes_used = sizeof(ProbT) * minibatch_ * alphabet_size_ * maxT; //per minibatch memory size_t per_minibatch_bytes = 0; int maxL = *std::max_element(label_lengths, label_lengths + minibatch_);; int maxS = 2 * maxL + 1; //output per_minibatch_bytes += sizeof(float) * alphabet_size_; //alphas per_minibatch_bytes += sizeof(float) * maxS * maxT; //betas 
per_minibatch_bytes += sizeof(float) * maxS; //labels w/blanks, e_inc, s_inc per_minibatch_bytes += 3 * sizeof(int) * maxS; log_softmax(activations, log_probs, input_lengths); #pragma omp parallel for for (int mb = 0; mb < minibatch_; ++mb) { const int T = input_lengths[mb]; // Length of utterance (time) const int L = label_lengths[mb]; // Number of labels in transcription bool mb_status; std::tie(costs[mb], mb_status) = cost_and_grad_kernel(grads + mb * alphabet_size_, log_probs + mb * alphabet_size_, flat_labels + std::accumulate(label_lengths, label_lengths + mb, 0), T, L, mb, bytes_used + mb * per_minibatch_bytes); } return CTC_STATUS_SUCCESS; } template ctcStatus_t CpuCTC::score_forward(const ProbT* const activations, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths) { if (activations == nullptr || costs == nullptr || flat_labels == nullptr || label_lengths == nullptr || input_lengths == nullptr ) return CTC_STATUS_INVALID_VALUE; ProbT* log_probs = static_cast(workspace_); int maxT = *std::max_element(input_lengths, input_lengths + minibatch_); size_t bytes_used = sizeof(ProbT) * minibatch_ * alphabet_size_ * maxT; //per minibatch memory size_t per_minibatch_bytes = 0; int maxL = *std::max_element(label_lengths, label_lengths + minibatch_); int maxS = 2 * maxL + 1; //output per_minibatch_bytes += sizeof(float) * alphabet_size_; //alphas per_minibatch_bytes += sizeof(float) * maxS * maxT; //betas per_minibatch_bytes += sizeof(float) * maxS; //labels w/blanks, e_inc, s_inc per_minibatch_bytes += 3 * sizeof(int) * maxS; log_softmax(activations, log_probs, input_lengths); #pragma omp parallel for for (int mb = 0; mb < minibatch_; ++mb) { const int T = input_lengths[mb]; // Length of utterance (time) const int L = label_lengths[mb]; // Number of labels in transcription const int S = 2*L + 1; // Number of labels with blanks CpuCTC_metadata ctcm(L, S, T, mb, alphabet_size_, workspace_, bytes_used + mb * 
per_minibatch_bytes, blank_label_, flat_labels + std::accumulate(label_lengths, label_lengths + mb, 0)); if (L + ctcm.repeats > T) costs[mb] = ProbT(0); else { costs[mb] = -compute_alphas(log_probs + mb * alphabet_size_, ctcm.repeats, S, T, ctcm.e_inc, ctcm.s_inc, ctcm.labels_w_blanks, ctcm.alphas); } } return CTC_STATUS_SUCCESS; } } // mxnet_warpctc ================================================ FILE: 3rdparty/ctc_include/detail/ctc_helper.h ================================================ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #include #include #include #include "hostdevice.h" typedef enum { CTC_STATUS_SUCCESS = 0, CTC_STATUS_MEMOPS_FAILED = 1, CTC_STATUS_INVALID_VALUE = 2, CTC_STATUS_EXECUTION_FAILED = 3, CTC_STATUS_UNKNOWN_ERROR = 4 } ctcStatus_t; typedef enum { CTC_CPU = 0, CTC_GPU = 1 } ctcComputeLocation; namespace ctc_helper { static const float threshold = 1e-1; template HOSTDEVICE T neg_inf() { return -T(INFINITY); } inline int div_up(int x, int y) { return (x + y - 1) / y; } template struct maximum { HOSTDEVICE Res operator()(const Arg& x, const Arg& y) const { return x < y ? 
y : x; } }; template struct add { HOSTDEVICE Res operator()(const Arg& x, const Arg& y) const { return x + y; } }; template struct identity { HOSTDEVICE Res operator()(const Arg& x) const {return Res(x);} }; template struct negate { HOSTDEVICE Res operator()(const Arg& x) const {return Res(-x);} }; template struct exponential { HOSTDEVICE Res operator()(const Arg& x) const {return std::exp(x);} }; template struct log_plus { typedef Res result_type; HOSTDEVICE Res operator()(const Arg1& p1, const Arg2& p2) { if (p1 == neg_inf()) return p2; if (p2 == neg_inf()) return p1; Res result = log1p(exp(-fabs(p1 - p2))) + maximum()(p1, p2); return result; } }; } ================================================ FILE: 3rdparty/ctc_include/detail/gpu_ctc.h ================================================ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ #pragma once #include "ctc_helper.h" #include "gpu_ctc_kernels.h" namespace mxnet_warpctc { template class GpuCTC { public: GpuCTC(int alphabet_size, int minibatch, void *workspace, CUstream stream, int blank_label) : out_dim_(alphabet_size), minibatch_(minibatch), gpu_workspace_(workspace), stream_(stream), blank_label_(blank_label) {}; // Noncopyable GpuCTC(const GpuCTC&) = delete; GpuCTC& operator=(const GpuCTC&) = delete; ctcStatus_t cost_and_grad(const ProbT* const activations, ProbT* grads, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths); ctcStatus_t score_forward(const ProbT* const activations, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths); private: template ctcStatus_t launch_alpha_beta_kernels(const ProbT* const log_probs, ProbT *grads, bool compute_alpha, bool compute_beta); ctcStatus_t launch_gpu_kernels(const ProbT* const log_probs, ProbT *grads, size_t config, bool launch_alpha, bool launch_beta); ctcStatus_t setup_gpu_metadata(const int* const flat_labels, const int* const label_lengths, const int* const input_lengths); ctcStatus_t create_metadata_and_choose_config(const int* const label_lengths, const int* const flat_labels, const int* const input_lengths, size_t& best_config); ctcStatus_t compute_log_probs(const ProbT* const activations); ctcStatus_t compute_cost_and_score(const ProbT* const activations, ProbT* grads, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths, bool compute_alpha, bool compute_betas_and_grad); int out_dim_; // Number of characters plus blank int minibatch_; int S_; int T_; int activation_cols_; // Number of columns in activations void *gpu_workspace_; // Buffer for all temporary GPU memory CUstream stream_; int blank_label_; int *utt_length_; // T int *label_sizes_; // L int *repeats_; // repeats_ int *label_offsets_; int *labels_without_blanks_; 
int *labels_with_blanks_; ProbT *alphas_; ProbT *nll_forward_; ProbT *nll_backward_; ProbT *denoms_; // Temporary storage for denoms for softmax ProbT *log_probs_; // Temporary storage for probabilities (log softmax output) }; template ctcStatus_t GpuCTC::setup_gpu_metadata(const int* const flat_labels, const int* const label_lengths, const int* const input_lengths) { size_t gpu_bytes_used = 0; nll_forward_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(ProbT); nll_backward_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(ProbT); repeats_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(int); label_offsets_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(int); // This is the max of all S and T for all valid examples in the minibatch. // A valid example is one for which L + repeats <= T S_ = 0; T_ = 0; // This is the max of all timesteps, valid or not. Needed to compute offsets int Tmax = 0; // This is the max of all labels, valid or not. 
Needed to compute offsets int Lmax = 0; int total_label_length = 0; constexpr int cpu_buffer_size = 64; int repeats[cpu_buffer_size]; int label_offsets[cpu_buffer_size]; const int num_passes = ctc_helper::div_up(minibatch_, cpu_buffer_size); cudaError_t cuda_status; for (int pass = 0; pass < num_passes; ++pass) { const int start_idx = pass * cpu_buffer_size; const int end_idx = std::min(minibatch_, (pass+1) * cpu_buffer_size); for (int j = start_idx; j < end_idx; ++j) { const int L = label_lengths[j]; const int local_T = input_lengths[j]; const int *label_ptr = &(flat_labels[total_label_length]); label_offsets[j % cpu_buffer_size] = total_label_length; total_label_length += L; int repeat_counter = 0; for (int i = 1; i < L; ++i) repeat_counter += (label_ptr[i] == label_ptr[i-1]); repeats[j % cpu_buffer_size] = repeat_counter; const bool valid_label = ((L + repeat_counter) <= local_T); // Only update S and T if label is valid S_ = (valid_label) ? std::max(S_, L) : S_; T_ = (valid_label) ? 
std::max(T_, local_T) : T_; Tmax = std::max(Tmax, local_T); Lmax = std::max(Lmax, L); } cuda_status = cudaMemcpyAsync(&(repeats_[start_idx]), repeats, (end_idx - start_idx) * sizeof(int), cudaMemcpyHostToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; cuda_status = cudaMemcpyAsync(&(label_offsets_[start_idx]), label_offsets, (end_idx - start_idx) * sizeof(int), cudaMemcpyHostToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; } S_ = 2 * S_ + 1; const int Smax = 2 * Lmax + 1; activation_cols_ = minibatch_ * Tmax; // Allocate memory for T utt_length_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(int); cuda_status = cudaMemcpyAsync(utt_length_, input_lengths, minibatch_ * sizeof(int), cudaMemcpyHostToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; label_sizes_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += minibatch_ * sizeof(int); cuda_status = cudaMemcpyAsync(label_sizes_, label_lengths, minibatch_ * sizeof(int), cudaMemcpyHostToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; labels_without_blanks_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += Lmax * minibatch_ * sizeof(int); cuda_status = cudaMemcpyAsync(labels_without_blanks_, flat_labels, total_label_length * sizeof(int), cudaMemcpyHostToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; labels_with_blanks_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += Smax * minibatch_ * sizeof(int); alphas_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += (S_ * T_) * minibatch_ * sizeof(ProbT); denoms_ = reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += activation_cols_ * sizeof(ProbT); log_probs_ = 
reinterpret_cast(static_cast(gpu_workspace_) + gpu_bytes_used); gpu_bytes_used += out_dim_ * activation_cols_ * sizeof(ProbT); return CTC_STATUS_SUCCESS; } template template ctcStatus_t GpuCTC::launch_alpha_beta_kernels(const ProbT* const log_probs, ProbT* grads, bool compute_alpha, bool compute_beta ) { // One thread block per utterance const int grid_size = minibatch_; // The data is laid out so that the next timestep is minibatch entries // away const int stride = minibatch_; if (compute_alpha) compute_alpha_kernel<<>> (log_probs, label_sizes_, utt_length_, repeats_, labels_without_blanks_, label_offsets_, labels_with_blanks_, alphas_, nll_forward_, stride, out_dim_, S_, T_, blank_label_); if (compute_beta) { compute_betas_and_grad_kernel<<>> (log_probs, label_sizes_, utt_length_, repeats_, labels_with_blanks_, alphas_, nll_forward_, nll_backward_, grads, stride, out_dim_, S_, T_, blank_label_); cudaStreamSynchronize(stream_); } cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) return CTC_STATUS_EXECUTION_FAILED; return CTC_STATUS_SUCCESS; } template ctcStatus_t GpuCTC::create_metadata_and_choose_config(const int* const flat_labels, const int* const label_lengths, const int* const input_lengths, size_t& best_config) { // Setup the metadata for GPU ctcStatus_t status = setup_gpu_metadata(flat_labels, label_lengths, input_lengths); if (status != CTC_STATUS_SUCCESS) return status; constexpr int num_configs = 12; int config_NT[num_configs] = {32, 64, 128, 64, 128, 32, 64, 128, 64, 128, 128, 128}; int config_VT[num_configs] = { 1, 1, 1, 3, 2, 9, 6, 4, 9, 6, 9, 10}; best_config = 0; for (int i = 0; i < num_configs; ++i) { if ((config_NT[i]* config_VT[i]) >= S_) break; else best_config++; } if (best_config >= num_configs) return CTC_STATUS_UNKNOWN_ERROR; return CTC_STATUS_SUCCESS; } template ctcStatus_t GpuCTC::launch_gpu_kernels(const ProbT* const log_probs, ProbT* grads, size_t config, bool l_a, bool l_b) { switch(config) { case 0: {return 
launch_alpha_beta_kernels<32, 1>(log_probs, grads, l_a, l_b);} case 1: {return launch_alpha_beta_kernels<64, 1>(log_probs, grads, l_a, l_b);} case 2: {return launch_alpha_beta_kernels<128, 1>(log_probs, grads, l_a, l_b);} case 3: {return launch_alpha_beta_kernels<64, 3>(log_probs, grads, l_a, l_b);} case 4: {return launch_alpha_beta_kernels<128, 2>(log_probs, grads, l_a, l_b);} case 5: {return launch_alpha_beta_kernels<32, 9>(log_probs, grads, l_a, l_b);} case 6: {return launch_alpha_beta_kernels<64, 6>(log_probs, grads, l_a, l_b);} case 7: {return launch_alpha_beta_kernels<128, 4>(log_probs, grads, l_a, l_b);} case 8: {return launch_alpha_beta_kernels<64, 9>(log_probs, grads, l_a, l_b);} case 9: {return launch_alpha_beta_kernels<128, 6>(log_probs, grads, l_a, l_b);} case 10: {return launch_alpha_beta_kernels<128, 9>(log_probs, grads, l_a, l_b);} case 11: {return launch_alpha_beta_kernels<128, 10>(log_probs, grads, l_a, l_b);} } return CTC_STATUS_EXECUTION_FAILED; } template ctcStatus_t GpuCTC::compute_log_probs(const ProbT* const activations) { cudaError_t cuda_status; cuda_status = cudaMemcpyAsync(log_probs_, activations, activation_cols_ * out_dim_ *sizeof(ProbT), cudaMemcpyDeviceToDevice, stream_); if (cuda_status != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; // create mshadow handles to data using namespace mshadow; using namespace mshadow::expr; Stream mxstream; mxstream.stream_ = stream_; Tensor log_probs_handle(log_probs_, mshadow::Shape2(activation_cols_, out_dim_), &mxstream); Tensor denoms_handle(denoms_, mshadow::Shape1(activation_cols_), &mxstream); denoms_handle = reduce_with_axis(log_probs_handle, 1); // Kernel launch to subtract maximum const int NT = 128; const int VT = 1; const int NV = NT * VT; const int num_elements = out_dim_ * activation_cols_; const int grid_size = ctc_helper::div_up(num_elements, NV); prepare_stable_LSM_kernel <<< grid_size, NT, 0, stream_>>> (ctc_helper::identity(), log_probs_, denoms_, out_dim_, num_elements); // 
compute denominators for softmax denoms_handle = reduce_with_axis(F(log_probs_handle), 1); // Kernel launch to calculate probabilities compute_log_probs_kernel<<>> (ctc_helper::identity(), log_probs_, denoms_, out_dim_, num_elements); cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) return CTC_STATUS_EXECUTION_FAILED; return CTC_STATUS_SUCCESS; } template ctcStatus_t GpuCTC::compute_cost_and_score(const ProbT* const activations, ProbT* grads, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths, bool compute_alpha, bool compute_betas_and_grad) { size_t best_config; ctcStatus_t status = create_metadata_and_choose_config(flat_labels, label_lengths, input_lengths, best_config); if (status != CTC_STATUS_SUCCESS) return status; status = compute_log_probs(activations); if (status != CTC_STATUS_SUCCESS) return status; launch_gpu_kernels(log_probs_, grads, best_config, compute_alpha, compute_betas_and_grad); cudaError_t cuda_status_mem, cuda_status_sync; cuda_status_mem = cudaMemcpyAsync(costs, nll_forward_, sizeof(ProbT) * minibatch_, cudaMemcpyDeviceToHost, stream_); cuda_status_sync = cudaStreamSynchronize(stream_); if (cuda_status_mem != cudaSuccess || cuda_status_sync != cudaSuccess) return CTC_STATUS_MEMOPS_FAILED; return CTC_STATUS_SUCCESS; } template ctcStatus_t GpuCTC::cost_and_grad(const ProbT* const activations, ProbT* grads, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const input_lengths) { if (activations == nullptr || grads == nullptr || costs == nullptr || flat_labels == nullptr || label_lengths == nullptr || input_lengths == nullptr ) return CTC_STATUS_INVALID_VALUE; return compute_cost_and_score(activations, grads, costs, flat_labels, label_lengths, input_lengths, true, true); } template ctcStatus_t GpuCTC::score_forward(const ProbT* const activations, ProbT* costs, const int* const flat_labels, const int* const label_lengths, const int* const 
input_lengths) { if (activations == nullptr || costs == nullptr || flat_labels == nullptr || label_lengths == nullptr || input_lengths == nullptr ) return CTC_STATUS_INVALID_VALUE; return compute_cost_and_score(activations, nullptr, costs, flat_labels, label_lengths, input_lengths, true, false); } } // mxnet_warpctc ================================================ FILE: 3rdparty/ctc_include/detail/gpu_ctc_kernels.h ================================================ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/

#pragma once

#include "../contrib/moderngpu/include/device/ctascan.cuh"
#include "../contrib/moderngpu/include/device/ctamerge.cuh"

#include "ctc_helper.h"

using namespace mgpu;

// Block-level preprocessing for a reduce-by-key: finds the runs of equal keys
// in `keys`, compacts one key per run back into `keys`, and reports each run's
// [start, end] element range.
// NOTE(review): the template parameter list (and the argument lists on CTAScan)
// appear to have been stripped from this copy; the body uses NT, VT, T and KeyT
// -- restore them from the repository before compiling.
template struct CTASegReduce {

    // Number of elements handled by one block.
    enum {NV = NT * VT};

    // The scan scratch space and the compacted-key scratch space are never
    // live at the same time (a __syncthreads() separates them below), so they
    // share storage.
    union Storage {
        typename CTAScan::Storage scanStorage;
        int indices[NV];
    };

    //adapted from global kernel KernelReduceByKeyPreprocess
    // keys            in: `count` keys; out: one key per run, compacted
    // numUniqueLabels out: total written by the block-wide scan (number of runs)
    // seg_start/end   out: per-thread first/last element index of the runs this
    //                 thread owns (run idx = tid + j * blockDim.x)
    // scanout         out: index of the last element of each run
    __device__ static void preprocessKeys(KeyT *keys, int count,
                                          int *numUniqueLabels, int seg_start[VT],
                                          int seg_end[VT], int *scanout) {
        __shared__ Storage shared;

        const int tid = threadIdx.x;

        // Compare adjacent keys within each thread and mark discontinuities
        // Bit i of endFlags is set when element VT * tid + i is the last of its
        // run, i.e. its successor differs or it is the last valid element.
        int endFlags = 0;
        T key = keys[VT * tid];
        #pragma unroll
        for (int i = 0; i < VT; ++i) {
            int index = VT * tid + 1 + i;
            // NOTE(review): index can reach NT * VT for the last thread; if
            // `keys` holds exactly NV elements this reads one past the end
            // (the value is unused in that case) -- confirm the buffer size.
            T next = keys[index];
            if(index == count || (index < count && key != next)) {
                endFlags |= 1 << i;
            }
            key = next;
        }

        __syncthreads();

        //Count the number of encountered end flags
        int scan = CTAScan::Scan(tid, popc(endFlags), shared.scanStorage, numUniqueLabels);

        __syncthreads();

        //output the unique keys
        //use indices as scratch space
        // `scan` is used as this thread's first output slot.
        int outputPos = scan;
        #pragma unroll
        for (int i = 0; i < VT; ++i) {

            if ( (endFlags >> i) & 1) {
                shared.indices[outputPos] = keys[VT * tid + i];
                scanout[outputPos] = VT * tid + i;
                outputPos++;
            }
        }

        __syncthreads();

        // Create start and end
        // A run starts right after the previous run's last element.
        for (int idx = tid, j = 0; idx < (*numUniqueLabels); idx += blockDim.x, ++j) {
            seg_start[j] = (idx == 0) ? 0 : (scanout[idx-1] + 1);
            seg_end[j] = scanout[idx];
        }

        __syncthreads();

        //copy from the scratch space back into the keys
        #pragma unroll
        for (int i = 0; i < VT; ++i) {
            keys[i * NT + tid] = shared.indices[i * NT + tid];
        }

        __syncthreads();
    }
};

// Computes forward probabilities. This fills in a T * S matrix.
// The computation starts at t=1 (2nd row) and ends at t=T-1 (last row). Each row has
// S elements where S = 2L + 1.
// // We only need to read in probabilities corresponding to the labels, thus a sparse // set of values are read from the log probs matrix since the character set is much smaller // than the labels. This is much more true for Mandarin than English. template __global__ void compute_alpha_kernel (const ProbT* log_probs, const int *label_sizes, const int *utt_length, const int *repeats_in_labels, const int *labels_without_blanks, const int *label_offsets, int *labels_with_blanks, ProbT *alphas, ProbT* nll_forward, int stride, int out_dim, int S_memoffset, int T_memoffset, int blank_label) { ctc_helper::log_plus log_plus_f; const int tid = threadIdx.x; const int L = label_sizes[blockIdx.x]; const int T = utt_length[blockIdx.x]; const int S = 2*L + 1; const int prob_offset = out_dim * blockIdx.x; const int repeats = repeats_in_labels[blockIdx.x]; const int NV = NT * VT; __shared__ int label[NV]; if ((L + repeats) > T) return; // Generate labels with blanks from labels without blanks { const int label_start_offset = label_offsets[blockIdx.x]; for (int idx = tid; idx < L; idx += blockDim.x) { const int offset = (blockIdx.x * S_memoffset) + 2 * idx; labels_with_blanks[offset] = blank_label; labels_with_blanks[offset+1] = labels_without_blanks[label_start_offset + idx]; } if (tid == 0) { labels_with_blanks[(blockIdx.x * S_memoffset) + 2 * L] = blank_label; } } __syncthreads(); const int *labels = labels_with_blanks; const int* label_global = &labels[blockIdx.x * S_memoffset]; ProbT* alpha = &alphas[blockIdx.x * (S_memoffset * T_memoffset)]; // Set the first row of alpha neg_inf - it is much more efficient to do it // here than outside #pragma unroll for (int idx = tid; idx < min(S, NV); idx += blockDim.x) { alpha[idx] = ctc_helper::neg_inf(); } // Load labels into shared memory #pragma unroll for (int i = tid; i < S; i += NT) { label[i] = label_global[i]; } __syncthreads(); int start = (L + repeats < T) ? 0 : 1; int end = S > 1 ? 
2 : 1; // Initialize the first row corresponding to t=0; for(int i = tid; i < (end-start); i += blockDim.x) alpha[i + start] = log_probs[prob_offset + label[i + start]]; __syncthreads(); // Fill in the rest of matrix, one row at a time (outer loop). for(int t = 1; t < T; ++t) { // Start offsets into the current and previous row const int start_cur_row = t * S; const int start_prev_row = (t - 1) * S; // The prob is a 2D column major array, with probabilites for each t strided // by (out_dim * stride), where stride is the minibatch size const int start_prob_col = t * (out_dim * stride); // This is the first column and in this case there is nothing left of it if (tid == 0) { if (start == 0) { alpha[start_cur_row] = alpha[start_prev_row] + log_probs[prob_offset + start_prob_col + blank_label]; } else if (start == 1) { alpha[start_cur_row] = alpha[start_prev_row]; } } __syncthreads(); // Fill in the elements in each row. There is no loop dependence here since our // input is the row above. We sum either two or three adjacent values from the // row above depending on whether we have a blank or repeated characters. Finally // we add the probability corresponding to this label at time t #pragma unroll for (int idx = (tid+1); idx < S; idx += blockDim.x) { ProbT prev_sum = log_plus_f(alpha[idx + start_prev_row], alpha[(idx-1) + start_prev_row]); // Skip two if not on blank and not on repeat. if ((label[idx] != blank_label) && (idx != 1) && (label[idx] != label[idx-2])) prev_sum = log_plus_f(prev_sum, alpha[(idx-2) + start_prev_row]); alpha[idx + start_cur_row] = prev_sum + log_probs[prob_offset + start_prob_col + label[idx]]; } __syncthreads(); } if (tid == 0) { // Add and return the rightmost two/one element(s) in the last row. ProbT loglike = ctc_helper::neg_inf(); // This is the total increment for s_inc and e_inc through the loop const int val = 2 * (L-1) + 1 - (((L + repeats) == T) ? 
1 : 0); start = (val * (L!=0) + start); end = (val * (L!=0) + end); for(int i = start; i < end; ++i) loglike = log_plus_f(loglike, alpha[i + (T - 1) * S]); nll_forward[blockIdx.x] = -loglike; } } // Computes backward probabilities. This also fills in a T * S matrix // // See comments above compute_alphas for more context. template __global__ void compute_betas_and_grad_kernel (const ProbT* log_probs, const int *label_sizes, const int *utt_length, const int *repeats_in_labels, const int *labels_with_blanks, ProbT *alphas, const ProbT* nll_forward, ProbT *nll_backward, ProbT *grads, int stride, int out_dim, int S_memoffset, int T_memoffset, int blank_label) { ctc_helper::log_plus log_plus_f; typedef CTASegReduce> SegReduce; const int tid = threadIdx.x; const int L = label_sizes[blockIdx.x]; const int T = utt_length[blockIdx.x]; const int S = 2*L + 1; const int prob_offset = out_dim * blockIdx.x; const int repeats = repeats_in_labels[blockIdx.x]; const ProbT log_partition = -nll_forward[blockIdx.x]; const int* labels = labels_with_blanks; const int* label_global = &labels[blockIdx.x * S_memoffset]; ProbT* alpha = &alphas[blockIdx.x * (S_memoffset * T_memoffset)]; const int NV = NT * VT; union TempStorage { ProbT beta[NV]; int result[NV]; }; __shared__ TempStorage temp_buffer; __shared__ int label[NV]; // Temporaries needed for segmented reduce // TODO: see if we can combine the shared memory requirements __shared__ int keys_shared[NV]; __shared__ int gather_indices[NV]; __shared__ ProbT output[NV]; ProbT beta_val[VT]; if ((L + repeats) > T) return; int start = S > 1 ? (S - 2) : 0; int end = (L + repeats < T) ? S : S-1; // Setup shared memory buffers #pragma unroll for (int idx = tid; idx < NV; idx += NT) { label[idx] = (idx < S) ? 
label_global[idx] : INT_MAX; } __syncthreads(); // int flags; int uniquelabels; int seg_start[VT]; int seg_end[VT]; // Sort labels and record indices from which to gather from { int key[VT]; int gather_val[VT]; #pragma unroll for (int i = 0; i < VT; ++i) { const int idx = tid * VT + i; gather_val[i] = idx; key[i] = label[idx]; } __syncthreads(); CTAMergesort> (key, gather_val, keys_shared, gather_indices, S, tid, mgpu::less()); __syncthreads(); for (int i = 0; i < VT; ++i) { const int idx = tid * VT + i; gather_indices[idx] = gather_val[i]; } __syncthreads(); SegReduce::preprocessKeys(keys_shared, S, &uniquelabels, seg_start, seg_end, temp_buffer.result); __syncthreads(); } // TODO: probably not necessary __syncthreads(); // Load labels back #pragma unroll for (int idx = tid; idx < NV; idx += NT) { temp_buffer.beta[idx] = ctc_helper::neg_inf(); } __syncthreads(); // Initialize the two rightmost values in the last row (assuming L non-zero) for(int i = tid; i < (end-start); i += blockDim.x) temp_buffer.beta[i + start] = log_probs[prob_offset + (T - 1) * (out_dim * stride) + label[i + start]]; __syncthreads(); // Load output data in registers through the transpose trick - should really be a function #pragma unroll for (int idx = tid; idx < S; idx += NT) { output[idx] = alpha[idx + (T - 1) * S] + temp_buffer.beta[idx]; } __syncthreads(); // Start at the second to last row and backward in time for(int t = T - 1; t >= 0; --t) { // Start offsets into the current and next row const int start_cur_row = t * S; // Starting offset of column that we read from the log probs array const int start_prob_col = t * (out_dim * stride); if (t < T-1) { // Filling up one row at at time but going back in time from the last row // to the first. 
As in the forward pass, there is no loop dependence and we // do a variable length filter of maximum filter size of 3 #pragma unroll for(int idx = tid, i = 0; idx < (S-1); idx += NT, i++) { ProbT next_sum = log_plus_f(temp_buffer.beta[idx], temp_buffer.beta[idx+1]); // Skip two if not on blank and not on repeat. if ((label[idx] != blank_label) && (idx != (S-2)) && (label[idx] != label[idx+2])) next_sum = log_plus_f(next_sum, temp_buffer.beta[idx+2]); beta_val[i] = next_sum + log_probs[prob_offset + start_prob_col + label[idx]]; } __syncthreads(); // Initialize values for the rightmost column since there is nothing to the right // Update input buffer for next iteration if ((tid == 0) && (end == S)) temp_buffer.beta[(S-1)] = temp_buffer.beta[(S-1)] + log_probs[prob_offset + start_prob_col + blank_label]; #pragma unroll for(int idx = tid, i = 0; idx < (S-1); idx += NT, i++) { temp_buffer.beta[idx] = beta_val[i]; } __syncthreads(); // Beta Computation done - add to alpha and update the gradient. 
Reload // the gradient back for segmented reduce later on #pragma unroll for(int idx = tid; idx < S; idx += NT) { output[idx] = alpha[idx + start_cur_row] + temp_buffer.beta[idx]; } __syncthreads(); } __syncthreads(); // Compute segmented reduction of output by using label as key { // Somewhat faster key value reduce ProbT accum[VT]; for (int idx = tid, j = 0; idx < uniquelabels; idx += blockDim.x, ++j) { accum[j] = ctc_helper::neg_inf(); for (int i = seg_start[j]; i <= seg_end[j]; ++i) { accum[j] = log_plus_f(accum[j], output[gather_indices[i]]); } } __syncthreads(); // Write accumulated value into output since that is not used for (int idx = tid, j = 0; idx < uniquelabels; idx += blockDim.x, ++j) { output[idx] = accum[j]; } __syncthreads(); for (int idx = tid; idx < out_dim; idx += blockDim.x) { const int grads_offset = prob_offset + start_prob_col + idx; grads[grads_offset] = exp(log_probs[grads_offset]); } __syncthreads(); for (int idx = tid; idx < uniquelabels; idx += blockDim.x) { const int grads_offset = prob_offset + start_prob_col + keys_shared[idx]; ProbT grad = output[idx]; if ((grad == 0.0) || (log_probs[grads_offset] == ctc_helper::neg_inf()) || (grad == ctc_helper::neg_inf())) { } else { grads[grads_offset] = exp(log_probs[grads_offset]) - exp(grad - log_probs[grads_offset] - log_partition); } } __syncthreads(); } // Output backward log likelihood if ((t == 0) && (tid == 0)) { ProbT loglike = ctc_helper::neg_inf(); const int val = 2 * (L-1) + 1 - (((L + repeats) == T) ? 
1 : 0); start = (-val * (L != 0) + start); end = (-val * (L != 0) + end); // Sum and return the leftmost one/two value(s) in first row for(int i = start; i < end; ++i) loglike = log_plus_f(loglike, temp_buffer.beta[i]); nll_backward[blockIdx.x] = -loglike; } // For some reason this is important __syncthreads(); } } template __global__ void compute_log_probs_kernel(Op f, ProbT* log_probs, const ProbT* const denom, int alphabet_size, int count) { int idx = blockDim.x * blockIdx.x + threadIdx.x; int stride = blockDim.x * gridDim.x; #pragma unroll for(int i = 0; i < VT; i++) { if (idx < count) { const int column_idx = idx / alphabet_size; log_probs[idx] = log_probs[idx] - log(denom[column_idx]); } idx += stride; } } template __global__ void prepare_stable_LSM_kernel(Op f, ProbT* log_probs, const ProbT* const col_max, int alphabet_size, int count) { int idx = blockDim.x * blockIdx.x + threadIdx.x; int stride = blockDim.x * gridDim.x; #pragma unroll for(int i = 0; i < VT; i++) { if (idx < count) { const int column_idx = idx / alphabet_size; log_probs[idx] = f(log_probs[idx] - col_max[column_idx]); } idx += stride; } } ================================================ FILE: 3rdparty/ctc_include/detail/hostdevice.h ================================================ /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #ifdef __CUDACC__ #define HOSTDEVICE __host__ __device__ #else #define HOSTDEVICE #endif ================================================ FILE: 3rdparty/miniz/miniz.c ================================================ /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ #include "miniz.h" typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; #ifdef __cplusplus extern "C" { #endif /* ------------------- zlib-style API's */ mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; if (!ptr) return MZ_ADLER32_INIT; while (buf_len) { for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; } for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } return (s2 << 16) + s1; } /* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ */ #if 0 mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; mz_uint32 crcu32 = (mz_uint32)crc; if (!ptr) return MZ_CRC32_INIT; crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } return ~crcu32; } #else /* Faster, but larger CPU cache footprint. 
*/ mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { static const mz_uint32 s_crc_table[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; mz_uint32 crc32 = (mz_uint32)crc ^ 0xFFFFFFFF; const mz_uint8 *pByte_buf = (const mz_uint8 *)ptr; while (buf_len >= 4) { crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[1]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[2]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[3]) & 0xFF]; pByte_buf += 4; buf_len -= 4; } while (buf_len) { crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; ++pByte_buf; --buf_len; } return ~crc32; } #endif void mz_free(void *p) { MZ_FREE(p); } void *miniz_def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } void miniz_def_free_func(void *opaque, void *address) { 
(void)opaque, (void)address; MZ_FREE(address); } void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } const char *mz_version(void) { return MZ_VERSION; } #ifndef MINIZ_NO_ZLIB_APIS int mz_deflateInit(mz_streamp pStream, int level) { return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); } int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) { tdefl_compressor *pComp; mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); if (!pStream) return MZ_STREAM_ERROR; if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = MZ_ADLER32_INIT; pStream->msg = NULL; pStream->reserved = 0; pStream->total_in = 0; pStream->total_out = 0; if (!pStream->zalloc) pStream->zalloc = miniz_def_alloc_func; if (!pStream->zfree) pStream->zfree = miniz_def_free_func; pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); if (!pComp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state *)pComp; if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { mz_deflateEnd(pStream); return MZ_PARAM_ERROR; } return MZ_OK; } int mz_deflateReset(mz_streamp pStream) { if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; pStream->total_in = pStream->total_out = 0; tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, ((tdefl_compressor *)pStream->state)->m_flags); return MZ_OK; } int mz_deflate(mz_streamp pStream, int flush) { size_t in_bytes, out_bytes; mz_ulong orig_total_in, orig_total_out; int mz_status = MZ_OK; if ((!pStream) || 
(!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; if (!pStream->avail_out) return MZ_BUF_ERROR; if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; if (((tdefl_compressor *)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; for (;;) { tdefl_status defl_status; in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; defl_status = tdefl_compress((tdefl_compressor *)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; if (defl_status < 0) { mz_status = MZ_STREAM_ERROR; break; } else if (defl_status == TDEFL_STATUS_DONE) { mz_status = MZ_STREAM_END; break; } else if (!pStream->avail_out) break; else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) break; return MZ_BUF_ERROR; /* Can't make forward progress without some input. */ } } return mz_status; } int mz_deflateEnd(mz_streamp pStream) { if (!pStream) return MZ_STREAM_ERROR; if (pStream->state) { pStream->zfree(pStream->opaque, pStream->state); pStream->state = NULL; } return MZ_OK; } mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { (void)pStream; /* This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) 
*/ return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); } int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) { int status; mz_stream stream; memset(&stream, 0, sizeof(stream)); /* In case mz_ulong is 64-bits (argh I hate longs). */ if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; stream.next_in = pSource; stream.avail_in = (mz_uint32)source_len; stream.next_out = pDest; stream.avail_out = (mz_uint32)*pDest_len; status = mz_deflateInit(&stream, level); if (status != MZ_OK) return status; status = mz_deflate(&stream, MZ_FINISH); if (status != MZ_STREAM_END) { mz_deflateEnd(&stream); return (status == MZ_OK) ? MZ_BUF_ERROR : status; } *pDest_len = stream.total_out; return mz_deflateEnd(&stream); } int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) { return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); } mz_ulong mz_compressBound(mz_ulong source_len) { return mz_deflateBound(NULL, source_len); } typedef struct { tinfl_decompressor m_decomp; mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; tinfl_status m_last_status; } inflate_state; int mz_inflateInit2(mz_streamp pStream, int window_bits) { inflate_state *pDecomp; if (!pStream) return MZ_STREAM_ERROR; if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = 0; pStream->msg = NULL; pStream->total_in = 0; pStream->total_out = 0; pStream->reserved = 0; if (!pStream->zalloc) pStream->zalloc = miniz_def_alloc_func; if (!pStream->zfree) pStream->zfree = miniz_def_free_func; pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); if (!pDecomp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state 
*)pDecomp; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; pDecomp->m_dict_avail = 0; pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; pDecomp->m_first_call = 1; pDecomp->m_has_flushed = 0; pDecomp->m_window_bits = window_bits; return MZ_OK; } int mz_inflateInit(mz_streamp pStream) { return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); } int mz_inflateReset(mz_streamp pStream) { inflate_state *pDecomp; if (!pStream) return MZ_STREAM_ERROR; pStream->data_type = 0; pStream->adler = 0; pStream->msg = NULL; pStream->total_in = 0; pStream->total_out = 0; pStream->reserved = 0; pDecomp = (inflate_state *)pStream->state; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; pDecomp->m_dict_avail = 0; pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; pDecomp->m_first_call = 1; pDecomp->m_has_flushed = 0; /* pDecomp->m_window_bits = window_bits */; return MZ_OK; } int mz_inflate(mz_streamp pStream, int flush) { inflate_state *pState; mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; size_t in_bytes, out_bytes, orig_avail_in; tinfl_status status; if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; pState = (inflate_state *)pStream->state; if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; orig_avail_in = pStream->avail_in; first_call = pState->m_first_call; pState->m_first_call = 0; if (pState->m_last_status < 0) return MZ_DATA_ERROR; if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; pState->m_has_flushed |= (flush == MZ_FINISH); if ((flush == MZ_FINISH) && (first_call)) { /* MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
*/ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); pState->m_last_status = status; pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; if (status < 0) return MZ_DATA_ERROR; else if (status != TINFL_STATUS_DONE) { pState->m_last_status = TINFL_STATUS_FAILED; return MZ_BUF_ERROR; } return MZ_STREAM_END; } /* flush != MZ_FINISH then we must assume there's more input. */ if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; if (pState->m_dict_avail) { n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; } for (;;) { in_bytes = pStream->avail_in; out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); pState->m_last_status = status; pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); pState->m_dict_avail = (mz_uint)out_bytes; n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); if (status < 0) return MZ_DATA_ERROR; /* Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). */ else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. */ else if (flush == MZ_FINISH) { /* The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. */ if (status == TINFL_STATUS_DONE) return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. */ else if (!pStream->avail_out) return MZ_BUF_ERROR; } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) break; } return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; } int mz_inflateEnd(mz_streamp pStream) { if (!pStream) return MZ_STREAM_ERROR; if (pStream->state) { pStream->zfree(pStream->opaque, pStream->state); pStream->state = NULL; } return MZ_OK; } int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) { mz_stream stream; int status; memset(&stream, 0, sizeof(stream)); /* In case mz_ulong is 64-bits (argh I hate longs). */ if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; stream.next_in = pSource; stream.avail_in = (mz_uint32)source_len; stream.next_out = pDest; stream.avail_out = (mz_uint32)*pDest_len; status = mz_inflateInit(&stream); if (status != MZ_OK) return status; status = mz_inflate(&stream, MZ_FINISH); if (status != MZ_STREAM_END) { mz_inflateEnd(&stream); return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; } *pDest_len = stream.total_out; return mz_inflateEnd(&stream); } const char *mz_error(int err) { static struct { int m_err; const char *m_pDesc; } s_error_descs[] = { { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } }; mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; return NULL; } #endif /*MINIZ_NO_ZLIB_APIS */ #ifdef __cplusplus } #endif /* This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. 
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to <http://unlicense.org/> */ /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ #ifdef __cplusplus extern "C" { #endif /* ------------------- Low-level Compression (independent from all decompression API's) */ /* Purposely making these tables static for faster init and thread safety. */ static const mz_uint16 s_tdefl_len_sym[256] = { 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 }; static const mz_uint8 s_tdefl_len_extra[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 }; static const mz_uint8 s_tdefl_small_dist_sym[512] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 }; static const mz_uint8 s_tdefl_small_dist_extra[512] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; static const mz_uint8 s_tdefl_large_dist_sym[128] = { 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 }; static const mz_uint8 s_tdefl_large_dist_extra[128] = { 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 }; /* Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. 
*/
typedef struct
{
    mz_uint16 m_key, m_sym_index;
} tdefl_sym_freq;

/* Byte-wise radix sort of num_syms entries by m_key, least significant byte first.
   pSyms0 holds the input; pSyms1 is scratch space of the same length. Each pass scatters
   from one buffer into the other, so the returned pointer is whichever of the two buffers
   ends up holding the sorted sequence. The high-byte pass is skipped when every key has a
   zero high byte. */
static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq *pSyms0, tdefl_sym_freq *pSyms1)
{
    mz_uint32 hist[256 * 2];
    mz_uint32 num_passes = 2, shift = 0, pass, k;
    tdefl_sym_freq *pSrc = pSyms0;
    tdefl_sym_freq *pDst = pSyms1;

    MZ_CLEAR_OBJ(hist);

    /* Histogram the low byte into hist[0..255] and the high byte into hist[256..511]. */
    for (k = 0; k < num_syms; k++)
    {
        const mz_uint key = pSyms0[k].m_key;
        hist[key & 0xFF]++;
        hist[256 + ((key >> 8) & 0xFF)]++;
    }

    /* If every key landed in bucket 0 of the top remaining pass, that pass is a no-op. */
    while ((num_passes > 1) && (hist[(num_passes - 1) * 256] == num_syms))
        num_passes--;

    for (pass = 0; pass < num_passes; pass++)
    {
        const mz_uint32 *pPass_hist = &hist[pass << 8];
        mz_uint bucket_ofs[256];
        mz_uint running = 0;
        tdefl_sym_freq *pSwap;

        /* Exclusive prefix sum: first output slot of each bucket. */
        for (k = 0; k < 256; k++)
        {
            bucket_ofs[k] = running;
            running += pPass_hist[k];
        }

        /* Scatter in input order, which keeps equal digits in their previous order. */
        for (k = 0; k < num_syms; k++)
        {
            const mz_uint digit = (pSrc[k].m_key >> shift) & 0xFF;
            pDst[bucket_ofs[digit]++] = pSrc[k];
        }

        pSwap = pSrc;
        pSrc = pDst;
        pDst = pSwap;

        shift += 8;
    }

    return pSrc;
}

/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
*/
/* Computes Huffman code lengths in place, using only the m_key fields of A[0..n-1].
   On entry m_key holds each entry's frequency; the merge below walks the array as if it is
   in ascending key order (the caller in this file radix-sorts before calling). On return
   m_key holds the code length assigned to that entry; lengths are written from A[n-1]
   downwards with increasing depth, so lower indices receive the longer codes.
   m_sym_index is never read or written. n == 0 is a no-op; n == 1 yields length 1. */
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
{
    int root, leaf, next, avbl, used, dpth;
    if (n == 0)
        return;
    else if (n == 1)
    {
        A[0].m_key = 1;
        return;
    }
    /* Phase 1: build the tree bottom-up. A[next] becomes a new internal node whose weight is
       the sum of the two smallest remaining items, each taken either from the unread leaves
       (index 'leaf') or from the queue of already built internal nodes (index 'root').
       When an internal node is consumed, its slot is overwritten with its parent's index. */
    A[0].m_key += A[1].m_key;
    root = 0;
    leaf = 2;
    for (next = 1; next < n - 1; next++)
    {
        /* First child of the new node. */
        if (leaf >= n || A[root].m_key < A[leaf].m_key)
        {
            A[next].m_key = A[root].m_key;
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = A[leaf++].m_key;
        /* Second child; 'root < next' stops a node from being paired with itself. */
        if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key))
        {
            A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
    }
    /* Phase 2: A[n-2] is the tree root (depth 0); turn every parent index into the depth of
       that internal node by walking from the root towards the leaves. */
    A[n - 2].m_key = 0;
    for (next = n - 3; next >= 0; next--)
        A[next].m_key = A[A[next].m_key].m_key + 1;
    /* Phase 3: convert internal-node depths into leaf depths. At each depth 'avbl' slots
       exist; 'used' of them are internal nodes, the remainder are leaves that receive the
       current depth as their code length. */
    avbl = 1;
    used = dpth = 0;
    root = n - 2;
    next = n - 1;
    while (avbl > 0)
    {
        while (root >= 0 && (int)A[root].m_key == dpth)
        {
            used++;
            root--;
        }
        while (avbl > used)
        {
            A[next--].m_key = (mz_uint16)(dpth);
            avbl--;
        }
        /* Every internal node at this depth contributes two slots to the next one. */
        avbl = 2 * used;
        dpth++;
        used = 0;
    }
}

/* Limits canonical Huffman code table's max code size.
*/ enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) { int i; mz_uint32 total = 0; if (code_list_len <= 1) return; for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); while (total != (1UL << max_code_size)) { pNum_codes[max_code_size]--; for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } total--; } } static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) { int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); if (static_table) { for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; } else { tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; int num_used_syms = 0; const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); for (i = 1, j = num_used_syms; i <= code_size_limit; i++) for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); } next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); for (i = 0; i < 
table_len; i++) { mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; } } #define TDEFL_PUT_BITS(b, l) \ do \ { \ mz_uint bits = b; \ mz_uint len = l; \ MZ_ASSERT(bits <= ((1U << len) - 1U)); \ d->m_bit_buffer |= (bits << d->m_bits_in); \ d->m_bits_in += len; \ while (d->m_bits_in >= 8) \ { \ if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ d->m_bit_buffer >>= 8; \ d->m_bits_in -= 8; \ } \ } \ MZ_MACRO_END #define TDEFL_RLE_PREV_CODE_SIZE() \ { \ if (rle_repeat_count) \ { \ if (rle_repeat_count < 3) \ { \ d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ while (rle_repeat_count--) \ packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ } \ else \ { \ d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 16; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ } \ rle_repeat_count = 0; \ } \ } #define TDEFL_RLE_ZERO_CODE_SIZE() \ { \ if (rle_z_count) \ { \ if (rle_z_count < 3) \ { \ d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ while (rle_z_count--) \ packed_code_sizes[num_packed_code_sizes++] = 0; \ } \ else if (rle_z_count <= 10) \ { \ d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 17; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ } \ else \ { \ d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 18; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ } \ rle_z_count = 0; \ } \ } static mz_uint8 
s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
/* The table above gives the order in which the code sizes of the 19 code-length symbols are
   written into a dynamic block header (see its use in tdefl_start_dynamic_block). */

/* Builds the literal/length and distance Huffman tables from the symbol counts gathered in
   d->m_huff_count[0] and [1], run-length packs their code sizes, builds the code-length
   table (table 2) from the packed stream, and writes the dynamic block header via
   TDEFL_PUT_BITS. The LZ codes themselves are written afterwards by the caller. */
static void tdefl_start_dynamic_block(tdefl_compressor *d)
{
    int num_lit_codes, num_dist_codes, num_bit_lengths;
    mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
    mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF;

    /* Symbol 256 (end of block) is always emitted once, so make sure it gets a code. */
    d->m_huff_count[0][256] = 1;

    tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
    tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);

    /* Trim trailing unused symbols, keeping at least 257 literal/length codes and 1 distance code. */
    for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
        if (d->m_huff_code_sizes[0][num_lit_codes - 1])
            break;
    for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
        if (d->m_huff_code_sizes[1][num_dist_codes - 1])
            break;

    /* Both size arrays are packed as one contiguous sequence. */
    memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
    memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes);
    total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
    num_packed_code_sizes = 0;
    rle_z_count = 0;
    rle_repeat_count = 0;

    memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
    /* Run-length pack the sizes. Zero runs are cut at 138 and repeat runs at 6 (after the
       first literal occurrence) because those are the largest counts the flush macros encode. */
    for (i = 0; i < total_code_sizes_to_pack; i++)
    {
        mz_uint8 code_size = code_sizes_to_pack[i];
        if (!code_size)
        {
            TDEFL_RLE_PREV_CODE_SIZE();
            if (++rle_z_count == 138)
            {
                TDEFL_RLE_ZERO_CODE_SIZE();
            }
        }
        else
        {
            TDEFL_RLE_ZERO_CODE_SIZE();
            if (code_size != prev_code_size)
            {
                TDEFL_RLE_PREV_CODE_SIZE();
                d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1);
                packed_code_sizes[num_packed_code_sizes++] = code_size;
            }
            else if (++rle_repeat_count == 6)
            {
                TDEFL_RLE_PREV_CODE_SIZE();
            }
        }
        prev_code_size = code_size;
    }
    /* Flush whichever run is still pending at the end of the sequence. */
    if (rle_repeat_count)
    {
        TDEFL_RLE_PREV_CODE_SIZE();
    }
    else
    {
        TDEFL_RLE_ZERO_CODE_SIZE();
    }

    /* Code-length alphabet: code sizes limited to 7 bits (they are written in 3-bit fields below). */
    tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);

    /* Header: 2-bit block type (2), then the three count fields biased by 257, 1 and 4. */
    TDEFL_PUT_BITS(2, 2);

    TDEFL_PUT_BITS(num_lit_codes - 257, 5);
    TDEFL_PUT_BITS(num_dist_codes - 1, 5);

    /* Find the last code-length symbol, in swizzled order, that is actually used; at least 4 are sent. */
    for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
        if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]])
            break;
    num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1));
    TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
    for (i = 0; (int)i < num_bit_lengths; i++)
        TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);

    /* Emit the packed code sizes. Symbols 16, 17 and 18 are followed by a repeat-count byte in
       packed_code_sizes, written with 2, 3 and 7 extra bits respectively. */
    for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;)
    {
        mz_uint code = packed_code_sizes[packed_code_sizes_index++];
        MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
        TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
        if (code >= 16)
            TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]);
    }
}

/* Loads the fixed code sizes (8/9/7/8 bits over literal/length symbols 0-143, 144-255,
   256-279, 280-287 and 5 bits for all 32 distance symbols), derives the matching codes, and
   writes the 2-bit block type (1). */
static void tdefl_start_static_block(tdefl_compressor *d)
{
    mz_uint i;
    mz_uint8 *p = &d->m_huff_code_sizes[0][0];

    for (i = 0; i <= 143; ++i)
        *p++ = 8;
    for (; i <= 255; ++i)
        *p++ = 9;
    for (; i <= 279; ++i)
        *p++ = 7;
    for (; i <= 287; ++i)
        *p++ = 8;

    memset(d->m_huff_code_sizes[1], 5, 32);

    /* static_table = MZ_TRUE: keep the sizes set above and only generate the codes. */
    tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
    tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);

    TDEFL_PUT_BITS(1, 2);
}

/* mz_bitmasks[n] has the low n bits set, for n = 0..16. */
static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
/* Variant for little-endian targets with 64-bit registers and unaligned access: encodes the
   buffered LZ codes with the current Huffman tables, accumulating bits in a local 64-bit
   buffer that is stored to the output eight bytes at a time. */
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    mz_uint flags;
    mz_uint8 *pLZ_codes;
    mz_uint8 *pOutput_buf = d->m_pOutput_buf;
    mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
    mz_uint64 bit_buffer = d->m_bit_buffer;
    mz_uint bits_in = d->m_bits_in;

#define TDEFL_PUT_BITS_FAST(b, l)                    \
    {                                                \
        bit_buffer |= (((mz_uint64)(b)) << bits_in); \
        bits_in += (l);                              \
    }

    flags = 1;
    for (pLZ_codes = d->m_lz_code_buf;
pLZ_codes < pLZ_code_buf_end; flags >>= 1)
    {
        /* flags == 1 means only the 0x100 sentinel is left: load the next flag byte. */
        if (flags == 1)
            flags = *pLZ_codes++ | 0x100;
        if (flags & 1)
        {
            /* Match record: one length byte followed by a 16-bit distance. */
            mz_uint s0, s1, n0, n1, sym, num_extra_bits;
            mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1);
            pLZ_codes += 3;

            MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);

            /* This sequence coaxes MSVC into using cmov's vs. jmp's. */
            s0 = s_tdefl_small_dist_sym[match_dist & 511];
            n0 = s_tdefl_small_dist_extra[match_dist & 511];
            s1 = s_tdefl_large_dist_sym[match_dist >> 8];
            n1 = s_tdefl_large_dist_extra[match_dist >> 8];
            sym = (match_dist < 512) ? s0 : s1;
            num_extra_bits = (match_dist < 512) ? n0 : n1;

            MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
        }
        else
        {
            /* Literal record: one byte. Up to two further literals are encoded in the same
               iteration when the next flag bits also say "literal" and data remains. */
            mz_uint lit = *pLZ_codes++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

            if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
            {
                flags >>= 1;
                lit = *pLZ_codes++;
                MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

                if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
                {
                    flags >>= 1;
                    lit = *pLZ_codes++;
                    MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                    TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
                }
            }
        }

        /* Out of room: report failure to the caller. */
        if (pOutput_buf >= d->m_pOutput_buf_end)
            return MZ_FALSE;

        /* Store all 8 bytes of the accumulator, but advance only past the complete bytes and
           keep the 0-7 leftover bits for the next iteration. */
        *(mz_uint64 *)pOutput_buf = bit_buffer;
        pOutput_buf += (bits_in >> 3);
        bit_buffer >>= (bits_in & ~7);
        bits_in &= 7;
    }

#undef TDEFL_PUT_BITS_FAST

    /* Hand the remaining bits back to the compressor's own bit writer, at most 16 at a time. */
    d->m_pOutput_buf = pOutput_buf;
    d->m_bits_in = 0;
    d->m_bit_buffer = 0;

    while (bits_in)
    {
        mz_uint32 n = MZ_MIN(bits_in, 16);
        TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
        bit_buffer >>= n;
        bits_in -= n;
    }

    /* Symbol 256 terminates the block. */
    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);

    /* MZ_FALSE when the output pointer has reached the end marker. */
    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#else
/* Portable variant: encodes the buffered LZ codes with the current Huffman tables through
   TDEFL_PUT_BITS. The buffer is read as a flag byte followed by up to eight records; a set
   flag bit marks a 3-byte match record (length byte, then distance low byte, high byte), a
   clear bit marks a 1-byte literal. Returns MZ_FALSE when the output pointer has reached
   d->m_pOutput_buf_end. */
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    mz_uint flags;
    mz_uint8 *pLZ_codes;

    flags = 1;
    for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1)
    {
        /* flags == 1 means only the 0x100 sentinel is left: load the next flag byte. */
        if (flags == 1)
            flags = *pLZ_codes++ | 0x100;
        if (flags & 1)
        {
            mz_uint sym, num_extra_bits;
            mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8));
            pLZ_codes += 3;

            MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);

            /* Distances below 512 index the small tables directly; larger ones use the
               large tables indexed by the distance's upper bits. */
            if (match_dist < 512)
            {
                sym = s_tdefl_small_dist_sym[match_dist];
                num_extra_bits = s_tdefl_small_dist_extra[match_dist];
            }
            else
            {
                sym = s_tdefl_large_dist_sym[match_dist >> 8];
                num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8];
            }
            MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
        }
        else
        {
            mz_uint lit = *pLZ_codes++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
        }
    }

    /* Symbol 256 terminates the block. */
    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);

    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS */

/* Writes one compressed block: the static or dynamic block header (which also sets up the
   Huffman tables) followed by the encoded LZ codes. Returns the result of
   tdefl_compress_lz_codes(). */
static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block)
{
    if (static_block)
        tdefl_start_static_block(d);
    else
        tdefl_start_dynamic_block(d);
    return tdefl_compress_lz_codes(d);
}

static int tdefl_flush_block(tdefl_compressor *d, int flush)
{
    mz_uint saved_bit_buf, saved_bits_in;
    mz_uint8
*pSaved_output_buf;
    mz_bool comp_block_succeeded = MZ_FALSE;
    /* A raw block is only possible while all of the block's source bytes are still in the dictionary. */
    int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
    /* Write straight into the caller's buffer when there is no callback and at least
       TDEFL_OUT_BUF_SIZE bytes are free there; otherwise stage in d->m_output_buf. */
    mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf;

    d->m_pOutput_buf = pOutput_buf_start;
    /* The end marker sits 16 bytes short of the full buffer size. */
    d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;

    MZ_ASSERT(!d->m_output_flush_remaining);
    d->m_output_flush_ofs = 0;
    d->m_output_flush_remaining = 0;

    /* Finalize the partially filled flag byte; if no flag was used in it (all 8 left), give
       back the code-buffer byte that follows it. */
    *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
    d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);

    /* The two zlib header bytes precede the first block only. */
    if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index))
    {
        TDEFL_PUT_BITS(0x78, 8);
        TDEFL_PUT_BITS(0x01, 8);
    }

    /* One-bit "final block" flag. */
    TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1);

    /* Remember the writer state so the block can be rewound and re-emitted differently. */
    pSaved_output_buf = d->m_pOutput_buf;
    saved_bit_buf = d->m_bit_buffer;
    saved_bits_in = d->m_bits_in;

    /* Static codes are used when forced by flag or when the block covers fewer than 48 bytes. */
    if (!use_raw_block)
        comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48));

    /* If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. */
    if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) &&
        ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size))
    {
        mz_uint i;
        d->m_pOutput_buf = pSaved_output_buf;
        d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        /* Block type 0, then pad to a byte boundary. */
        TDEFL_PUT_BITS(0, 2);
        if (d->m_bits_in)
        {
            TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
        }
        /* Writes the 16-bit length and then its complement; the two XORs leave
           m_total_lz_bytes at its original value afterwards. */
        for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF)
        {
            TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
        }
        /* Copy the block's bytes out of the dictionary. */
        for (i = 0; i < d->m_total_lz_bytes; ++i)
        {
            TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8);
        }
    }
    /* Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. */
    else if (!comp_block_succeeded)
    {
        /* Rewind and retry with static codes. */
        d->m_pOutput_buf = pSaved_output_buf;
        d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        tdefl_compress_block(d, MZ_TRUE);
    }

    if (flush)
    {
        if (flush == TDEFL_FINISH)
        {
            /* Pad to a byte boundary, then append d->m_adler32 most significant byte first
               when the zlib wrapper is enabled. */
            if (d->m_bits_in)
            {
                TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
            }
            if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER)
            {
                mz_uint i, a = d->m_adler32;
                for (i = 0; i < 4; i++)
                {
                    TDEFL_PUT_BITS((a >> 24) & 0xFF, 8);
                    a <<= 8;
                }
            }
        }
        else
        {
            /* Any other flush: append an empty raw block (3 zero header bits, padding, then
               length 0x0000 and its complement 0xFFFF), which byte-aligns the output. */
            mz_uint i, z = 0;
            TDEFL_PUT_BITS(0, 3);
            if (d->m_bits_in)
            {
                TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
            }
            for (i = 2; i; --i, z ^= 0xFFFF)
            {
                TDEFL_PUT_BITS(z & 0xFFFF, 16);
            }
        }
    }

    MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);

    /* Reset per-block statistics and the LZ code buffer for the next block. */
    memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
    memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);

    d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
    d->m_pLZ_flags = d->m_lz_code_buf;
    d->m_num_flags_left = 8;
    d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes;
    d->m_total_lz_bytes = 0;
    d->m_block_index++;

    /* Deliver whatever was produced for this block. */
    if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0)
    {
        if (d->m_pPut_buf_func)
        {
            /* Publish how much input has been consumed before invoking the callback. */
            *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
            if
(!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
                return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
        }
        else if (pOutput_buf_start == d->m_output_buf)
        {
            /* Output was staged internally: copy as much as fits into the caller's buffer and
               record the remainder as still to be flushed. */
            int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
            memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy);
            d->m_out_buf_ofs += bytes_to_copy;
            if ((n -= bytes_to_copy) != 0)
            {
                d->m_output_flush_ofs = bytes_to_copy;
                d->m_output_flush_remaining = n;
            }
        }
        else
        {
            /* Output was written directly into the caller's buffer; just advance the offset. */
            d->m_out_buf_ofs += n;
        }
    }

    /* Non-zero when staged output is still waiting to be copied out. */
    return d->m_output_flush_remaining;
}

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
/* 16-bit loads from possibly unaligned addresses: memcpy-based helpers when
   MINIZ_UNALIGNED_USE_MEMCPY is defined, direct pointer casts otherwise. */
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p)
{
    mz_uint16 ret;
    memcpy(&ret, p, sizeof(mz_uint16));
    return ret;
}
static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p)
{
    mz_uint16 ret;
    memcpy(&ret, p, sizeof(mz_uint16));
    return ret;
}
#else
#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p)
#define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p)
#endif
/* Searches for a match longer than *pMatch_len for the data at lookahead_pos by following
   the chain of earlier positions stored in d->m_next, comparing 16 bits at a time.
   On improvement, *pMatch_dist and *pMatch_len are updated; otherwise they are left
   untouched. The search stops when the probe budget (d->m_max_probes, selected by whether
   the incoming match length is >= 32) runs out, when the chain ends, when a candidate lies
   further back than max_dist, or when a match of max_match_len is found. */
static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
{
    mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
    mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
    const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q;
    /* c01: the two bytes ending at the current best length; s01: the first two bytes. */
    mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s);
    MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
    if (max_match_len <= match_len)
        return;
    for (;;)
    {
        /* Inner loop: step along the chain until a candidate matches at the two bytes that
           would extend the current best match. TDEFL_PROBE returns from the function when
           the chain ends or the candidate is too far away, and breaks out on a hit. */
        for (;;)
        {
            if (--num_probes_left == 0)
                return;
#define TDEFL_PROBE                                                                             \
    next_probe_pos = d->m_next[probe_pos];                                                      \
    if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \
        return;                                                                                 \
    probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                                       \
    if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01)                \
        break;
            TDEFL_PROBE;
            TDEFL_PROBE;
            TDEFL_PROBE;
        }
        if (!dist)
            break;
        q = (const mz_uint16 *)(d->m_dict + probe_pos);
        /* Cheap reject: the first two bytes must match as well. */
        if (TDEFL_READ_UNALIGNED_WORD2(q) != s01)
            continue;
        p = s;
        /* Up to 32 rounds of four 16-bit compares, starting after the first word. */
        probe_len = 32;
        do
        {
        } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) &&
                 (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0));
        if (!probe_len)
        {
            /* Every compare succeeded: report the longest allowed match and stop. */
            *pMatch_dist = dist;
            *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN);
            break;
        }
        else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q)) > match_len)
        {
            /* Length = matched words * 2, plus one if the first byte of the mismatching word
               still agrees. */
            *pMatch_dist = dist;
            if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len)
                break;
            c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
        }
    }
}
#else
/* Byte-wise variant of tdefl_find_match for targets without unaligned loads: searches for a
   match longer than *pMatch_len for the data at lookahead_pos by following the chain of
   earlier positions stored in d->m_next. On improvement, *pMatch_dist and *pMatch_len are
   updated. The search stops when the probe budget runs out, when the chain ends, when a
   candidate lies further back than max_dist, or when a match of max_match_len is found. */
static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
{
    mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
    mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
    const mz_uint8 *s = d->m_dict + pos, *p, *q;
    /* c0: the byte a longer match would have to cover; c1: the last byte of the current best. */
    mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
    MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
    if (max_match_len <= match_len)
        return;
    for (;;)
    {
        /* Inner loop: step along the chain until a candidate agrees on c0 and c1.
           TDEFL_PROBE returns from the function when the chain ends or the candidate is too
           far away, and breaks out on a hit. */
        for (;;)
        {
            if (--num_probes_left == 0)
                return;
#define TDEFL_PROBE                                                                               \
    next_probe_pos = d->m_next[probe_pos];                                                        \
    if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist))   \
        return;                                                                                   \
    probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                                         \
    if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \
        break;
            TDEFL_PROBE;
            TDEFL_PROBE;
            TDEFL_PROBE;
        }
        if (!dist)
            break;
        p = s;
        q = d->m_dict + probe_pos;
        /* Count matching bytes, up to max_match_len. */
        for (probe_len = 0; probe_len < max_match_len; probe_len++)
            if (*p++ != *q++)
                break;
        if (probe_len > match_len)
        {
            *pMatch_dist = dist;
            if ((*pMatch_len = match_len = probe_len) == max_match_len)
                return;
            c0 = d->m_dict[pos + match_len];
            c1 = d->m_dict[pos + match_len - 1];
        }
    }
}
#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
/* 32-bit load from a possibly unaligned address: memcpy-based helper when
   MINIZ_UNALIGNED_USE_MEMCPY is defined, direct pointer cast otherwise. */
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p)
{
    mz_uint32 ret;
    memcpy(&ret, p, sizeof(mz_uint32));
    return ret;
}
#else
#define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32 *)(p)
#endif
static mz_bool tdefl_compress_fast(tdefl_compressor *d)
{
    /* Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. */
    mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left;
    mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
    mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;

    while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size)))
    {
        const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
        mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
        mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size);
        d->m_src_buf_left -= num_bytes_to_process;
        lookahead_size += num_bytes_to_process;

        while (num_bytes_to_process)
        {
            mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
            memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
            if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
            d->m_pSrc += n;
            dst_pos = (dst_pos + n) &
TDEFL_LZ_DICT_SIZE_MASK; num_bytes_to_process -= n; } dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; while (lookahead_size >= 4) { mz_uint cur_match_dist, cur_match_len = 1; mz_uint8 *pCur_dict = d->m_dict + cur_pos; mz_uint first_trigram = TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF; mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; mz_uint probe_pos = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)lookahead_pos; if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) { const mz_uint16 *p = (const mz_uint16 *)pCur_dict; const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); mz_uint32 probe_len = 32; do { } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); if (!probe_len) cur_match_len = cur_match_dist ? 
TDEFL_MAX_MATCH_LEN : 0; if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) { cur_match_len = 1; *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } else { mz_uint32 s0, s1; cur_match_len = MZ_MIN(cur_match_len, lookahead_size); MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); cur_match_dist--; pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); #ifdef MINIZ_UNALIGNED_USE_MEMCPY memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist)); #else *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; #endif pLZ_code_buf += 3; *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; } } else { *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } total_lz_bytes += cur_match_len; lookahead_pos += cur_match_len; dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; MZ_ASSERT(lookahead_size >= cur_match_len); lookahead_size -= cur_match_len; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? 
MZ_FALSE : MZ_TRUE; total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; } } while (lookahead_size) { mz_uint8 lit = d->m_dict[cur_pos]; total_lz_bytes++; *pLZ_code_buf++ = lit; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } d->m_huff_count[0][lit]++; lookahead_pos++; dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; lookahead_size--; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? MZ_FALSE : MZ_TRUE; total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; } } } d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; return MZ_TRUE; } #endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) { d->m_total_lz_bytes++; *d->m_pLZ_code_buf++ = lit; *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } d->m_huff_count[0][lit]++; } static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) { mz_uint32 s0, s1; MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); d->m_total_lz_bytes += 
match_len;

    /* A match occupies three code bytes: (length - TDEFL_MIN_MATCH_LEN), then (distance - 1) low byte first. */
    d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);

    match_dist -= 1;
    d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
    d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8);
    d->m_pLZ_code_buf += 3;

    /* Shift a 1 bit into the flag byte to mark this symbol as a match. */
    *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80);
    if (--d->m_num_flags_left == 0)
    {
        d->m_num_flags_left = 8;
        d->m_pLZ_flags = d->m_pLZ_code_buf++;
    }

    s0 = s_tdefl_small_dist_sym[match_dist & 511];
    s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
    d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;

    if (match_len >= TDEFL_MIN_MATCH_LEN)
        d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
}

/* General compressor loop (hash chains plus lazy or greedy parsing).
 *
 * Consumes bytes from d->m_pSrc into the dictionary, finds matches, and
 * records literals/matches through tdefl_record_literal()/tdefl_record_match().
 * The source pointer and remaining count are kept in locals and stored back
 * into *d before each tdefl_flush_block() call and on exit.
 *
 * Returns MZ_FALSE when tdefl_flush_block() returns a negative value,
 * MZ_TRUE otherwise. */
static mz_bool tdefl_compress_normal(tdefl_compressor *d)
{
    const mz_uint8 *pSrc = d->m_pSrc;
    size_t src_buf_left = d->m_src_buf_left;
    tdefl_flush flush = d->m_flush;

    while ((src_buf_left) || ((flush) && (d->m_lookahead_size)))
    {
        mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
        /* Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. */
        if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1))
        {
            /* At least two earlier bytes exist, so the hash can be rolled forward one byte at a time. */
            mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
            mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
            mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
            const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
            src_buf_left -= num_bytes_to_process;
            d->m_lookahead_size += num_bytes_to_process;
            while (pSrc != pSrc_end)
            {
                mz_uint8 c = *pSrc++;
                d->m_dict[dst_pos] = c;
                /* Mirror the first TDEFL_MAX_MATCH_LEN - 1 bytes past the end of the dictionary. */
                if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                    d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
                hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
                d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
                d->m_hash[hash] = (mz_uint16)(ins_pos);
                dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
                ins_pos++;
            }
        }
        else
        {
            /* Start-up path: too few bytes seen so far for a rolling hash, so the 3-byte hash is
               recomputed from scratch once enough bytes are present. */
            while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
            {
                mz_uint8 c = *pSrc++;
                mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
                src_buf_left--;
                d->m_dict[dst_pos] = c;
                if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                    d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
                if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN)
                {
                    mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
                    mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
                    d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
                    d->m_hash[hash] = (mz_uint16)(ins_pos);
                }
            }
        }
        d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
        /* Without a flush request, stop until the lookahead is full. */
        if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
            break;

        /* Simple lazy/greedy parsing state machine. */
        len_to_move = 1;
        cur_match_dist = 0;
        cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1);
        cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
        if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS))
        {
            if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))
            {
                /* RLE mode: only consider a run of the previous byte, i.e. a distance-1 match. */
                mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
                cur_match_len = 0;
                while (cur_match_len < d->m_lookahead_size)
                {
                    if (d->m_dict[cur_pos + cur_match_len] != c)
                        break;
                    cur_match_len++;
                }
                if (cur_match_len < TDEFL_MIN_MATCH_LEN)
                    cur_match_len = 0;
                else
                    cur_match_dist = 1;
            }
        }
        else
        {
            tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
        }
        /* Discard matches judged not worthwhile: minimum-length ones at distance >= 8K, and short ones when filtering is on. */
        if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5)))
        {
            cur_match_dist = cur_match_len = 0;
        }
        if (d->m_saved_match_len)
        {
            /* A match was deferred on the previous iteration: keep whichever of the two is longer. */
            if (cur_match_len > d->m_saved_match_len)
            {
                tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
                if (cur_match_len >= 128)
                {
                    tdefl_record_match(d, cur_match_len, cur_match_dist);
                    d->m_saved_match_len = 0;
                    len_to_move = cur_match_len;
                }
                else
                {
                    d->m_saved_lit = d->m_dict[cur_pos];
                    d->m_saved_match_dist = cur_match_dist;
                    d->m_saved_match_len = cur_match_len;
                }
            }
            else
            {
                tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
                /* The deferred match started one byte earlier, and that byte has already been stepped over. */
                len_to_move = d->m_saved_match_len - 1;
                d->m_saved_match_len = 0;
            }
        }
        else if (!cur_match_dist)
            tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
        else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128))
        {
            tdefl_record_match(d, cur_match_len, cur_match_dist);
            len_to_move = cur_match_len;
        }
        else
        {
            /* Lazy parsing: remember this match and look one byte ahead for a longer one. */
            d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)];
            d->m_saved_match_dist = cur_match_dist;
            d->m_saved_match_len = cur_match_len;
        }
        /* Move the lookahead forward by len_to_move bytes. */
        d->m_lookahead_pos += len_to_move;
        MZ_ASSERT(d->m_lookahead_size >= len_to_move);
        d->m_lookahead_size -= len_to_move;
        d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
        /* Check if it's time to flush the current LZ codes to the internal output buffer. */
        if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
            ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))))
        {
            int n;
            d->m_pSrc = pSrc;
            d->m_src_buf_left = src_buf_left;
            if ((n = tdefl_flush_block(d, 0)) != 0)
                return (n < 0) ? MZ_FALSE : MZ_TRUE;
        }
    }

    d->m_pSrc = pSrc;
    d->m_src_buf_left = src_buf_left;
    return MZ_TRUE;
}

/* Reports consumed input and drains pending compressed bytes into the caller's output buffer.
 *
 * If an input-size pointer was supplied, stores the number of source bytes
 * consumed so far. If an output-size pointer was supplied, copies as much of
 * d->m_output_buf as fits and stores the total number of bytes written.
 *
 * Returns TDEFL_STATUS_DONE once the stream is finished and nothing is left
 * to drain, TDEFL_STATUS_OKAY otherwise. */
static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d)
{
    if (d->m_pIn_buf_size)
    {
        *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
    }

    if (d->m_pOut_buf_size)
    {
        size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining);
        memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n);
        d->m_output_flush_ofs += (mz_uint)n;
        d->m_output_flush_remaining -= (mz_uint)n;
        d->m_out_buf_ofs += n;

        *d->m_pOut_buf_size = d->m_out_buf_ofs;
    }

    return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY;
}

tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush)
{
    /* A NULL compressor is rejected up front; both size outputs are zeroed so the caller sees no progress. */
    if (!d)
    {
        if (pIn_buf_size)
            *pIn_buf_size = 0;
        if (pOut_buf_size)
            *pOut_buf_size = 0;
        return TDEFL_STATUS_BAD_PARAM;
    }

    d->m_pIn_buf = pIn_buf;
    d->m_pIn_buf_size = pIn_buf_size;
    d->m_pOut_buf = pOut_buf;
    d->m_pOut_buf_size = pOut_buf_size;
    d->m_pSrc = (const mz_uint8 *)(pIn_buf);
    d->m_src_buf_left = pIn_buf_size ?
*pIn_buf_size : 0; d->m_out_buf_ofs = 0; d->m_flush = flush; if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { if (pIn_buf_size) *pIn_buf_size = 0; if (pOut_buf_size) *pOut_buf_size = 0; return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); } d->m_wants_to_finish |= (flush == TDEFL_FINISH); if ((d->m_output_flush_remaining) || (d->m_finished)) return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) { if (!tdefl_compress_fast(d)) return d->m_prev_return_status; } else #endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ { if (!tdefl_compress_normal(d)) return d->m_prev_return_status; } if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) { if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; d->m_finished = (flush == TDEFL_FINISH); if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } } return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); } tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) { MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); } tdefl_status tdefl_init(tdefl_compressor *d, 
tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_dict); memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); return TDEFL_STATUS_OKAY; } tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { return d->m_prev_return_status; } mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; 
succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); MZ_FREE(pComp); return succeeded; } typedef struct { size_t m_size, m_capacity; mz_uint8 *m_pBuf; mz_bool m_expandable; } tdefl_output_buffer; static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) { tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; size_t new_size = p->m_size + len; if (new_size > p->m_capacity) { size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; } memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; return MZ_TRUE; } void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; out_buf.m_expandable = MZ_TRUE; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; *pOut_len = out_buf.m_size; return out_buf.m_pBuf; } size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_buf) return 0; out_buf.m_pBuf = (mz_uint8 *)pOut_buf; out_buf.m_capacity = out_buf_len; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; return out_buf.m_size; } static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; /* level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression 
and it's fine if throughput to fall off a cliff on some files). */ mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) { mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; return comp_flags; } #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) */ #endif /* Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. */ void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) { /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
*/ static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; if (!pComp) return NULL; MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } /* write dummy header */ for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); /* compress image data */ tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } /* write real header */ *pLen_out = out_buf.m_size - 41; { static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 }; mz_uint8 pnghdr[41] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x44, 0x41, 0x54 }; pnghdr[18] = (mz_uint8)(w >> 8); pnghdr[19] = (mz_uint8)w; pnghdr[22] = (mz_uint8)(h >> 8); pnghdr[23] = (mz_uint8)h; pnghdr[25] = chans[num_chans]; pnghdr[33] = (mz_uint8)(*pLen_out >> 24); pnghdr[34] = (mz_uint8)(*pLen_out >> 16); pnghdr[35] = (mz_uint8)(*pLen_out >> 8); pnghdr[36] = (mz_uint8)*pLen_out; c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); for (i = 0; i < 4; ++i, c <<= 8) ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); memcpy(out_buf.m_pBuf, pnghdr, 41); } /* write footer (IDAT CRC-32, 
followed by IEND chunk) */ if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); for (i = 0; i < 4; ++i, c <<= 8) (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); /* compute final size of file, grab compressed data buffer and return */ *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; } void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) { /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) */ return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); } #ifndef MINIZ_NO_MALLOC /* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that */ /* non-C language bindings to tdefL_ and tinfl_ API don't need to worry about */ /* structure size and allocation mechanism. */ tdefl_compressor *tdefl_compressor_alloc() { return (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); } void tdefl_compressor_free(tdefl_compressor *pComp) { MZ_FREE(pComp); } #endif #ifdef _MSC_VER #pragma warning(pop) #endif #ifdef __cplusplus } #endif /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. 
*
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- Low-level Decompression (completely independent from all compression API's) */

#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
#define TINFL_MEMSET(p, c, l) memset(p, c, l)

/* Coroutine helpers. TINFL_CR_BEGIN opens a switch on r->m_state; each TINFL_CR_RETURN stores the
   status and a state index, jumps to the common_exit label, and plants a matching `case` label so
   that re-entering the switch with that state resumes right after the return point.
   These macros expect `r` and `status` to be in scope at the expansion site. */
#define TINFL_CR_BEGIN  \
    switch (r->m_state) \
    {                   \
        case 0:
#define TINFL_CR_RETURN(state_index, result) \
    do                                       \
    {                                        \
        status = result;                     \
        r->m_state = state_index;            \
        goto common_exit;                    \
        case state_index:;                   \
    }                                        \
    MZ_MACRO_END
/* Like TINFL_CR_RETURN, but every resumption returns the same result again. */
#define TINFL_CR_RETURN_FOREVER(state_index, result) \
    do                                               \
    {                                                \
        for (;;)                                     \
        {                                            \
            TINFL_CR_RETURN(state_index, result);    \
        }                                            \
    }                                                \
    MZ_MACRO_END
#define TINFL_CR_FINISH }

/* Reads the next input byte into c. While the input is exhausted it yields NEEDS_MORE_INPUT when
   the caller promised more input (TINFL_FLAG_HAS_MORE_INPUT), else FAILED_CANNOT_MAKE_PROGRESS. */
#define TINFL_GET_BYTE(state_index, c)                                                                                                                           \
    do                                                                                                                                                           \
    {                                                                                                                                                            \
        while (pIn_buf_cur >= pIn_buf_end)                                                                                                                       \
        {                                                                                                                                                        \
            TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \
        }                                                                                                                                                        \
        c = *pIn_buf_cur++;                                                                                                                                      \
    }                                                                                                                                                            \
    MZ_MACRO_END

/* Appends whole bytes to bit_buf until it holds at least n bits. Always reads at least one byte. */
#define TINFL_NEED_BITS(state_index, n)                \
    do                                                 \
    {                                                  \
        mz_uint c;                                     \
        TINFL_GET_BYTE(state_index, c);                \
        bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \
        num_bits += 8;                                 \
    } while (num_bits < (mz_uint)(n))
/* Discards the low n bits of bit_buf, refilling first if fewer than n bits are buffered. */
#define TINFL_SKIP_BITS(state_index, n)      \
    do                                       \
    {                                        \
        if (num_bits < (mz_uint)(n))         \
        {                                    \
            TINFL_NEED_BITS(state_index, n); \
        }                                    \
        bit_buf >>= (n);                     \
        num_bits -= (n);                     \
    }                                        \
    MZ_MACRO_END
/* Extracts the low n bits of bit_buf into b and consumes them, refilling first if necessary. */
#define TINFL_GET_BITS(state_index, b, n)    \
    do                                       \
    {                                        \
        if (num_bits < (mz_uint)(n))         \
        {                                    \
            TINFL_NEED_BITS(state_index, n); \
        }                                    \
        b = bit_buf & ((1 << (n)) - 1);      \
        bit_buf >>= (n);                     \
        num_bits -= (n);                     \
    }                                        \
    MZ_MACRO_END

/* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. */
/* It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a */
/* Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the */
/* bit buffer contains >=15 bits (deflate's max. Huffman code size). */
/* Note: relies on `temp`, `code_len` and `c` being declared by the enclosing expansion (TINFL_HUFF_DECODE declares them). */
#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff)                             \
    do                                                                         \
    {                                                                          \
        temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)];     \
        if (temp >= 0)                                                         \
        {                                                                      \
            code_len = temp >> 9;                                              \
            if ((code_len) && (num_bits >= code_len))                          \
                break;                                                         \
        }                                                                      \
        else if (num_bits > TINFL_FAST_LOOKUP_BITS)                            \
        {                                                                      \
            code_len = TINFL_FAST_LOOKUP_BITS;                                 \
            do                                                                 \
            {                                                                  \
                temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
            } while ((temp < 0) && (num_bits >= (code_len + 1)));              \
            if (temp >= 0)                                                     \
                break;                                                         \
        }                                                                      \
        TINFL_GET_BYTE(state_index, c);                                        \
        bit_buf |= (((tinfl_bit_buf_t)c) << num_bits);                         \
        num_bits += 8;                                                         \
    } while (num_bits < 15);

/* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol.
It's more complex than you would initially expect because the zlib API expects the decompressor to never read */
/* beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully */
/* decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. */
/* The slow path is only executed at the very end of the input buffer. */
/* v1.16: The original macro handled the case at the very end of the passed-in input buffer, but we also need to handle the case where the user passes in 1+zillion bytes */
/* following the deflate data and our non-conservative read-ahead path won't kick in here on this code. This is much trickier. */
/* Expansion outline: if fewer than 15 bits are buffered, either take the careful byte-by-byte fill
   (fewer than 2 input bytes left) or append two bytes at once; then resolve the symbol through the
   fast lookup table (non-negative entry: length in bits 9 and up, symbol in the low 9 bits) or by
   walking m_tree one bit at a time; finally consume code_len bits. */
#define TINFL_HUFF_DECODE(state_index, sym, pHuff)                                                                                  \
    do                                                                                                                              \
    {                                                                                                                               \
        int temp;                                                                                                                   \
        mz_uint code_len, c;                                                                                                        \
        if (num_bits < 15)                                                                                                          \
        {                                                                                                                           \
            if ((pIn_buf_end - pIn_buf_cur) < 2)                                                                                    \
            {                                                                                                                       \
                TINFL_HUFF_BITBUF_FILL(state_index, pHuff);                                                                         \
            }                                                                                                                       \
            else                                                                                                                    \
            {                                                                                                                       \
                bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \
                pIn_buf_cur += 2;                                                                                                   \
                num_bits += 16;                                                                                                     \
            }                                                                                                                       \
        }                                                                                                                           \
        if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)                                               \
            code_len = temp >> 9, temp &= 511;                                                                                      \
        else                                                                                                                        \
        {                                                                                                                           \
            code_len = TINFL_FAST_LOOKUP_BITS;                                                                                      \
            do                                                                                                                      \
            {                                                                                                                       \
                temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)];                                                      \
            } while (temp < 0);                                                                                                     \
        }                                                                                                                           \
        sym = temp;                                                                                                                 \
        bit_buf >>= code_len;                                                                                                       \
        num_bits -= code_len;                                                                                                       \
    }                                                                                                                               \
    MZ_MACRO_END

tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags)
{
    static const int s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 };
    static const int s_length_extra[31] = { 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 }; static const int s_dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 }; static const int s_dist_extra[32] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; static const mz_uint8 s_length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; static const int s_min_table_sizes[3] = { 257, 1, 4 }; tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; /* Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). 
*/ if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; TINFL_CR_BEGIN bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } } do { TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; if (r->m_type == 0) { TINFL_SKIP_BITS(5, num_bits & 7); for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } while ((counter) && (num_bits)) { TINFL_GET_BITS(51, dist, 8); while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = (mz_uint8)dist; counter--; } while (counter) { size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } while (pIn_buf_cur >= pIn_buf_end) { TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? 
TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); } n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; } } else if (r->m_type == 3) { TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); } else { if (r->m_type == 1) { mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); for (i = 0; i <= 143; ++i) *p++ = 8; for (; i <= 255; ++i) *p++ = 9; for (; i <= 279; ++i) *p++ = 7; for (; i <= 287; ++i) *p++ = 8; } else { for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } r->m_table_sizes[2] = 19; } for (; (int)r->m_type >= 0; r->m_type--) { int tree_next, tree_cur; tinfl_huff_table *pTable; mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } if ((65536 != total) && (used_syms > 1)) { TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); } for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; cur_code = next_code[code_size]++; for (l 
= code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { tree_cur -= ((rev_code >>= 1) & 1); if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; } tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; } if (r->m_type == 2) { for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } if ((dist == 16) && (!counter)) { TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); } num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); counter += s; } if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); } TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); } } for (;;) { mz_uint8 *pSrc; for (;;) { if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) { TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); if (counter >= 256) break; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = (mz_uint8)counter; } else { int sym2; mz_uint code_len; #if TINFL_USE_64BIT_BITBUF if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } #else if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } #endif if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) code_len = sym2 >> 9; else { code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); } counter = sym2; bit_buf >>= code_len; num_bits -= code_len; if (counter & 256) break; #if !TINFL_USE_64BIT_BITBUF if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } #endif if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) code_len = sym2 >> 9; else { code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); } bit_buf >>= code_len; num_bits -= code_len; pOut_buf_cur[0] = (mz_uint8)counter; if (sym2 & 256) { pOut_buf_cur++; counter = sym2; break; } pOut_buf_cur[1] = (mz_uint8)sym2; pOut_buf_cur += 2; } } if ((counter &= 511) == 256) break; num_extra = 
s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); } pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { while (counter--) { while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; } continue; } #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES else if ((counter >= 9) && (counter <= dist)) { const mz_uint8 *pSrc_end = pSrc + (counter & ~7); do { #ifdef MINIZ_UNALIGNED_USE_MEMCPY memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32)*2); #else ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; #endif pOut_buf_cur += 8; } while ((pSrc += 8) < pSrc_end); if ((counter &= 7) < 3) { if (counter) { pOut_buf_cur[0] = pSrc[0]; if (counter > 1) pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur += counter; } continue; } } #endif while(counter>2) { pOut_buf_cur[0] = pSrc[0]; pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur[2] = pSrc[2]; pOut_buf_cur += 3; pSrc += 3; counter -= 3; } if (counter > 0) { pOut_buf_cur[0] = pSrc[0]; if (counter > 1) pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur += counter; } } } } while (!(r->m_final & 1)); /* Ensure byte alignment and put back any bytes from the bitbuf if we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. 
*/ /* I'm being super conservative here. A number of simplifications can be made to the byte alignment part, and the Adler32 check shouldn't ever need to worry about reading from the bitbuf now. */ TINFL_SKIP_BITS(32, num_bits & 7); while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) { --pIn_buf_cur; num_bits -= 8; } bit_buf &= (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end of non-deflate/zlib streams with following data (such as gzip streams). */ if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } } TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); TINFL_CR_FINISH common_exit: /* As long as we aren't telling the caller that we NEED more input to make forward progress: */ /* Put back any bytes from the bitbuf in case we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ /* We need to be very careful here to NOT push back any bytes we definitely know we need to make forward progress, though, or we'll lock the caller up into an inf loop. 
*/ if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) { while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) { --pIn_buf_cur; num_bits -= 8; } } r->m_num_bits = num_bits; r->m_bit_buf = bit_buf & (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) { const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; while (buf_len) { for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; } for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; } return status; } /* Higher level helper functions. */ void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) { tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; *pOut_len = 0; tinfl_init(&decomp); for (;;) { size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8 *)pBuf, pBuf ? 
(mz_uint8 *)pBuf + *pOut_len : NULL, &dst_buf_size, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { MZ_FREE(pBuf); *pOut_len = 0; return NULL; } src_buf_ofs += src_buf_size; *pOut_len += dst_buf_size; if (status == TINFL_STATUS_DONE) break; new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); if (!pNew_buf) { MZ_FREE(pBuf); *pOut_len = 0; return NULL; } pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; } return pBuf; } size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) { tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; } int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { int result = 0; tinfl_decompressor decomp; mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; if (!pDict) return TINFL_STATUS_FAILED; tinfl_init(&decomp); for (;;) { size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); in_buf_ofs += in_buf_size; if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) break; if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { result = (status == TINFL_STATUS_DONE); break; } dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); } MZ_FREE(pDict); *pIn_buf_size = in_buf_ofs; return result; } #ifndef MINIZ_NO_MALLOC tinfl_decompressor *tinfl_decompressor_alloc() { tinfl_decompressor *pDecomp = (tinfl_decompressor *)MZ_MALLOC(sizeof(tinfl_decompressor)); if (pDecomp) tinfl_init(pDecomp); return pDecomp; } void tinfl_decompressor_free(tinfl_decompressor *pDecomp) { MZ_FREE(pDecomp); } #endif #ifdef __cplusplus } #endif /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * Copyright 2016 Martin Raiber * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
* **************************************************************************/ #ifndef MINIZ_NO_ARCHIVE_APIS #ifdef __cplusplus extern "C" { #endif /* ------------------- .ZIP archive reading */ #ifdef MINIZ_NO_STDIO #define MZ_FILE void * #else #include #if defined(_MSC_VER) || defined(__MINGW64__) static FILE *mz_fopen(const char *pFilename, const char *pMode) { FILE *pFile = NULL; fopen_s(&pFile, pFilename, pMode); return pFile; } static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { FILE *pFile = NULL; if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; return pFile; } #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 _ftelli64 #define MZ_FSEEK64 _fseeki64 #define MZ_FILE_STAT_STRUCT _stat64 #define MZ_FILE_STAT _stat64 #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #elif defined(__MINGW32__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello64 #define MZ_FSEEK64 fseeko64 #define MZ_FILE_STAT_STRUCT _stat #define MZ_FILE_STAT _stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #elif defined(__TINYC__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftell #define MZ_FSEEK64 fseek #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen64(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello64 #define MZ_FSEEK64 fseeko64 #define 
MZ_FILE_STAT_STRUCT stat64 #define MZ_FILE_STAT stat64 #define MZ_FFLUSH fflush #define MZ_FREOPEN(p, m, s) freopen64(p, m, s) #define MZ_DELETE_FILE remove #elif defined(__APPLE__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello #define MZ_FSEEK64 fseeko #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(p, m, s) freopen(p, m, s) #define MZ_DELETE_FILE remove #else #pragma message("Using fopen, ftello, fseeko, stat() etc. path for file I/O - this path may not support large files.") #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #ifdef __STRICT_ANSI__ #define MZ_FTELL64 ftell #define MZ_FSEEK64 fseek #else #define MZ_FTELL64 ftello #define MZ_FSEEK64 fseeko #endif #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #endif /* #ifdef _MSC_VER */ #endif /* #ifdef MINIZ_NO_STDIO */ #define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) /* Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. 
*/
/* Every *_OFS constant below is a byte offset from the start of the record it belongs to; */
/* the parsing code reads the fields with MZ_READ_LE16 / MZ_READ_LE32 / MZ_READ_LE64 at those offsets. */
enum
{
    /* ZIP archive identifiers and record sizes */
    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50,
    MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50,
    MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
    MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30,
    MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46,
    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,

    /* ZIP64 archive identifier and record sizes */
    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06064b50,
    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50,
    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE = 56,
    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE = 20,
    MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID = 0x0001,
    MZ_ZIP_DATA_DESCRIPTOR_ID = 0x08074b50,
    MZ_ZIP_DATA_DESCRIPTER_SIZE64 = 24,
    MZ_ZIP_DATA_DESCRIPTER_SIZE32 = 16,

    /* Central directory header record offsets */
    MZ_ZIP_CDH_SIG_OFS = 0,
    MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4,
    MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6,
    MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
    MZ_ZIP_CDH_METHOD_OFS = 10,
    MZ_ZIP_CDH_FILE_TIME_OFS = 12,
    MZ_ZIP_CDH_FILE_DATE_OFS = 14,
    MZ_ZIP_CDH_CRC32_OFS = 16,
    MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20,
    MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24,
    MZ_ZIP_CDH_FILENAME_LEN_OFS = 28,
    MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
    MZ_ZIP_CDH_COMMENT_LEN_OFS = 32,
    MZ_ZIP_CDH_DISK_START_OFS = 34,
    MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36,
    MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38,
    MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,

    /* Local directory header offsets */
    MZ_ZIP_LDH_SIG_OFS = 0,
    MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4,
    MZ_ZIP_LDH_BIT_FLAG_OFS = 6,
    MZ_ZIP_LDH_METHOD_OFS = 8,
    MZ_ZIP_LDH_FILE_TIME_OFS = 10,
    MZ_ZIP_LDH_FILE_DATE_OFS = 12,
    MZ_ZIP_LDH_CRC32_OFS = 14,
    MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18,
    MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
    MZ_ZIP_LDH_FILENAME_LEN_OFS = 26,
    MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
    MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR = 1 << 3,

    /* End of central directory offsets */
    MZ_ZIP_ECDH_SIG_OFS = 0,
    MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4,
    MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6,
    MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
    MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10,
    MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
    MZ_ZIP_ECDH_CDIR_OFS_OFS = 16,
    MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,

    /* ZIP64 End of central directory locator offsets */
    MZ_ZIP64_ECDL_SIG_OFS = 0,                    /* 4 bytes */
    MZ_ZIP64_ECDL_NUM_DISK_CDIR_OFS = 4,          /* 4 bytes */
    MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS = 8,  /* 8 bytes */
    MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS = 16, /* 4 bytes */

    /* ZIP64 End of central directory header offsets */
    MZ_ZIP64_ECDH_SIG_OFS = 0,                       /* 4 bytes */
    MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS = 4,            /* 8 bytes */
    MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS = 12,          /* 2 bytes */
    MZ_ZIP64_ECDH_VERSION_NEEDED_OFS = 14,           /* 2 bytes */
    MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS = 16,            /* 4 bytes */
    MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS = 20,            /* 4 bytes */
    MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 24, /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS = 32,       /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_SIZE_OFS = 40,                /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_OFS_OFS = 48,                 /* 8 bytes */

    /* Miscellaneous identifiers and general purpose bit flag masks */
    MZ_ZIP_VERSION_MADE_BY_DOS_FILESYSTEM_ID = 0,
    MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG = 0x10,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED = 1,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG = 32,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION = 64,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED = 8192,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 = 1 << 11
};

/* Minimal growable array used for the central directory bookkeeping. */
/* m_size and m_capacity count elements, each m_element_size bytes wide; m_p is the storage obtained from the archive's realloc callback. */
typedef struct
{
    void *m_p;
    size_t m_size, m_capacity;
    mz_uint m_element_size;
} mz_zip_array;

/* Private per-archive state, allocated by mz_zip_reader_init_internal() and reached through mz_zip_archive::m_pState. */
struct mz_zip_internal_state_tag
{
    /* Raw central directory bytes (element size 1). */
    mz_zip_array m_central_dir;
    /* Offsets into m_central_dir (element size sizeof(mz_uint32)), indexed by file index. */
    mz_zip_array m_central_dir_offsets;
    /* Array of mz_uint32 values that mz_zip_reader_sort_central_dir_offsets_by_filename() orders by lowercased filename. */
    mz_zip_array m_sorted_central_dir_offsets;

    /* The flags passed in when the archive is initially opened. */
    uint32_t m_init_flags;

    /* MZ_TRUE if the archive has a zip64 end of central directory headers, etc. */
    mz_bool m_zip64;

    /* MZ_TRUE if we found zip64 extended info in the central directory (m_zip64 will also be slammed to true too, even if we didn't find a zip64 end of central dir header, etc.) */
    mz_bool m_zip64_has_extended_info_fields;

    /* These fields are used by the file, FILE, memory, and memory/heap read/write helpers.
*/ MZ_FILE *m_pFile; mz_uint64 m_file_archive_start_ofs; void *m_pMem; size_t m_mem_size; size_t m_mem_capacity; }; #define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size #if defined(DEBUG) || defined(_DEBUG) || defined(NDEBUG) static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index) { MZ_ASSERT(index < pArray->m_size); return index; } #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[mz_zip_array_range_check(array_ptr, index)] #else #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] #endif static MZ_FORCEINLINE void mz_zip_array_init(mz_zip_array *pArray, mz_uint32 element_size) { memset(pArray, 0, sizeof(mz_zip_array)); pArray->m_element_size = element_size; } static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) { pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); memset(pArray, 0, sizeof(mz_zip_array)); } static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) { void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) { if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; } return MZ_TRUE; } static MZ_FORCEINLINE mz_bool 
mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing) { if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } pArray->m_size = new_size; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) { return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); } static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) { size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; if (n > 0) memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); return MZ_TRUE; } #ifndef MINIZ_NO_TIME static MZ_TIME_T mz_zip_dos_to_time_t(int dos_time, int dos_date) { struct tm tm; memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; return mktime(&tm); } #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS static void mz_zip_time_t_to_dos_time(MZ_TIME_T time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) { #ifdef _MSC_VER struct tm tm_struct; struct tm *tm = &tm_struct; errno_t err = localtime_s(tm, &time); if (err) { *pDOS_date = 0; *pDOS_time = 0; return; } #else struct tm *tm = localtime(&time); #endif /* #ifdef _MSC_VER */ *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); } #endif /* MINIZ_NO_ARCHIVE_WRITING_APIS */ #ifndef MINIZ_NO_STDIO #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS static mz_bool mz_zip_get_file_modified_time(const char *pFilename, MZ_TIME_T *pTime) { struct 
MZ_FILE_STAT_STRUCT file_stat; /* On Linux with x86 glibc, this call will fail on large files (I think >= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. */ if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; *pTime = file_stat.st_mtime; return MZ_TRUE; } #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS*/ static mz_bool mz_zip_set_file_times(const char *pFilename, MZ_TIME_T access_time, MZ_TIME_T modified_time) { struct utimbuf t; memset(&t, 0, sizeof(t)); t.actime = access_time; t.modtime = modified_time; return !utime(pFilename, &t); } #endif /* #ifndef MINIZ_NO_STDIO */ #endif /* #ifndef MINIZ_NO_TIME */ static MZ_FORCEINLINE mz_bool mz_zip_set_error(mz_zip_archive *pZip, mz_zip_error err_num) { if (pZip) pZip->m_last_error = err_num; return MZ_FALSE; } static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags) { (void)flags; if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!pZip->m_pAlloc) pZip->m_pAlloc = miniz_def_alloc_func; if (!pZip->m_pFree) pZip->m_pFree = miniz_def_free_func; if (!pZip->m_pRealloc) pZip->m_pRealloc = miniz_def_realloc_func; pZip->m_archive_size = 0; pZip->m_central_directory_file_ofs = 0; pZip->m_total_files = 0; pZip->m_last_error = MZ_ZIP_NO_ERROR; if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); pZip->m_pState->m_init_flags = flags; pZip->m_pState->m_zip64 = MZ_FALSE; pZip->m_pState->m_zip64_has_extended_info_fields = MZ_FALSE; pZip->m_zip_mode 
= MZ_ZIP_MODE_READING; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); mz_uint8 l = 0, r = 0; pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pE = pL + MZ_MIN(l_len, r_len); while (pL < pE) { if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; pL++; pR++; } return (pL == pE) ? (l_len < r_len) : (l < r); } #define MZ_SWAP_UINT32(a, b) \ do \ { \ mz_uint32 t = a; \ a = b; \ b = t; \ } \ MZ_MACRO_END /* Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) 
*/ static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) { mz_zip_internal_state *pState = pZip->m_pState; const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; const mz_zip_array *pCentral_dir = &pState->m_central_dir; mz_uint32 *pIndices; mz_uint32 start, end; const mz_uint32 size = pZip->m_total_files; if (size <= 1U) return; pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); start = (size - 2U) >> 1U; for (;;) { mz_uint64 child, root = start; for (;;) { if ((child = (root << 1U) + 1U) >= size) break; child += (((child + 1U) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U]))); if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) break; MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; } if (!start) break; start--; } end = size - 1; while (end > 0) { mz_uint64 child, root = 0; MZ_SWAP_UINT32(pIndices[end], pIndices[0]); for (;;) { if ((child = (root << 1U) + 1U) >= end) break; child += (((child + 1U) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U])); if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) break; MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; } end--; } } static mz_bool mz_zip_reader_locate_header_sig(mz_zip_archive *pZip, mz_uint32 record_sig, mz_uint32 record_size, mz_int64 *pOfs) { mz_int64 cur_file_ofs; mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; /* Basic sanity checks - reject files which are too small */ if (pZip->m_archive_size < record_size) return MZ_FALSE; /* Find the record by scanning the file from the end towards the beginning. 
*/ cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); for (;;) { int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) return MZ_FALSE; for (i = n - 4; i >= 0; --i) { mz_uint s = MZ_READ_LE32(pBuf + i); if (s == record_sig) { if ((pZip->m_archive_size - (cur_file_ofs + i)) >= record_size) break; } } if (i >= 0) { cur_file_ofs += i; break; } /* Give up if we've searched the entire file, or we've gone back "too far" (~64kb) */ if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (MZ_UINT16_MAX + record_size))) return MZ_FALSE; cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); } *pOfs = cur_file_ofs; return MZ_TRUE; } static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint flags) { mz_uint cdir_size = 0, cdir_entries_on_this_disk = 0, num_this_disk = 0, cdir_disk_index = 0; mz_uint64 cdir_ofs = 0; mz_int64 cur_file_ofs = 0; const mz_uint8 *p; mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); mz_uint32 zip64_end_of_central_dir_locator_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pZip64_locator = (mz_uint8 *)zip64_end_of_central_dir_locator_u32; mz_uint32 zip64_end_of_central_dir_header_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pZip64_end_of_central_dir = (mz_uint8 *)zip64_end_of_central_dir_header_u32; mz_uint64 zip64_end_of_central_dir_ofs = 0; /* Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. 
*/ if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); if (!mz_zip_reader_locate_header_sig(pZip, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE, &cur_file_ofs)) return mz_zip_set_error(pZip, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR); /* Read and verify the end of central directory record. */ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); if (MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); if (cur_file_ofs >= (MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) { if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE, pZip64_locator, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) { if (MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG) { zip64_end_of_central_dir_ofs = MZ_READ_LE64(pZip64_locator + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS); if (zip64_end_of_central_dir_ofs > (pZip->m_archive_size - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); if (pZip->m_pRead(pZip->m_pIO_opaque, zip64_end_of_central_dir_ofs, pZip64_end_of_central_dir, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) { if (MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG) { pZip->m_pState->m_zip64 = MZ_TRUE; } } } } } pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS); cdir_entries_on_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); cdir_disk_index = 
MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS); cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); if (pZip->m_pState->m_zip64) { mz_uint32 zip64_total_num_of_disks = MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS); mz_uint64 zip64_cdir_total_entries = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS); mz_uint64 zip64_cdir_total_entries_on_this_disk = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); mz_uint64 zip64_size_of_end_of_central_dir_record = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS); mz_uint64 zip64_size_of_central_directory = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_SIZE_OFS); if (zip64_size_of_end_of_central_dir_record < (MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - 12)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if (zip64_total_num_of_disks != 1U) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); /* Check for miniz's practical limits */ if (zip64_cdir_total_entries > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); pZip->m_total_files = (mz_uint32)zip64_cdir_total_entries; if (zip64_cdir_total_entries_on_this_disk > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); cdir_entries_on_this_disk = (mz_uint32)zip64_cdir_total_entries_on_this_disk; /* Check for miniz's current practical limits (sorry, this should be enough for millions of files) */ if (zip64_size_of_central_directory > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); cdir_size = (mz_uint32)zip64_size_of_central_directory; num_this_disk = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS); cdir_disk_index = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS); cdir_ofs = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_OFS_OFS); } if 
(pZip->m_total_files != cdir_entries_on_this_disk) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); if (cdir_size < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); pZip->m_central_directory_file_ofs = cdir_ofs; if (pZip->m_total_files) { mz_uint i, n; /* Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and possibly another to hold the sorted indices. */ if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if (sort_central_dir) { if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); /* Now create an index into the central directory file records, do some basic sanity checking on each record */ p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { mz_uint total_header_size, disk_index, bit_flags, filename_size, ext_data_size; mz_uint64 comp_size, decomp_size, local_header_ofs; if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, 
mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); if (sort_central_dir) MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); filename_size = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); ext_data_size = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); if ((!pZip->m_pState->m_zip64_has_extended_info_fields) && (ext_data_size) && (MZ_MAX(MZ_MAX(comp_size, decomp_size), local_header_ofs) == MZ_UINT32_MAX)) { /* Attempt to find zip64 extended information field in the entry's extra data */ mz_uint32 extra_size_remaining = ext_data_size; if (extra_size_remaining) { const mz_uint8 *pExtra_data; void* buf = NULL; if (MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + ext_data_size > n) { buf = MZ_MALLOC(ext_data_size); if(buf==NULL) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size, buf, ext_data_size) != ext_data_size) { MZ_FREE(buf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } pExtra_data = (mz_uint8*)buf; } else { pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size; } do { mz_uint32 field_id; mz_uint32 field_data_size; if (extra_size_remaining < (sizeof(mz_uint16) * 2)) { MZ_FREE(buf); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } field_id = MZ_READ_LE16(pExtra_data); field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) { MZ_FREE(buf); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) { /* Ok, the archive didn't have any zip64 headers but it uses a zip64 extended information field so mark it as zip64 anyway 
(this can occur with infozip's zip util when it reads compresses files from stdin). */ pZip->m_pState->m_zip64 = MZ_TRUE; pZip->m_pState->m_zip64_has_extended_info_fields = MZ_TRUE; break; } pExtra_data += sizeof(mz_uint16) * 2 + field_data_size; extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; } while (extra_size_remaining); MZ_FREE(buf); } } /* I've seen archives that aren't marked as zip64 that uses zip64 ext data, argh */ if ((comp_size != MZ_UINT32_MAX) && (decomp_size != MZ_UINT32_MAX)) { if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); if ((disk_index == MZ_UINT16_MAX) || ((disk_index != num_this_disk) && (disk_index != 1))) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); if (comp_size != MZ_UINT32_MAX) { if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } bit_flags = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); if (bit_flags & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); n -= total_header_size; p += total_header_size; } } if (sort_central_dir) mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); return MZ_TRUE; } void mz_zip_zero_struct(mz_zip_archive *pZip) { if (pZip) MZ_CLEAR_OBJ(*pZip); } static mz_bool mz_zip_reader_end_internal(mz_zip_archive *pZip, mz_bool set_last_error) { mz_bool status = MZ_TRUE; if (!pZip) return MZ_FALSE; if 
((!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) { if (set_last_error) pZip->m_last_error = MZ_ZIP_INVALID_PARAMETER; return MZ_FALSE; } if (pZip->m_pState) { mz_zip_internal_state *pState = pZip->m_pState; pZip->m_pState = NULL; mz_zip_array_clear(pZip, &pState->m_central_dir); mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); #ifndef MINIZ_NO_STDIO if (pState->m_pFile) { if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) { if (MZ_FCLOSE(pState->m_pFile) == EOF) { if (set_last_error) pZip->m_last_error = MZ_ZIP_FILE_CLOSE_FAILED; status = MZ_FALSE; } } pState->m_pFile = NULL; } #endif /* #ifndef MINIZ_NO_STDIO */ pZip->m_pFree(pZip->m_pAlloc_opaque, pState); } pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; return status; } mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { return mz_zip_reader_end_internal(pZip, MZ_TRUE); } mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags) { if ((!pZip) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; pZip->m_zip_type = MZ_ZIP_TYPE_USER; pZip->m_archive_size = size; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end_internal(pZip, MZ_FALSE); return MZ_FALSE; } return MZ_TRUE; } static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) { mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; size_t s = (file_ofs >= pZip->m_archive_size) ? 
0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); return s; } mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags) { if (!pMem) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; pZip->m_zip_type = MZ_ZIP_TYPE_MEMORY; pZip->m_archive_size = size; pZip->m_pRead = mz_zip_mem_read_func; pZip->m_pIO_opaque = pZip; pZip->m_pNeeds_keepalive = NULL; #ifdef __cplusplus pZip->m_pState->m_pMem = const_cast(pMem); #else pZip->m_pState->m_pMem = (void *)pMem; #endif pZip->m_pState->m_mem_size = size; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end_internal(pZip, MZ_FALSE); return MZ_FALSE; } return MZ_TRUE; } #ifndef MINIZ_NO_STDIO static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) { mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); file_ofs += pZip->m_pState->m_file_archive_start_ofs; if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) return 0; return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); } mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) { return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0); } mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size) { mz_uint64 file_size; MZ_FILE *pFile; if ((!pZip) || (!pFilename) || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pFile = MZ_FOPEN(pFilename, "rb"); if (!pFile) return mz_zip_set_error(pZip, 
MZ_ZIP_FILE_OPEN_FAILED); file_size = archive_size; if (!file_size) { if (MZ_FSEEK64(pFile, 0, SEEK_END)) { MZ_FCLOSE(pFile); return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); } file_size = MZ_FTELL64(pFile); } /* TODO: Better sanity check archive_size and the # of actual remaining bytes */ if (file_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) { MZ_FCLOSE(pFile); return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); } if (!mz_zip_reader_init_internal(pZip, flags)) { MZ_FCLOSE(pFile); return MZ_FALSE; } pZip->m_zip_type = MZ_ZIP_TYPE_FILE; pZip->m_pRead = mz_zip_file_read_func; pZip->m_pIO_opaque = pZip; pZip->m_pState->m_pFile = pFile; pZip->m_archive_size = file_size; pZip->m_pState->m_file_archive_start_ofs = file_start_ofs; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end_internal(pZip, MZ_FALSE); return MZ_FALSE; } return MZ_TRUE; } mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags) { mz_uint64 cur_file_ofs; if ((!pZip) || (!pFile)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); cur_file_ofs = MZ_FTELL64(pFile); if (!archive_size) { if (MZ_FSEEK64(pFile, 0, SEEK_END)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); archive_size = MZ_FTELL64(pFile) - cur_file_ofs; if (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); } if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; pZip->m_pRead = mz_zip_file_read_func; pZip->m_pIO_opaque = pZip; pZip->m_pState->m_pFile = pFile; pZip->m_archive_size = archive_size; pZip->m_pState->m_file_archive_start_ofs = cur_file_ofs; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end_internal(pZip, MZ_FALSE); return MZ_FALSE; } return MZ_TRUE; } #endif /* #ifndef MINIZ_NO_STDIO */ static MZ_FORCEINLINE const mz_uint8 *mz_zip_get_cdh(mz_zip_archive *pZip, mz_uint file_index) { if ((!pZip) || (!pZip->m_pState) 
|| (file_index >= pZip->m_total_files)) return NULL; return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); } mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index) { mz_uint m_bit_flag; const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); if (!p) { mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return MZ_FALSE; } m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); return (m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) != 0; } mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index) { mz_uint bit_flag; mz_uint method; const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); if (!p) { mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return MZ_FALSE; } method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); if ((method != 0) && (method != MZ_DEFLATED)) { mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); return MZ_FALSE; } if (bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) { mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); return MZ_FALSE; } if (bit_flag & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG) { mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); return MZ_FALSE; } return MZ_TRUE; } mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index) { mz_uint filename_len, attribute_mapping_id, external_attr; const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); if (!p) { mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return MZ_FALSE; } filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); if (filename_len) { if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') return MZ_TRUE; } /* Bugfix: This code was also checking if the internal attribute was non-zero, 
which wasn't correct. */ /* Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field. */ /* FIXME: Remove this check? Is it necessary - we already check the filename. */ attribute_mapping_id = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS) >> 8; (void)attribute_mapping_id; external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); if ((external_attr & MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG) != 0) { return MZ_TRUE; } return MZ_FALSE; } static mz_bool mz_zip_file_stat_internal(mz_zip_archive *pZip, mz_uint file_index, const mz_uint8 *pCentral_dir_header, mz_zip_archive_file_stat *pStat, mz_bool *pFound_zip64_extra_data) { mz_uint n; const mz_uint8 *p = pCentral_dir_header; if (pFound_zip64_extra_data) *pFound_zip64_extra_data = MZ_FALSE; if ((!p) || (!pStat)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); /* Extract fields from the central directory record. */ pStat->m_file_index = file_index; pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); #ifndef MINIZ_NO_TIME pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); #endif pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); 
/* Copy as much of the filename and comment as possible. */ n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0'; n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); pStat->m_comment_size = n; memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0'; /* Set some flags for convienance */ pStat->m_is_directory = mz_zip_reader_is_file_a_directory(pZip, file_index); pStat->m_is_encrypted = mz_zip_reader_is_file_encrypted(pZip, file_index); pStat->m_is_supported = mz_zip_reader_is_file_supported(pZip, file_index); /* See if we need to read any zip64 extended information fields. */ /* Confusingly, these zip64 fields can be present even on non-zip64 archives (Debian zip on a huge files from stdin piped to stdout creates them). 
*/ if (MZ_MAX(MZ_MAX(pStat->m_comp_size, pStat->m_uncomp_size), pStat->m_local_header_ofs) == MZ_UINT32_MAX) { /* Attempt to find zip64 extended information field in the entry's extra data */ mz_uint32 extra_size_remaining = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); if (extra_size_remaining) { const mz_uint8 *pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); do { mz_uint32 field_id; mz_uint32 field_data_size; if (extra_size_remaining < (sizeof(mz_uint16) * 2)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); field_id = MZ_READ_LE16(pExtra_data); field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) { const mz_uint8 *pField_data = pExtra_data + sizeof(mz_uint16) * 2; mz_uint32 field_data_remaining = field_data_size; if (pFound_zip64_extra_data) *pFound_zip64_extra_data = MZ_TRUE; if (pStat->m_uncomp_size == MZ_UINT32_MAX) { if (field_data_remaining < sizeof(mz_uint64)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); pStat->m_uncomp_size = MZ_READ_LE64(pField_data); pField_data += sizeof(mz_uint64); field_data_remaining -= sizeof(mz_uint64); } if (pStat->m_comp_size == MZ_UINT32_MAX) { if (field_data_remaining < sizeof(mz_uint64)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); pStat->m_comp_size = MZ_READ_LE64(pField_data); pField_data += sizeof(mz_uint64); field_data_remaining -= sizeof(mz_uint64); } if (pStat->m_local_header_ofs == MZ_UINT32_MAX) { if (field_data_remaining < sizeof(mz_uint64)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); pStat->m_local_header_ofs = MZ_READ_LE64(pField_data); pField_data += sizeof(mz_uint64); field_data_remaining -= sizeof(mz_uint64); } break; } pExtra_data += sizeof(mz_uint16) * 2 + 
field_data_size; extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; } while (extra_size_remaining); } } return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags) { mz_uint i; if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); for (i = 0; i < len; ++i) if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; return MZ_TRUE; } static MZ_FORCEINLINE int mz_zip_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); mz_uint8 l = 0, r = 0; pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pE = pL + MZ_MIN(l_len, r_len); while (pL < pE) { if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; pL++; pR++; } return (pL == pE) ? 
(int)(l_len - r_len) : (l - r); } static mz_bool mz_zip_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename, mz_uint32 *pIndex) { mz_zip_internal_state *pState = pZip->m_pState; const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; const mz_zip_array *pCentral_dir = &pState->m_central_dir; mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); const uint32_t size = pZip->m_total_files; const mz_uint filename_len = (mz_uint)strlen(pFilename); if (pIndex) *pIndex = 0; if (size) { /* yes I could use uint32_t's, but then we would have to add some special case checks in the loop, argh, and */ /* honestly the major expense here on 32-bit CPU's will still be the filename compare */ mz_int64 l = 0, h = (mz_int64)size - 1; while (l <= h) { mz_int64 m = l + ((h - l) >> 1); uint32_t file_index = pIndices[(uint32_t)m]; int comp = mz_zip_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); if (!comp) { if (pIndex) *pIndex = file_index; return MZ_TRUE; } else if (comp < 0) l = m + 1; else h = m - 1; } } return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); } int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags) { mz_uint32 index; if (!mz_zip_reader_locate_file_v2(pZip, pName, pComment, flags, &index)) return -1; else return (int)index; } mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *pIndex) { mz_uint file_index; size_t name_len, comment_len; if (pIndex) *pIndex = 0; if ((!pZip) || (!pZip->m_pState) || (!pName)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); /* See if we can use a binary search */ if (((pZip->m_pState->m_init_flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) && (pZip->m_zip_mode == MZ_ZIP_MODE_READING) && ((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && 
(pZip->m_pState->m_sorted_central_dir_offsets.m_size)) { return mz_zip_locate_file_binary_search(pZip, pName, pIndex); } /* Locate the entry by scanning the entire central directory */ name_len = strlen(pName); if (name_len > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); comment_len = pComment ? strlen(pComment) : 0; if (comment_len > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); for (file_index = 0; file_index < pZip->m_total_files; file_index++) { const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; if (filename_len < name_len) continue; if (comment_len) { mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); const char *pFile_comment = pFilename + filename_len + file_extra_len; if ((file_comment_len != comment_len) || (!mz_zip_string_equal(pComment, pFile_comment, file_comment_len, flags))) continue; } if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { int ofs = filename_len - 1; do { if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) break; } while (--ofs >= 0); ofs++; pFilename += ofs; filename_len -= ofs; } if ((filename_len == name_len) && (mz_zip_string_equal(pName, pFilename, filename_len, flags))) { if (pIndex) *pIndex = file_index; return MZ_TRUE; } } return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); } mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { int status = TINFL_STATUS_DONE; mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, 
read_buf_size, read_buf_ofs = 0, read_buf_avail; mz_zip_archive_file_stat file_stat; void *pRead_buf; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; tinfl_decompressor inflator; if ((!pZip) || (!pZip->m_pState) || ((buf_size) && (!pBuf)) || ((user_read_buf_size) && (!pUser_read_buf)) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; /* A directory or zero length file */ if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) return MZ_TRUE; /* Encryption and patch files are not supported. */ if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); /* This function only supports decompressing stored and deflate. */ if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); /* Ensure supplied output buffer is large enough. */ needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; if (buf_size < needed_size) return mz_zip_set_error(pZip, MZ_ZIP_BUF_TOO_SMALL); /* Read and parse the local directory entry. 
*/ cur_file_ofs = file_stat.m_local_header_ofs; if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { /* The file is stored or the caller has requested the compressed data. */ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) == 0) { if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32) return mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); } #endif return MZ_TRUE; } /* Decompress the file either directly from memory or from a file input buffer. */ tinfl_init(&inflator); if (pZip->m_pState->m_pMem) { /* Read directly from the archive in memory. */ pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; read_buf_size = read_buf_avail = file_stat.m_comp_size; comp_remaining = 0; } else if (pUser_read_buf) { /* Use a user provided read buffer. */ if (!user_read_buf_size) return MZ_FALSE; pRead_buf = (mz_uint8 *)pUser_read_buf; read_buf_size = user_read_buf_size; read_buf_avail = 0; comp_remaining = file_stat.m_comp_size; } else { /* Temporarily allocate a read buffer. 
*/ read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE); if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); read_buf_avail = 0; comp_remaining = file_stat.m_comp_size; } do { /* The size_t cast here should be OK because we've verified that the output buffer is >= file_stat.m_uncomp_size above */ size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) { status = TINFL_STATUS_FAILED; mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); break; } cur_file_ofs += read_buf_avail; comp_remaining -= read_buf_avail; read_buf_ofs = 0; } in_buf_size = (size_t)read_buf_avail; status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); read_buf_avail -= in_buf_size; read_buf_ofs += in_buf_size; out_buf_ofs += out_buf_size; } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); if (status == TINFL_STATUS_DONE) { /* Make sure the entire file was decompressed, and check its CRC. 
*/ if (out_buf_ofs != file_stat.m_uncomp_size) { mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); status = TINFL_STATUS_FAILED; } #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS else if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32) { mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); status = TINFL_STATUS_FAILED; } #endif } if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return status == TINFL_STATUS_DONE; } mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { mz_uint32 file_index; if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) return MZ_FALSE; return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); } mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags) { return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); } mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags) { return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); } void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) { mz_uint64 comp_size, uncomp_size, alloc_size; const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); void *pBuf; if (pSize) *pSize = 0; if (!p) { mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return NULL; } comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? 
comp_size : uncomp_size; if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) { mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); return NULL; } if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) { mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); return NULL; } if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return NULL; } if (pSize) *pSize = (size_t)alloc_size; return pBuf; } void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags) { mz_uint32 file_index; if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) { if (pSize) *pSize = 0; return MZ_FALSE; } return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); } mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) { int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT; mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; mz_zip_archive_file_stat file_stat; void *pRead_buf = NULL; void *pWrite_buf = NULL; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; if ((!pZip) || (!pZip->m_pState) || (!pCallback) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; /* A directory or zero length file */ if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) return MZ_TRUE; /* Encryption and patch files are not supported. 
*/
    if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);

    /* This function only supports decompressing stored and deflate. */
    if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);

    /* Read and do some minimal validation of the local directory entry (this doesn't crack the zip64 stuff, which we already have from the central dir) */
    cur_file_ofs = file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);

    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);

    /* Skip the fixed header plus the variable-length filename and extra field to reach the entry's data. */
    cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);

    /* Decompress the file either directly from memory or from a file input buffer. */
    if (pZip->m_pState->m_pMem)
    {
        /* In-memory archive: point straight at the entry data, nothing left to read. */
        pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
        read_buf_size = read_buf_avail = file_stat.m_comp_size;
        comp_remaining = 0;
    }
    else
    {
        /* Otherwise allocate a bounded I/O buffer that is refilled through m_pRead. */
        read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE);
        if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);

        read_buf_avail = 0;
        comp_remaining = file_stat.m_comp_size;
    }

    if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
    {
        /* The file is stored or the caller has requested the compressed data. */
        if (pZip->m_pState->m_pMem)
        {
            /* pRead_buf points into the archive memory here, so the early return below leaks nothing. */
            if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > MZ_UINT32_MAX))
                return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);

            /* Hand the whole entry to the callback in a single call. */
            if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
            {
                mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                status = TINFL_STATUS_FAILED;
            }
            else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
            {
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size);
#endif
            }

            cur_file_ofs += file_stat.m_comp_size;
            out_buf_ofs += file_stat.m_comp_size;
            comp_remaining = 0;
        }
        else
        {
            /* Copy loop: read a chunk, fold it into the CRC (stored data only), pass it to the callback. */
            while (comp_remaining)
            {
                read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
                if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                {
                    mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                    status = TINFL_STATUS_FAILED;
                    break;
                }

#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
                {
                    file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
                }
#endif

                if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                {
                    mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                    status = TINFL_STATUS_FAILED;
                    break;
                }

                cur_file_ofs += read_buf_avail;
                out_buf_ofs += read_buf_avail;
                comp_remaining -= read_buf_avail;
            }
        }
    }
    else
    {
        /* Deflated entry: inflate through a TINFL_LZ_DICT_SIZE byte output window. */
        tinfl_decompressor inflator;
        tinfl_init(&inflator);

        if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            status = TINFL_STATUS_FAILED;
        }
        else
        {
            do
            {
                /* The output position wraps inside the window; out_buf_ofs masked by the window size gives the write cursor. */
                mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
                size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
                /* Refill the input buffer only when it is empty and the archive is not memory-backed. */
                if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
                {
                    read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
                    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }
                    cur_file_ofs += read_buf_avail;
                    comp_remaining -= read_buf_avail;
                    read_buf_ofs = 0;
                }

                /* On return in_buf_size/out_buf_size hold the bytes consumed/produced by this call. */
                in_buf_size = (size_t)read_buf_avail;
                status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
                read_buf_avail -= in_buf_size;
                read_buf_ofs += in_buf_size;

                if (out_buf_size)
                {
                    if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }

#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                    file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
#endif
                    /* Producing more than the recorded uncompressed size is treated as a decompression failure. */
                    if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }
                }
            } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT));
        }
    }

    if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
    {
        /* Make sure the entire file was decompressed, and check its CRC. */
        if (out_buf_ofs != file_stat.m_uncomp_size)
        {
            mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE);
            status = TINFL_STATUS_FAILED;
        }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        /* NOTE(review): a CRC mismatch is reported as MZ_ZIP_DECOMPRESSION_FAILED here, not MZ_ZIP_CRC_CHECK_FAILED. */
        else if (file_crc32 != file_stat.m_crc32)
        {
            mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED);
            status = TINFL_STATUS_FAILED;
        }
#endif
    }

    /* The read buffer was allocated above only when the archive is not memory-backed. */
    if (!pZip->m_pState->m_pMem)
        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);

    if (pWrite_buf)
        pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf);

    return status == TINFL_STATUS_DONE;
}

/* Name-based variant: resolves pFilename with mz_zip_reader_locate_file_v2() and forwards to
   mz_zip_reader_extract_to_callback(). Returns MZ_FALSE if the name is not found. */
mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
{
    mz_uint32 file_index;
    if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index))
        return MZ_FALSE;

    return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags);
}

/* Creates an extraction iterator for entry file_index. Returns a state object allocated with
   pZip->m_pAlloc, or NULL on failure (every failure path below frees the partially built state). */
mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags)
{
    mz_zip_reader_extract_iter_state *pState;
    mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;

    /* Argument sanity check */
    if ((!pZip) || (!pZip->m_pState))
        return NULL;

    /* Allocate an iterator status structure */
    pState = (mz_zip_reader_extract_iter_state*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_reader_extract_iter_state));
    if (!pState)
    {
        mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        return NULL;
    }

    /* Fetch file details */
    if (!mz_zip_reader_file_stat(pZip, file_index, &pState->file_stat))
    {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    /* Encryption and patch files are not supported.
*/
    if (pState->file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    /* This function only supports decompressing stored and deflate. */
    if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (pState->file_stat.m_method != 0) && (pState->file_stat.m_method != MZ_DEFLATED))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    /* Init state - save args */
    pState->pZip = pZip;
    pState->flags = flags;

    /* Init state - reset variables to defaults */
    pState->status = TINFL_STATUS_DONE;
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
    pState->file_crc32 = MZ_CRC32_INIT;
#endif
    pState->read_buf_ofs = 0;
    pState->out_buf_ofs = 0;
    pState->pRead_buf = NULL;
    pState->pWrite_buf = NULL;
    pState->out_blk_remain = 0;

    /* Read and parse the local directory entry. */
    pState->cur_file_ofs = pState->file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, pState->cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
    {
        mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    /* Advance past the fixed header, filename and extra field to the start of the entry's data,
       then make sure the data lies inside the archive. */
    pState->cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    if ((pState->cur_file_ofs + pState->file_stat.m_comp_size) > pZip->m_archive_size)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }

    /* Decompress the file either directly from memory or from a file input buffer. */
    if (pZip->m_pState->m_pMem)
    {
        /* In-memory archive: pRead_buf aliases the archive memory and is never freed by the iterator. */
        pState->pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + pState->cur_file_ofs;
        pState->read_buf_size = pState->read_buf_avail = pState->file_stat.m_comp_size;
        pState->comp_remaining = pState->file_stat.m_comp_size;
    }
    else
    {
        if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)))
        {
            /* Decompression required, therefore intermediate read buffer required */
            pState->read_buf_size = MZ_MIN(pState->file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE);
            if (NULL == (pState->pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)pState->read_buf_size)))
            {
                mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
                pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
                return NULL;
            }
        }
        else
        {
            /* Decompression not required - we will be reading directly into user buffer, no temp buf required */
            pState->read_buf_size = 0;
        }
        pState->read_buf_avail = 0;
        pState->comp_remaining = pState->file_stat.m_comp_size;
    }

    if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)))
    {
        /* Decompression required, init decompressor */
        tinfl_init( &pState->inflator );

        /* Allocate write buffer */
        if (NULL == (pState->pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            /* NOTE(review): for an in-memory archive pRead_buf points into the archive memory at this
               point, so this free would be handed a pointer that was not allocated here - confirm. */
            if (pState->pRead_buf)
                pZip->m_pFree(pZip->m_pAlloc_opaque, pState->pRead_buf);
            pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
            return NULL;
        }
    }

    return pState;
}

/* Name-based variant: resolves pFilename to an index and builds the iterator for it. Returns NULL if the name is not found. */
mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags)
{
    mz_uint32 file_index;

    /* Locate file index by name */
    if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index))
        return NULL;

    /* Construct iterator */
    return mz_zip_reader_extract_iter_new(pZip, file_index, flags);
}

/* Copies up to buf_size bytes of the entry's (compressed or decompressed) data into pvBuf and
   returns the number of bytes copied. Returns 0 for invalid arguments. */
size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size)
{
    size_t copied_to_caller = 0;

    /* Argument sanity check */
    if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState) || (!pvBuf))
        return 0;

    if ((pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))
    {
        /* The file is stored or the caller has requested the compressed data, calc amount to return. */
        copied_to_caller = (size_t)MZ_MIN( buf_size, pState->comp_remaining );

        /* Zip is in memory....or requires reading from a file?
*/
        if (pState->pZip->m_pState->m_pMem)
        {
            /* Copy data to caller's buffer */
            memcpy( pvBuf, pState->pRead_buf, copied_to_caller );
            /* Advance the read pointer so the next call continues where this one stopped. */
            pState->pRead_buf = ((mz_uint8*)pState->pRead_buf) + copied_to_caller;
        }
        else
        {
            /* Read directly into caller's buffer */
            if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pvBuf, copied_to_caller) != copied_to_caller)
            {
                /* Failed to read all that was asked for, flag failure and alert user */
                mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED);
                pState->status = TINFL_STATUS_FAILED;
                copied_to_caller = 0;
            }
        }

#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        /* Compute CRC if not returning compressed data only */
        if (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
            pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, (const mz_uint8 *)pvBuf, copied_to_caller);
#endif

        /* Advance offsets, dec counters */
        pState->cur_file_ofs += copied_to_caller;
        pState->out_buf_ofs += copied_to_caller;
        pState->comp_remaining -= copied_to_caller;
    }
    else
    {
        /* Deflated entry: inflate block by block into pWrite_buf and copy out until the caller's
           buffer is full or the decompressor stops asking for more input/output space. */
        do
        {
            /* Calc ptr to write buffer - given current output pos and block size */
            mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pState->pWrite_buf + (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));

            /* Calc max output size - given current output pos and block size */
            size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));

            /* Only decompress more when the previously produced block has been fully handed out. */
            if (!pState->out_blk_remain)
            {
                /* Read more data from file if none available (and reading from file) */
                if ((!pState->read_buf_avail) && (!pState->pZip->m_pState->m_pMem))
                {
                    /* Calc read size */
                    pState->read_buf_avail = MZ_MIN(pState->read_buf_size, pState->comp_remaining);
                    if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pState->pRead_buf, (size_t)pState->read_buf_avail) != pState->read_buf_avail)
                    {
                        mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED);
                        pState->status = TINFL_STATUS_FAILED;
                        break;
                    }

                    /* Advance offsets, dec counters */
                    pState->cur_file_ofs += pState->read_buf_avail;
                    pState->comp_remaining -= pState->read_buf_avail;
                    pState->read_buf_ofs = 0;
                }

                /* Perform decompression */
                in_buf_size = (size_t)pState->read_buf_avail;
                pState->status = tinfl_decompress(&pState->inflator, (const mz_uint8 *)pState->pRead_buf + pState->read_buf_ofs, &in_buf_size, (mz_uint8 *)pState->pWrite_buf, pWrite_buf_cur, &out_buf_size, pState->comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
                pState->read_buf_avail -= in_buf_size;
                pState->read_buf_ofs += in_buf_size;

                /* Update current output block size remaining */
                pState->out_blk_remain = out_buf_size;
            }

            if (pState->out_blk_remain)
            {
                /* Calc amount to return. */
                size_t to_copy = MZ_MIN( (buf_size - copied_to_caller), pState->out_blk_remain );

                /* Copy data to caller's buffer */
                memcpy( (uint8_t*)pvBuf + copied_to_caller, pWrite_buf_cur, to_copy );

#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                /* Perform CRC */
                pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, pWrite_buf_cur, to_copy);
#endif

                /* Decrement data consumed from block */
                pState->out_blk_remain -= to_copy;

                /* Inc output offset, while performing sanity check */
                if ((pState->out_buf_ofs += to_copy) > pState->file_stat.m_uncomp_size)
                {
                    mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED);
                    pState->status = TINFL_STATUS_FAILED;
                    break;
                }

                /* Increment counter of data copied to caller */
                copied_to_caller += to_copy;
            }
        } while ( (copied_to_caller < buf_size) && ((pState->status == TINFL_STATUS_NEEDS_MORE_INPUT) || (pState->status == TINFL_STATUS_HAS_MORE_OUTPUT)) );
    }

    /* Return how many bytes were copied into user buffer */
    return copied_to_caller;
}

/* Finishes an extraction iterator: performs the final size/CRC checks (when decompression was
   requested and completed), releases the iterator's buffers and the state itself.
   Returns MZ_TRUE only if the final status is TINFL_STATUS_DONE. pState is invalid afterwards. */
mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState)
{
    int status;

    /* Argument sanity check */
    if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState))
        return MZ_FALSE;

    /* Was decompression completed and requested? */
    if ((pState->status == TINFL_STATUS_DONE) && (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
    {
        /* Make sure the entire file was decompressed, and check its CRC. */
        if (pState->out_buf_ofs != pState->file_stat.m_uncomp_size)
        {
            mz_zip_set_error(pState->pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE);
            pState->status = TINFL_STATUS_FAILED;
        }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        else if (pState->file_crc32 != pState->file_stat.m_crc32)
        {
            mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED);
            pState->status = TINFL_STATUS_FAILED;
        }
#endif
    }

    /* Free buffers */
    /* For in-memory archives pRead_buf points into the archive memory, so it is only freed otherwise. */
    if (!pState->pZip->m_pState->m_pMem)
        pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pRead_buf);
    if (pState->pWrite_buf)
        pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pWrite_buf);

    /* Save status */
    status = pState->status;

    /* Free context */
    pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState);

    return status == TINFL_STATUS_DONE;
}

#ifndef MINIZ_NO_STDIO
/* Write callback that appends n bytes to the MZ_FILE passed as pOpaque; the offset is ignored. */
static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n)
{
    (void)ofs;

    return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque);
}

/* Extracts entry file_index to the file pDst_filename (opened with mode "wb").
   Directories and unsupported entries are rejected with MZ_ZIP_UNSUPPORTED_FEATURE. */
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags)
{
    mz_bool status;
    mz_zip_archive_file_stat file_stat;
    MZ_FILE *pFile;

    if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return MZ_FALSE;

    if ((file_stat.m_is_directory) || (!file_stat.m_is_supported))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE);

    pFile = MZ_FOPEN(pDst_filename, "wb");
    if (!pFile)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);

    status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags);

    /* A close failure turns the result into a failure, but does not overwrite an earlier extraction error. */
    if (MZ_FCLOSE(pFile) == EOF)
    {
        if (status)
            mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED);

        status = MZ_FALSE;
    }

#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO)
    /* On success, stamp the extracted file with the entry's recorded time. */
    if (status)
        mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time);
#endif
return status; } mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags) { mz_uint32 file_index; if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) return MZ_FALSE; return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); } mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *pFile, mz_uint flags) { mz_zip_archive_file_stat file_stat; if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; if ((file_stat.m_is_directory) || (!file_stat.m_is_supported)) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); return mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); } mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags) { mz_uint32 file_index; if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) return MZ_FALSE; return mz_zip_reader_extract_to_cfile(pZip, file_index, pFile, flags); } #endif /* #ifndef MINIZ_NO_STDIO */ static size_t mz_zip_compute_crc32_callback(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) { mz_uint32 *p = (mz_uint32 *)pOpaque; (void)file_ofs; *p = (mz_uint32)mz_crc32(*p, (const mz_uint8 *)pBuf, n); return n; } mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags) { mz_zip_archive_file_stat file_stat; mz_zip_internal_state *pState; const mz_uint8 *pCentral_dir_header; mz_bool found_zip64_ext_data_in_cdir = MZ_FALSE; mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; mz_uint64 local_header_ofs = 0; mz_uint32 local_header_filename_len, local_header_extra_len, local_header_crc32; mz_uint64 
local_header_comp_size, local_header_uncomp_size; mz_uint32 uncomp_crc32 = MZ_CRC32_INIT; mz_bool has_data_descriptor; mz_uint32 local_header_bit_flags; mz_zip_array file_data_array; mz_zip_array_init(&file_data_array, 1); if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (file_index > pZip->m_total_files) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; pCentral_dir_header = mz_zip_get_cdh(pZip, file_index); if (!mz_zip_file_stat_internal(pZip, file_index, pCentral_dir_header, &file_stat, &found_zip64_ext_data_in_cdir)) return MZ_FALSE; /* A directory or zero length file */ if ((file_stat.m_is_directory) || (!file_stat.m_uncomp_size)) return MZ_TRUE; /* Encryption and patch files are not supported. */ if (file_stat.m_is_encrypted) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); /* This function only supports stored and deflate. */ if ((file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); if (!file_stat.m_is_supported) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); /* Read and parse the local directory entry. 
*/ local_header_ofs = file_stat.m_local_header_ofs; if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); local_header_filename_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS); local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS); local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS); local_header_crc32 = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_CRC32_OFS); local_header_bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); has_data_descriptor = (local_header_bit_flags & 8) != 0; if (local_header_filename_len != strlen(file_stat.m_filename)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if ((local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size) > pZip->m_archive_size) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if (!mz_zip_array_resize(pZip, &file_data_array, MZ_MAX(local_header_filename_len, local_header_extra_len), MZ_FALSE)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if (local_header_filename_len) { if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE, file_data_array.m_p, local_header_filename_len) != local_header_filename_len) { mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); goto handle_failure; } /* I've seen 1 archive that had the same pathname, but used backslashes in the local dir and forward slashes in the central dir. Do we care about this? For now, this case will fail validation. 
*/ if (memcmp(file_stat.m_filename, file_data_array.m_p, local_header_filename_len) != 0) { mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); goto handle_failure; } } if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX))) { mz_uint32 extra_size_remaining = local_header_extra_len; const mz_uint8 *pExtra_data = (const mz_uint8 *)file_data_array.m_p; if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len, file_data_array.m_p, local_header_extra_len) != local_header_extra_len) { mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); goto handle_failure; } do { mz_uint32 field_id, field_data_size, field_total_size; if (extra_size_remaining < (sizeof(mz_uint16) * 2)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); field_id = MZ_READ_LE16(pExtra_data); field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); field_total_size = field_data_size + sizeof(mz_uint16) * 2; if (field_total_size > extra_size_remaining) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) { const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32); if (field_data_size < sizeof(mz_uint64) * 2) { mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); goto handle_failure; } local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data); local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); found_zip64_ext_data_in_ldir = MZ_TRUE; break; } pExtra_data += field_total_size; extra_size_remaining -= field_total_size; } while (extra_size_remaining); } /* TODO: parse local header extra data when local_header_comp_size is 0xFFFFFFFF! 
(big_descriptor.zip) */ /* I've seen zips in the wild with the data descriptor bit set, but proper local header values and bogus data descriptors */ if ((has_data_descriptor) && (!local_header_comp_size) && (!local_header_crc32)) { mz_uint8 descriptor_buf[32]; mz_bool has_id; const mz_uint8 *pSrc; mz_uint32 file_crc32; mz_uint64 comp_size = 0, uncomp_size = 0; mz_uint32 num_descriptor_uint32s = ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) ? 6 : 4; if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size, descriptor_buf, sizeof(mz_uint32) * num_descriptor_uint32s) != (sizeof(mz_uint32) * num_descriptor_uint32s)) { mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); goto handle_failure; } has_id = (MZ_READ_LE32(descriptor_buf) == MZ_ZIP_DATA_DESCRIPTOR_ID); pSrc = has_id ? (descriptor_buf + sizeof(mz_uint32)) : descriptor_buf; file_crc32 = MZ_READ_LE32(pSrc); if ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) { comp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32)); uncomp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32) + sizeof(mz_uint64)); } else { comp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32)); uncomp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32) + sizeof(mz_uint32)); } if ((file_crc32 != file_stat.m_crc32) || (comp_size != file_stat.m_comp_size) || (uncomp_size != file_stat.m_uncomp_size)) { mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); goto handle_failure; } } else { if ((local_header_crc32 != file_stat.m_crc32) || (local_header_comp_size != file_stat.m_comp_size) || (local_header_uncomp_size != file_stat.m_uncomp_size)) { mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); goto handle_failure; } } mz_zip_array_clear(pZip, &file_data_array); if ((flags & MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY) == 0) { if (!mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_compute_crc32_callback, &uncomp_crc32, 0)) return MZ_FALSE; /* 1 more check to be sure, 
although the extract checks too. */
        if (uncomp_crc32 != file_stat.m_crc32)
        {
            mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED);
            return MZ_FALSE;
        }
    }

    return MZ_TRUE;

handle_failure:
    mz_zip_array_clear(pZip, &file_data_array);
    return MZ_FALSE;
}

/* Validates every entry of an archive that is open for reading.
   First checks the archive-wide limits (file count, archive size, central directory size)
   against the non-zip64 or zip64 maximums, then walks all m_total_files entries.
   When MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG is set in flags, each entry's filename must
   locate back to its own index. Every entry is then passed to mz_zip_validate_file().
   Returns MZ_TRUE when all checks pass, MZ_FALSE otherwise. */
mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags)
{
    mz_zip_internal_state *pState;
    uint32_t i;

    /* The archive needs internal state, allocator callbacks and a read callback. */
    if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    pState = pZip->m_pState;

    /* Basic sanity checks */
    if (!pState->m_zip64)
    {
        if (pZip->m_total_files > MZ_UINT16_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);

        if (pZip->m_archive_size > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
    }
    else
    {
        if (pZip->m_total_files >= MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);

        if (pState->m_central_dir.m_size >= MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
    }

    for (i = 0; i < pZip->m_total_files; i++)
    {
        if (MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG & flags)
        {
            mz_uint32 found_index;
            mz_zip_archive_file_stat stat;

            if (!mz_zip_reader_file_stat(pZip, i, &stat))
                return MZ_FALSE;

            if (!mz_zip_reader_locate_file_v2(pZip, stat.m_filename, NULL, 0, &found_index))
                return MZ_FALSE;

            /* This check can fail if there are duplicate filenames in the archive (which we don't check for when writing - that's up to the user) */
            if (found_index != i)
                return mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED);
        }

        if (!mz_zip_validate_file(pZip, i, flags))
            return MZ_FALSE;
    }

    return MZ_TRUE;
}

/* Opens the memory block pMem/size as a ZIP reader, validates it with
   mz_zip_validate_archive(), and closes the reader again.
   pErr is optional; when non-NULL it receives the first error that occurred
   (MZ_ZIP_NO_ERROR on success). Returns MZ_TRUE only if open, validate and close all succeed. */
mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr)
{
    mz_bool success = MZ_TRUE;
    mz_zip_archive zip;
    mz_zip_error actual_err = MZ_ZIP_NO_ERROR;

    if ((!pMem) || (!size))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }

    mz_zip_zero_struct(&zip);

    if (!mz_zip_reader_init_mem(&zip, pMem, size, flags))
    {
        if (pErr)
            *pErr = zip.m_last_error;
        return MZ_FALSE;
    }

    if (!mz_zip_validate_archive(&zip, flags))
    {
        actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }

    /* NOTE(review): the second argument presumably controls whether the close records an error - confirm against mz_zip_reader_end_internal(). */
    if (!mz_zip_reader_end_internal(&zip, success))
    {
        /* Keep the validation error if there was one; only report the close error otherwise. */
        if (!actual_err)
            actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }

    if (pErr)
        *pErr = actual_err;

    return success;
}

#ifndef MINIZ_NO_STDIO
/* Same as mz_zip_validate_mem_archive(), but the archive is opened from the file named pFilename. */
mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr)
{
    mz_bool success = MZ_TRUE;
    mz_zip_archive zip;
    mz_zip_error actual_err = MZ_ZIP_NO_ERROR;

    if (!pFilename)
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }

    mz_zip_zero_struct(&zip);

    if (!mz_zip_reader_init_file_v2(&zip, pFilename, flags, 0, 0))
    {
        if (pErr)
            *pErr = zip.m_last_error;
        return MZ_FALSE;
    }

    if (!mz_zip_validate_archive(&zip, flags))
    {
        actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }

    if (!mz_zip_reader_end_internal(&zip, success))
    {
        /* Keep the validation error if there was one; only report the close error otherwise. */
        if (!actual_err)
            actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }

    if (pErr)
        *pErr = actual_err;

    return success;
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* ------------------- .ZIP archive writing */

#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS

/* Stores the 16-bit value v at p, least significant byte first. */
static MZ_FORCEINLINE void mz_write_le16(mz_uint8 *p, mz_uint16 v)
{
    p[0] = (mz_uint8)v;
    p[1] = (mz_uint8)(v >> 8);
}
/* Stores the 32-bit value v at p, least significant byte first. */
static MZ_FORCEINLINE void mz_write_le32(mz_uint8 *p, mz_uint32 v)
{
    p[0] = (mz_uint8)v;
    p[1] = (mz_uint8)(v >> 8);
    p[2] = (mz_uint8)(v >> 16);
    p[3] = (mz_uint8)(v >> 24);
}
/* Stores the 64-bit value v at p as two little-endian 32-bit halves, low half first. */
static MZ_FORCEINLINE void mz_write_le64(mz_uint8 *p, mz_uint64 v)
{
    mz_write_le32(p, (mz_uint32)v);
    mz_write_le32(p + sizeof(mz_uint32), (mz_uint32)(v >> 32));
}

/* Convenience wrappers that cast the destination pointer and the value to the expected types. */
#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
#define MZ_WRITE_LE64(p, v) mz_write_le64((mz_uint8 *)(p), (mz_uint64)(v))

/* Write callback for heap-backed archives: copies n bytes from pBuf to offset file_ofs of
   the state's memory block, growing the block through m_pRealloc when needed.
   pOpaque is the mz_zip_archive itself. Returns n on success; returns 0 when n is 0,
   when the required size exceeds 0x7FFFFFFF on a 32-bit size_t, or when reallocation fails. */
static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
{
    mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
    mz_zip_internal_state *pState = pZip->m_pState;
    /* The block never shrinks: the new size is the larger of the write's end and the current size. */
    mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size);

    if (!n)
        return 0;

    /* An allocation this big is likely to just fail on 32-bit systems, so don't even go there. */
    if ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))
    {
        mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE);
        return 0;
    }

    if (new_size > pState->m_mem_capacity)
    {
        void *pNew_block;
        /* Start from at least 64 bytes and double until the write fits. */
        size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity);

        while (new_capacity < new_size)
            new_capacity *= 2;

        if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            return 0;
        }

        pState->m_pMem = pNew_block;
        pState->m_mem_capacity = new_capacity;
    }
    memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n);
    pState->m_mem_size = (size_t)new_size;
    return n;
}

/* Tears down a writer: clears the three central directory arrays, closes the FILE when the
   archive type is MZ_ZIP_TYPE_FILE, frees the heap block when the heap write callback is
   installed, frees the internal state and marks the archive MZ_ZIP_MODE_INVALID.
   Errors are recorded through mz_zip_set_error() only when set_last_error is non-zero.
   Returns MZ_FALSE for an archive that is not a writer or when closing the file fails. */
static mz_bool mz_zip_writer_end_internal(mz_zip_archive *pZip, mz_bool set_last_error)
{
    mz_zip_internal_state *pState;
    mz_bool status = MZ_TRUE;

    if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)))
    {
        if (set_last_error)
            mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return MZ_FALSE;
    }

    /* Detach the state from the archive before releasing what it owns. */
    pState = pZip->m_pState;
    pZip->m_pState = NULL;
    mz_zip_array_clear(pZip, &pState->m_central_dir);
    mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
    mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);

#ifndef MINIZ_NO_STDIO
    if (pState->m_pFile)
    {
        /* Only MZ_ZIP_TYPE_FILE archives close the handle here; other types just drop the pointer. */
        if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE)
        {
            if (MZ_FCLOSE(pState->m_pFile) == EOF)
            {
                if (set_last_error)
                    mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED);
                status = MZ_FALSE;
            }
        }

        pState->m_pFile = NULL;
    }
#endif /* #ifndef MINIZ_NO_STDIO */

    if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem))
    {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem);
        pState->m_pMem = NULL;
    }

    pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
    pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
return status; } mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags) { mz_bool zip64 = (flags & MZ_ZIP_FLAG_WRITE_ZIP64) != 0; if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) { if (!pZip->m_pRead) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); } if (pZip->m_file_offset_alignment) { /* Ensure user specified file offset alignment is a power of 2. */ if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); } if (!pZip->m_pAlloc) pZip->m_pAlloc = miniz_def_alloc_func; if (!pZip->m_pFree) pZip->m_pFree = miniz_def_free_func; if (!pZip->m_pRealloc) pZip->m_pRealloc = miniz_def_realloc_func; pZip->m_archive_size = existing_size; pZip->m_central_directory_file_ofs = 0; pZip->m_total_files = 0; if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); pZip->m_pState->m_zip64 = zip64; pZip->m_pState->m_zip64_has_extended_info_fields = zip64; pZip->m_zip_type = MZ_ZIP_TYPE_USER; pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; return MZ_TRUE; } mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { return mz_zip_writer_init_v2(pZip, existing_size, 0); } mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags) { pZip->m_pWrite = mz_zip_heap_write_func; 
pZip->m_pNeeds_keepalive = NULL; if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) pZip->m_pRead = mz_zip_mem_read_func; pZip->m_pIO_opaque = pZip; if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) return MZ_FALSE; pZip->m_zip_type = MZ_ZIP_TYPE_HEAP; if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) { if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { mz_zip_writer_end_internal(pZip, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } pZip->m_pState->m_mem_capacity = initial_allocation_size; } return MZ_TRUE; } mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size) { return mz_zip_writer_init_heap_v2(pZip, size_to_reserve_at_beginning, initial_allocation_size, 0); } #ifndef MINIZ_NO_STDIO static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) { mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); file_ofs += pZip->m_pState->m_file_archive_start_ofs; if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) { mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); return 0; } return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); } mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning) { return mz_zip_writer_init_file_v2(pZip, pFilename, size_to_reserve_at_beginning, 0); } mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags) { MZ_FILE *pFile; pZip->m_pWrite = mz_zip_file_write_func; pZip->m_pNeeds_keepalive = NULL; if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) pZip->m_pRead = mz_zip_file_read_func; pZip->m_pIO_opaque = pZip; if 
(!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) return MZ_FALSE; if (NULL == (pFile = MZ_FOPEN(pFilename, (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) ? "w+b" : "wb"))) { mz_zip_writer_end(pZip); return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); } pZip->m_pState->m_pFile = pFile; pZip->m_zip_type = MZ_ZIP_TYPE_FILE; if (size_to_reserve_at_beginning) { mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf); do { size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { mz_zip_writer_end(pZip); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_ofs += n; size_to_reserve_at_beginning -= n; } while (size_to_reserve_at_beginning); } return MZ_TRUE; } mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags) { pZip->m_pWrite = mz_zip_file_write_func; pZip->m_pNeeds_keepalive = NULL; if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) pZip->m_pRead = mz_zip_file_read_func; pZip->m_pIO_opaque = pZip; if (!mz_zip_writer_init_v2(pZip, 0, flags)) return MZ_FALSE; pZip->m_pState->m_pFile = pFile; pZip->m_pState->m_file_archive_start_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; return MZ_TRUE; } #endif /* #ifndef MINIZ_NO_STDIO */ mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags) { mz_zip_internal_state *pState; if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (flags & MZ_ZIP_FLAG_WRITE_ZIP64) { /* We don't support converting a non-zip64 file to zip64 - this seems like more trouble than it's worth. (What about the existing 32-bit data descriptors that could follow the compressed data?) 
*/ if (!pZip->m_pState->m_zip64) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); } /* No sense in trying to write to an archive that's already at the support max size */ if (pZip->m_pState->m_zip64) { if (pZip->m_total_files == MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { if (pZip->m_total_files == MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); if ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); } pState = pZip->m_pState; if (pState->m_pFile) { #ifdef MINIZ_NO_STDIO (void)pFilename; return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); #else if (pZip->m_pIO_opaque != pZip) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) { if (!pFilename) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); /* Archive is being read from stdio and was originally opened only for reading. Try to reopen as writable. */ if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { /* The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. */ mz_zip_reader_end_internal(pZip, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); } } pZip->m_pWrite = mz_zip_file_write_func; pZip->m_pNeeds_keepalive = NULL; #endif /* #ifdef MINIZ_NO_STDIO */ } else if (pState->m_pMem) { /* Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. */ if (pZip->m_pIO_opaque != pZip) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState->m_mem_capacity = pState->m_mem_size; pZip->m_pWrite = mz_zip_heap_write_func; pZip->m_pNeeds_keepalive = NULL; } /* Archive is being read via a user provided read function - make sure the user has specified a write function too. 
*/ else if (!pZip->m_pWrite) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); /* Start writing new files at the archive's current central directory location. */ /* TODO: We could add a flag that lets the user start writing immediately AFTER the existing central dir - this would be safer. */ pZip->m_archive_size = pZip->m_central_directory_file_ofs; pZip->m_central_directory_file_ofs = 0; /* Clear the sorted central dir offsets, they aren't useful or maintained now. */ /* Even though we're now in write mode, files can still be extracted and verified, but file locates will be slow. */ /* TODO: We could easily maintain the sorted central directory offsets. */ mz_zip_array_clear(pZip, &pZip->m_pState->m_sorted_central_dir_offsets); pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; return MZ_TRUE; } mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename) { return mz_zip_writer_init_from_reader_v2(pZip, pFilename, 0); } /* TODO: pArchive_name is a terrible name here! 
*/ mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) { return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); } typedef struct { mz_zip_archive *m_pZip; mz_uint64 m_cur_archive_file_ofs; mz_uint64 m_comp_size; } mz_zip_writer_add_state; static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, void *pUser) { mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) return MZ_FALSE; pState->m_cur_archive_file_ofs += len; pState->m_comp_size += len; return MZ_TRUE; } #define MZ_ZIP64_MAX_LOCAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 2) #define MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 3) static mz_uint32 mz_zip_writer_create_zip64_extra_data(mz_uint8 *pBuf, mz_uint64 *pUncomp_size, mz_uint64 *pComp_size, mz_uint64 *pLocal_header_ofs) { mz_uint8 *pDst = pBuf; mz_uint32 field_size = 0; MZ_WRITE_LE16(pDst + 0, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID); MZ_WRITE_LE16(pDst + 2, 0); pDst += sizeof(mz_uint16) * 2; if (pUncomp_size) { MZ_WRITE_LE64(pDst, *pUncomp_size); pDst += sizeof(mz_uint64); field_size += sizeof(mz_uint64); } if (pComp_size) { MZ_WRITE_LE64(pDst, *pComp_size); pDst += sizeof(mz_uint64); field_size += sizeof(mz_uint64); } if (pLocal_header_ofs) { MZ_WRITE_LE64(pDst, *pLocal_header_ofs); pDst += sizeof(mz_uint64); field_size += sizeof(mz_uint64); } MZ_WRITE_LE16(pBuf + 2, field_size); return (mz_uint32)(pDst - pBuf); } static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) { (void)pZip; 
memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); return MZ_TRUE; } static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { (void)pZip; memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_MIN(local_header_ofs, MZ_UINT32_MAX)); return MZ_TRUE; } static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes, const char *user_extra_data, mz_uint user_extra_data_len) { mz_zip_internal_state *pState = pZip->m_pState; mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; size_t orig_central_dir_size = pState->m_central_dir.m_size; mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; if (!pZip->m_pState->m_zip64) { if (local_header_ofs > 0xFFFFFFFF) return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); } /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + user_extra_data_len + comment_size) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); if 
(!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, (mz_uint16)(extra_size + user_extra_data_len), comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, user_extra_data, user_extra_data_len)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) { /* Try to resize the central directory array back into its original state. */ mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } return MZ_TRUE; } static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { /* Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. 
*/ if (*pArchive_name == '/') return MZ_FALSE; /* Making sure the name does not contain drive letters or DOS style backward slashes is the responsibility of the program using miniz*/ return MZ_TRUE; } static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) { mz_uint32 n; if (!pZip->m_file_offset_alignment) return 0; n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); return (mz_uint)((pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1)); } static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) { char buf[4096]; memset(buf, 0, MZ_MIN(sizeof(buf), n)); while (n) { mz_uint32 s = MZ_MIN(sizeof(buf), n); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_file_ofs += s; n -= s; } return MZ_TRUE; } mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) { return mz_zip_writer_add_mem_ex_v2(pZip, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, uncomp_size, uncomp_crc32, NULL, NULL, 0, NULL, 0); } mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) { mz_uint16 method = 0, dos_time = 0, dos_date = 0; mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; size_t archive_name_size; mz_uint8 
local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; tdefl_compressor *pComp = NULL; mz_bool store_data_uncompressed; mz_zip_internal_state *pState; mz_uint8 *pExtra_data = NULL; mz_uint32 extra_size = 0; mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; mz_uint16 bit_flags = 0; if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; if (uncomp_size || (buf_size && !(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) bit_flags |= MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) bit_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; level = level_and_flags & 0xF; store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; if (pState->m_zip64) { if (pZip->m_total_files == MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { if (pZip->m_total_files == MZ_UINT16_MAX) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ } if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ } } if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_writer_validate_archive_name(pArchive_name)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); #ifndef MINIZ_NO_TIME if (last_modified != NULL) { mz_zip_time_t_to_dos_time(*last_modified, &dos_time, &dos_date); } else { MZ_TIME_T cur_time; time(&cur_time); mz_zip_time_t_to_dos_time(cur_time, &dos_time, &dos_date); } #endif /* #ifndef MINIZ_NO_TIME */ if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { uncomp_crc32 = 
(mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); uncomp_size = buf_size; if (uncomp_size <= 3) { level = 0; store_data_uncompressed = MZ_TRUE; } } archive_name_size = strlen(pArchive_name); if (archive_name_size > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); if (!pState->m_zip64) { /* Bail early if the archive would obviously become too large */ if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + user_extra_data_len + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + user_extra_data_central_len + MZ_ZIP_DATA_DESCRIPTER_SIZE32) > 0xFFFFFFFF) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ } } if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { /* Set DOS Subdirectory attribute bit. */ ext_attributes |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; /* Subdirectories cannot contain data. */ if ((buf_size) || (uncomp_size)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); } /* Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) */ if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + (pState->m_zip64 ? 
MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE : 0))) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if ((!store_data_uncompressed) && (buf_size)) { if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } local_dir_header_ofs += num_alignment_padding_bytes; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } cur_archive_file_ofs += num_alignment_padding_bytes; MZ_CLEAR_OBJ(local_dir_header); if (!store_data_uncompressed || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { method = MZ_DEFLATED; } if (pState->m_zip64) { if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) { pExtra_data = extra_data; extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? 
&local_dir_header_ofs : NULL); } if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, bit_flags, dos_time, dos_date)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += sizeof(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += archive_name_size; if (pExtra_data != NULL) { if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += extra_size; } } else { if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, bit_flags, dos_time, dos_date)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += sizeof(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += archive_name_size; } if (user_extra_data_len > 0) { if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, 
user_extra_data_len) != user_extra_data_len) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += user_extra_data_len; } if (store_data_uncompressed) { if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += buf_size; comp_size = buf_size; } else if (buf_size) { mz_zip_writer_add_state state; state.m_pZip = pZip; state.m_cur_archive_file_ofs = cur_archive_file_ofs; state.m_comp_size = 0; if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); } comp_size = state.m_comp_size; cur_archive_file_ofs = state.m_cur_archive_file_ofs; } pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); pComp = NULL; if (uncomp_size) { mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; MZ_ASSERT(bit_flags & MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR); MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); if (pExtra_data == NULL) { if (comp_size > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); MZ_WRITE_LE32(local_dir_footer + 8, comp_size); MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); } else { MZ_WRITE_LE64(local_dir_footer + 8, comp_size); MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64; } if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size) return MZ_FALSE; cur_archive_file_ofs += local_dir_footer_size; } if (pExtra_data != NULL) { 
extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); } if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, user_extra_data_central_len)) return MZ_FALSE; pZip->m_total_files++; pZip->m_archive_size = cur_archive_file_ofs; return MZ_TRUE; } mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) { mz_uint16 gen_flags = MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; mz_uint64 local_dir_header_ofs, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = size_to_add, comp_size = 0; size_t archive_name_size; mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; mz_uint8 *pExtra_data = NULL; mz_uint32 extra_size = 0; mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; mz_zip_internal_state *pState; mz_uint64 file_ofs = 0; if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) gen_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; level = level_and_flags & 0xF; /* Sanity checks */ if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) 
|| (level > MZ_UBER_COMPRESSION)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; if ((!pState->m_zip64) && (uncomp_size > MZ_UINT32_MAX)) { /* Source file is too large for non-zip64 */ /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ pState->m_zip64 = MZ_TRUE; } /* We could support this, but why? */ if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_writer_validate_archive_name(pArchive_name)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); if (pState->m_zip64) { if (pZip->m_total_files == MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { if (pZip->m_total_files == MZ_UINT16_MAX) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ } } archive_name_size = strlen(pArchive_name); if (archive_name_size > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); if (!pState->m_zip64) { /* Bail early if the archive would obviously become too large */ if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + user_extra_data_len + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 1024 + MZ_ZIP_DATA_DESCRIPTER_SIZE32 + user_extra_data_central_len) > 0xFFFFFFFF) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ } } #ifndef MINIZ_NO_TIME if (pFile_time) { mz_zip_time_t_to_dos_time(*pFile_time, &dos_time, 
&dos_date); } #endif if (uncomp_size <= 3) level = 0; if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) { return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += num_alignment_padding_bytes; local_dir_header_ofs = cur_archive_file_ofs; if (pZip->m_file_offset_alignment) { MZ_ASSERT((cur_archive_file_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } if (uncomp_size && level) { method = MZ_DEFLATED; } MZ_CLEAR_OBJ(local_dir_header); if (pState->m_zip64) { if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) { pExtra_data = extra_data; extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); } if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, gen_flags, dos_time, dos_date)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += sizeof(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += archive_name_size; if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += extra_size; } else { if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, 
(mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, gen_flags, dos_time, dos_date)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += sizeof(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_archive_file_ofs += archive_name_size; } if (user_extra_data_len > 0) { if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, user_extra_data_len) != user_extra_data_len) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_archive_file_ofs += user_extra_data_len; } if (uncomp_size) { mz_uint64 uncomp_remaining = uncomp_size; void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); if (!pRead_buf) { return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!level) { while (uncomp_remaining) { mz_uint n = (mz_uint)MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); if ((read_callback(callback_opaque, file_ofs, pRead_buf, n) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } file_ofs += n; uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); uncomp_remaining -= n; cur_archive_file_ofs += n; } comp_size = uncomp_size; } else { mz_bool result = MZ_FALSE; mz_zip_writer_add_state state; tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); if (!pComp) { pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } state.m_pZip = pZip; 
state.m_cur_archive_file_ofs = cur_archive_file_ofs; state.m_comp_size = 0; if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); } for (;;) { size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE); tdefl_status status; tdefl_flush flush = TDEFL_NO_FLUSH; if (read_callback(callback_opaque, file_ofs, pRead_buf, in_buf_size)!= in_buf_size) { mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); break; } file_ofs += in_buf_size; uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); uncomp_remaining -= in_buf_size; if (pZip->m_pNeeds_keepalive != NULL && pZip->m_pNeeds_keepalive(pZip->m_pIO_opaque)) flush = TDEFL_FULL_FLUSH; status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? 
flush : TDEFL_FINISH); if (status == TDEFL_STATUS_DONE) { result = MZ_TRUE; break; } else if (status != TDEFL_STATUS_OKAY) { mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); break; } } pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); if (!result) { pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return MZ_FALSE; } comp_size = state.m_comp_size; cur_archive_file_ofs = state.m_cur_archive_file_ofs; } pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); } { mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); if (pExtra_data == NULL) { if (comp_size > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); MZ_WRITE_LE32(local_dir_footer + 8, comp_size); MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); } else { MZ_WRITE_LE64(local_dir_footer + 8, comp_size); MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64; } if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size) return MZ_FALSE; cur_archive_file_ofs += local_dir_footer_size; } if (pExtra_data != NULL) { extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? 
&local_dir_header_ofs : NULL); } if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, gen_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, user_extra_data_central_len)) return MZ_FALSE; pZip->m_total_files++; pZip->m_archive_size = cur_archive_file_ofs; return MZ_TRUE; } #ifndef MINIZ_NO_STDIO static size_t mz_file_read_func_stdio(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) { MZ_FILE *pSrc_file = (MZ_FILE *)pOpaque; mz_int64 cur_ofs = MZ_FTELL64(pSrc_file); if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pSrc_file, (mz_int64)file_ofs, SEEK_SET)))) return 0; return MZ_FREAD(pBuf, 1, n, pSrc_file); } mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) { return mz_zip_writer_add_read_buf_callback(pZip, pArchive_name, mz_file_read_func_stdio, pSrc_file, size_to_add, pFile_time, pComment, comment_size, level_and_flags, user_extra_data, user_extra_data_len, user_extra_data_central, user_extra_data_central_len); } mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) { MZ_FILE *pSrc_file = NULL; mz_uint64 uncomp_size = 0; MZ_TIME_T file_modified_time; MZ_TIME_T *pFile_time = NULL; mz_bool status; memset(&file_modified_time, 0, sizeof(file_modified_time)); #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO) pFile_time = &file_modified_time; if (!mz_zip_get_file_modified_time(pSrc_filename, &file_modified_time)) return 
mz_zip_set_error(pZip, MZ_ZIP_FILE_STAT_FAILED); #endif pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); if (!pSrc_file) return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); MZ_FSEEK64(pSrc_file, 0, SEEK_END); uncomp_size = MZ_FTELL64(pSrc_file); MZ_FSEEK64(pSrc_file, 0, SEEK_SET); status = mz_zip_writer_add_cfile(pZip, pArchive_name, pSrc_file, uncomp_size, pFile_time, pComment, comment_size, level_and_flags, NULL, 0, NULL, 0); MZ_FCLOSE(pSrc_file); return status; } #endif /* #ifndef MINIZ_NO_STDIO */ static mz_bool mz_zip_writer_update_zip64_extension_block(mz_zip_array *pNew_ext, mz_zip_archive *pZip, const mz_uint8 *pExt, uint32_t ext_len, mz_uint64 *pComp_size, mz_uint64 *pUncomp_size, mz_uint64 *pLocal_header_ofs, mz_uint32 *pDisk_start) { /* + 64 should be enough for any new zip64 data */ if (!mz_zip_array_reserve(pZip, pNew_ext, ext_len + 64, MZ_FALSE)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); mz_zip_array_resize(pZip, pNew_ext, 0, MZ_FALSE); if ((pUncomp_size) || (pComp_size) || (pLocal_header_ofs) || (pDisk_start)) { mz_uint8 new_ext_block[64]; mz_uint8 *pDst = new_ext_block; mz_write_le16(pDst, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID); mz_write_le16(pDst + sizeof(mz_uint16), 0); pDst += sizeof(mz_uint16) * 2; if (pUncomp_size) { mz_write_le64(pDst, *pUncomp_size); pDst += sizeof(mz_uint64); } if (pComp_size) { mz_write_le64(pDst, *pComp_size); pDst += sizeof(mz_uint64); } if (pLocal_header_ofs) { mz_write_le64(pDst, *pLocal_header_ofs); pDst += sizeof(mz_uint64); } if (pDisk_start) { mz_write_le32(pDst, *pDisk_start); pDst += sizeof(mz_uint32); } mz_write_le16(new_ext_block + sizeof(mz_uint16), (mz_uint16)((pDst - new_ext_block) - sizeof(mz_uint16) * 2)); if (!mz_zip_array_push_back(pZip, pNew_ext, new_ext_block, pDst - new_ext_block)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if ((pExt) && (ext_len)) { mz_uint32 extra_size_remaining = ext_len; const mz_uint8 *pExtra_data = pExt; do { mz_uint32 field_id, field_data_size, 
field_total_size; if (extra_size_remaining < (sizeof(mz_uint16) * 2)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); field_id = MZ_READ_LE16(pExtra_data); field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); field_total_size = field_data_size + sizeof(mz_uint16) * 2; if (field_total_size > extra_size_remaining) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); if (field_id != MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) { if (!mz_zip_array_push_back(pZip, pNew_ext, pExtra_data, field_total_size)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } pExtra_data += field_total_size; extra_size_remaining -= field_total_size; } while (extra_size_remaining); } return MZ_TRUE; } /* TODO: This func is now pretty freakin complex due to zip64, split it up? */ mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index) { mz_uint n, bit_flags, num_alignment_padding_bytes, src_central_dir_following_data_size; mz_uint64 src_archive_bytes_remaining, local_dir_header_ofs; mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; mz_uint8 new_central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; size_t orig_central_dir_size; mz_zip_internal_state *pState; void *pBuf; const mz_uint8 *pSrc_central_header; mz_zip_archive_file_stat src_file_stat; mz_uint32 src_filename_len, src_comment_len, src_ext_len; mz_uint32 local_header_filename_size, local_header_extra_len; mz_uint64 local_header_comp_size, local_header_uncomp_size; mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE; /* Sanity checks */ if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pSource_zip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; /* Don't support copying files from zip64 archives to non-zip64, 
even though in some cases this is possible */ if ((pSource_zip->m_pState->m_zip64) && (!pZip->m_pState->m_zip64)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); /* Get pointer to the source central dir header and crack it */ if (NULL == (pSrc_central_header = mz_zip_get_cdh(pSource_zip, src_file_index))) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_SIG_OFS) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); src_filename_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS); src_comment_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); src_ext_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS); src_central_dir_following_data_size = src_filename_len + src_ext_len + src_comment_len; /* TODO: We don't support central dir's >= MZ_UINT32_MAX bytes right now (+32 fudge factor in case we need to add more extra data) */ if ((pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + 32) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); if (!pState->m_zip64) { if (pZip->m_total_files == MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { /* TODO: Our zip64 support still has some 32-bit limits that may not be worth fixing. 
*/ if (pZip->m_total_files == MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } if (!mz_zip_file_stat_internal(pSource_zip, src_file_index, pSrc_central_header, &src_file_stat, NULL)) return MZ_FALSE; cur_src_file_ofs = src_file_stat.m_local_header_ofs; cur_dst_file_ofs = pZip->m_archive_size; /* Read the source archive's local dir header */ if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; /* Compute the total size we need to copy (filename+extra data+compressed data) */ local_header_filename_size = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS); local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS); local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS); src_archive_bytes_remaining = local_header_filename_size + local_header_extra_len + src_file_stat.m_comp_size; /* Try to find a zip64 extended information field */ if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX))) { mz_zip_array file_data_array; const mz_uint8 *pExtra_data; mz_uint32 extra_size_remaining = local_header_extra_len; mz_zip_array_init(&file_data_array, 1); if (!mz_zip_array_resize(pZip, &file_data_array, local_header_extra_len, MZ_FALSE)) { return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, src_file_stat.m_local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_size, file_data_array.m_p, local_header_extra_len) != local_header_extra_len) { 
mz_zip_array_clear(pZip, &file_data_array); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } pExtra_data = (const mz_uint8 *)file_data_array.m_p; do { mz_uint32 field_id, field_data_size, field_total_size; if (extra_size_remaining < (sizeof(mz_uint16) * 2)) { mz_zip_array_clear(pZip, &file_data_array); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } field_id = MZ_READ_LE16(pExtra_data); field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); field_total_size = field_data_size + sizeof(mz_uint16) * 2; if (field_total_size > extra_size_remaining) { mz_zip_array_clear(pZip, &file_data_array); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) { const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32); if (field_data_size < sizeof(mz_uint64) * 2) { mz_zip_array_clear(pZip, &file_data_array); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); } local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data); local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); /* may be 0 if there's a descriptor */ found_zip64_ext_data_in_ldir = MZ_TRUE; break; } pExtra_data += field_total_size; extra_size_remaining -= field_total_size; } while (extra_size_remaining); mz_zip_array_clear(pZip, &file_data_array); } if (!pState->m_zip64) { /* Try to detect if the new archive will most likely wind up too big and bail early (+(sizeof(mz_uint32) * 4) is for the optional descriptor which could be present, +64 is a fudge factor). */ /* We also check when the archive is finalized so this doesn't need to be perfect. 
*/ mz_uint64 approx_new_archive_size = cur_dst_file_ofs + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + src_archive_bytes_remaining + (sizeof(mz_uint32) * 4) + pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 64; if (approx_new_archive_size >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); } /* Write dest archive padding */ if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) return MZ_FALSE; cur_dst_file_ofs += num_alignment_padding_bytes; local_dir_header_ofs = cur_dst_file_ofs; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } /* The original zip's local header+ext block doesn't change, even with zip64, so we can just copy it over to the dest zip */ if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; /* Copy over the source archive bytes to the dest archive, also ensure we have enough buf space to handle optional data descriptor */ if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(32U, MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining))))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); while (src_archive_bytes_remaining) { n = (mz_uint)MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining); if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } cur_src_file_ofs += n; if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } 
cur_dst_file_ofs += n; src_archive_bytes_remaining -= n; } /* Now deal with the optional data descriptor */ bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); if (bit_flags & 8) { /* Copy data descriptor */ if ((pSource_zip->m_pState->m_zip64) || (found_zip64_ext_data_in_ldir)) { /* src is zip64, dest must be zip64 */ /* name uint32_t's */ /* id 1 (optional in zip64?) */ /* crc 1 */ /* comp_size 2 */ /* uncomp_size 2 */ if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, (sizeof(mz_uint32) * 6)) != (sizeof(mz_uint32) * 6)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID) ? 6 : 5); } else { /* src is NOT zip64 */ mz_bool has_id; if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); } has_id = (MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID); if (pZip->m_pState->m_zip64) { /* dest is zip64, so upgrade the data descriptor */ const mz_uint32 *pSrc_descriptor = (const mz_uint32 *)((const mz_uint8 *)pBuf + (has_id ? sizeof(mz_uint32) : 0)); const mz_uint32 src_crc32 = pSrc_descriptor[0]; const mz_uint64 src_comp_size = pSrc_descriptor[1]; const mz_uint64 src_uncomp_size = pSrc_descriptor[2]; mz_write_le32((mz_uint8 *)pBuf, MZ_ZIP_DATA_DESCRIPTOR_ID); mz_write_le32((mz_uint8 *)pBuf + sizeof(mz_uint32) * 1, src_crc32); mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 2, src_comp_size); mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 4, src_uncomp_size); n = sizeof(mz_uint32) * 6; } else { /* dest is NOT zip64, just copy it as-is */ n = sizeof(mz_uint32) * (has_id ? 
4 : 3); } } if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); } cur_src_file_ofs += n; cur_dst_file_ofs += n; } pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); /* Finally, add the new central dir header */ orig_central_dir_size = pState->m_central_dir.m_size; memcpy(new_central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); if (pState->m_zip64) { /* This is the painful part: We need to write a new central dir header + ext block with updated zip64 fields, and ensure the old fields (if any) are not included. */ const mz_uint8 *pSrc_ext = pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len; mz_zip_array new_ext_block; mz_zip_array_init(&new_ext_block, sizeof(mz_uint8)); MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_UINT32_MAX); MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_UINT32_MAX); MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_UINT32_MAX); if (!mz_zip_writer_update_zip64_extension_block(&new_ext_block, pZip, pSrc_ext, src_ext_len, &src_file_stat.m_comp_size, &src_file_stat.m_uncomp_size, &local_dir_header_ofs, NULL)) { mz_zip_array_clear(pZip, &new_ext_block); return MZ_FALSE; } MZ_WRITE_LE16(new_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS, new_ext_block.m_size); if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) { mz_zip_array_clear(pZip, &new_ext_block); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_filename_len)) { mz_zip_array_clear(pZip, &new_ext_block); mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, 
new_ext_block.m_p, new_ext_block.m_size)) { mz_zip_array_clear(pZip, &new_ext_block); mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len + src_ext_len, src_comment_len)) { mz_zip_array_clear(pZip, &new_ext_block); mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } mz_zip_array_clear(pZip, &new_ext_block); } else { /* sanity checks */ if (cur_dst_file_ofs > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); if (local_dir_header_ofs >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_central_dir_following_data_size)) { mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } } /* This shouldn't trigger unless we screwed up during the initial sanity checks */ if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) { /* TODO: Support central dirs >= 32-bits in size */ mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); } n = (mz_uint32)orig_central_dir_size; if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } pZip->m_total_files++; 
pZip->m_archive_size = cur_dst_file_ofs; return MZ_TRUE; } mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { mz_zip_internal_state *pState; mz_uint64 central_dir_ofs, central_dir_size; mz_uint8 hdr[256]; if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; if (pState->m_zip64) { if ((pZip->m_total_files > MZ_UINT32_MAX) || (pState->m_central_dir.m_size >= MZ_UINT32_MAX)) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { if ((pZip->m_total_files > MZ_UINT16_MAX) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX)) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } central_dir_ofs = 0; central_dir_size = 0; if (pZip->m_total_files) { /* Write central directory */ central_dir_ofs = pZip->m_archive_size; central_dir_size = pState->m_central_dir.m_size; pZip->m_central_directory_file_ofs = central_dir_ofs; if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); pZip->m_archive_size += central_dir_size; } if (pState->m_zip64) { /* Write zip64 end of central directory header */ mz_uint64 rel_ofs_to_zip64_ecdr = pZip->m_archive_size; MZ_CLEAR_OBJ(hdr); MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDH_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG); MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - sizeof(mz_uint32) - sizeof(mz_uint64)); MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS, 0x031E); /* TODO: always Unix */ MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_NEEDED_OFS, 0x002D); MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_SIZE_OFS, central_dir_size); 
MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_OFS_OFS, central_dir_ofs); if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE; /* Write zip64 end of central directory locator */ MZ_CLEAR_OBJ(hdr); MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG); MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS, rel_ofs_to_zip64_ecdr); MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS, 1); if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE; } /* Write end of central directory record */ MZ_CLEAR_OBJ(hdr); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files)); MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files)); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_size)); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_ofs)); if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); #ifndef MINIZ_NO_STDIO if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); #endif /* #ifndef MINIZ_NO_STDIO */ pZip->m_archive_size += MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE; pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; return MZ_TRUE; } mz_bool 
mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize)
{
    /* Finalizes an archive written through mz_zip_heap_write_func and hands the
       memory block to the caller via *ppBuf / *pSize. The archive's own pointer,
       size and capacity are then cleared, so the archive no longer refers to the
       block. *ppBuf and *pSize are set to NULL / 0 on every failure path that
       gets past the first argument check. */
    if ((!ppBuf) || (!pSize))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    *ppBuf = NULL;
    *pSize = 0;

    if ((!pZip) || (!pZip->m_pState))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    /* Only archives whose write callback is the heap writer are accepted. */
    if (pZip->m_pWrite != mz_zip_heap_write_func)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    if (!mz_zip_writer_finalize_archive(pZip))
        return MZ_FALSE;

    *ppBuf = pZip->m_pState->m_pMem;
    *pSize = pZip->m_pState->m_mem_size;
    pZip->m_pState->m_pMem = NULL;
    pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0;

    return MZ_TRUE;
}

/* Ends a writer; forwards to mz_zip_writer_end_internal() with MZ_TRUE as the second argument. */
mz_bool mz_zip_writer_end(mz_zip_archive *pZip)
{
    return mz_zip_writer_end_internal(pZip, MZ_TRUE);
}

#ifndef MINIZ_NO_STDIO
/* Same as mz_zip_add_mem_to_archive_file_in_place_v2() with no error output (pErr = NULL). */
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags)
{
    return mz_zip_add_mem_to_archive_file_in_place_v2(pZip_filename, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, NULL);
}

/*
 * Adds the memory buffer pBuf/buf_size as entry pArchive_name to the archive
 * file pZip_filename. If MZ_FILE_STAT fails for that path a new archive is
 * created; otherwise the existing file is opened as a reader and converted to
 * a writer. The archive is always finalized and ended, even when the add
 * failed. A newly created archive is deleted when the overall status is
 * failure.
 *
 * If pErr is non-NULL it receives the first error recorded (or
 * MZ_ZIP_NO_ERROR). Returns MZ_TRUE on success, MZ_FALSE otherwise.
 */
mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr)
{
    mz_bool status, created_new_archive = MZ_FALSE;
    mz_zip_archive zip_archive;
    struct MZ_FILE_STAT_STRUCT file_stat;
    mz_zip_error actual_err = MZ_ZIP_NO_ERROR;

    mz_zip_zero_struct(&zip_archive);
    /* A negative value (as a signed int) selects the default level. */
    if ((int)level_and_flags < 0)
        level_and_flags = MZ_DEFAULT_LEVEL;

    if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }

    if (!mz_zip_writer_validate_archive_name(pArchive_name))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_FILENAME;
        return MZ_FALSE;
    }

    /* Important: The regular non-64 bit version of stat() can fail here if the file is very large, which could cause the archive to be overwritten. */
    /* So be sure to compile with _LARGEFILE64_SOURCE 1 */
    if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0)
    {
        /* Create a new archive. */
        if (!mz_zip_writer_init_file_v2(&zip_archive, pZip_filename, 0, level_and_flags))
        {
            if (pErr)
                *pErr = zip_archive.m_last_error;
            return MZ_FALSE;
        }

        created_new_archive = MZ_TRUE;
    }
    else
    {
        /* Append to an existing archive. */
        if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0))
        {
            if (pErr)
                *pErr = zip_archive.m_last_error;
            return MZ_FALSE;
        }

        if (!mz_zip_writer_init_from_reader_v2(&zip_archive, pZip_filename, level_and_flags))
        {
            /* Capture the error before ending the reader. */
            if (pErr)
                *pErr = zip_archive.m_last_error;

            mz_zip_reader_end_internal(&zip_archive, MZ_FALSE);

            return MZ_FALSE;
        }
    }

    status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0);
    actual_err = zip_archive.m_last_error;

    /* Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) */
    if (!mz_zip_writer_finalize_archive(&zip_archive))
    {
        /* Keep the earliest error: later failures only fill actual_err if it is still unset. */
        if (!actual_err)
            actual_err = zip_archive.m_last_error;

        status = MZ_FALSE;
    }

    if (!mz_zip_writer_end_internal(&zip_archive, status))
    {
        if (!actual_err)
            actual_err = zip_archive.m_last_error;

        status = MZ_FALSE;
    }

    if ((!status) && (created_new_archive))
    {
        /* It's a new archive and something went wrong, so just delete it. */
        int ignoredStatus = MZ_DELETE_FILE(pZip_filename);
        (void)ignoredStatus;
    }

    if (pErr)
        *pErr = actual_err;

    return status;
}

/*
 * Opens the archive file pZip_filename, locates pArchive_name (optionally
 * matching pComment) and extracts it with mz_zip_reader_extract_to_heap().
 * Returns the extracted block, or NULL if the archive could not be opened, the
 * entry was not located, or extraction failed. *pSize (if non-NULL) is zeroed
 * on entry and is otherwise written only by the extraction call. *pErr (if
 * non-NULL) receives the archive's last error.
 */
void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr)
{
    mz_uint32 file_index;
    mz_zip_archive zip_archive;
    void *p = NULL;

    if (pSize)
        *pSize = 0;

    if ((!pZip_filename) || (!pArchive_name))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;

        return NULL;
    }

    mz_zip_zero_struct(&zip_archive);
    if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0))
    {
        if (pErr)
            *pErr = zip_archive.m_last_error;

        return NULL;
    }

    if (mz_zip_reader_locate_file_v2(&zip_archive, pArchive_name, pComment, flags, &file_index))
    {
        p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags);
    }

    mz_zip_reader_end_internal(&zip_archive, p != NULL);

    if (pErr)
        *pErr = zip_archive.m_last_error;

    return p;
}

/* Same as mz_zip_extract_archive_file_to_heap_v2() with no comment filter and no error output. */
void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags)
{
    return mz_zip_extract_archive_file_to_heap_v2(pZip_filename, pArchive_name, NULL, pSize, flags, NULL);
}

#endif /* #ifndef MINIZ_NO_STDIO */

#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */

/* ------------------- Misc utils */

/* Returns the archive's mode, or MZ_ZIP_MODE_INVALID when pZip is NULL. */
mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip)
{
    return pZip ? pZip->m_zip_mode : MZ_ZIP_MODE_INVALID;
}

/* Returns the archive's type, or MZ_ZIP_TYPE_INVALID when pZip is NULL. */
mz_zip_type mz_zip_get_type(mz_zip_archive *pZip)
{
    return pZip ? pZip->m_zip_type : MZ_ZIP_TYPE_INVALID;
}

/* Stores err_num as the archive's last error and returns the previous value.
   Returns MZ_ZIP_INVALID_PARAMETER when pZip is NULL. */
mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num)
{
    mz_zip_error prev_err;

    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;

    prev_err = pZip->m_last_error;

    pZip->m_last_error = err_num;
    return prev_err;
}

/* Returns the archive's last error without modifying it. */
mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip)
{
    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;

    return pZip->m_last_error;
}

/* Resets the last error to MZ_ZIP_NO_ERROR; returns what mz_zip_set_last_error() returns. */
mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip)
{
    return mz_zip_set_last_error(pZip, MZ_ZIP_NO_ERROR);
}

/* Returns the archive's last error and resets it to MZ_ZIP_NO_ERROR. */
mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip)
{
    mz_zip_error prev_err;

    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;

    prev_err = pZip->m_last_error;

    pZip->m_last_error = MZ_ZIP_NO_ERROR;
    return prev_err;
}

/* Maps an mz_zip_error value to a fixed English description. (This definition
   continues past the end of this span; the part below is reproduced unchanged.) */
const char *mz_zip_get_error_string(mz_zip_error mz_err)
{
    switch (mz_err)
    {
        case MZ_ZIP_NO_ERROR:
            return "no error";
        case MZ_ZIP_UNDEFINED_ERROR:
            return "undefined error";
        case MZ_ZIP_TOO_MANY_FILES:
            return "too many files";
        case MZ_ZIP_FILE_TOO_LARGE:
            return "file too large";
        case MZ_ZIP_UNSUPPORTED_METHOD:
            return "unsupported method";
        case MZ_ZIP_UNSUPPORTED_ENCRYPTION:
            return "unsupported encryption";
        case MZ_ZIP_UNSUPPORTED_FEATURE:
            return "unsupported feature";
        case MZ_ZIP_FAILED_FINDING_CENTRAL_DIR:
            return "failed finding central directory";
        case MZ_ZIP_NOT_AN_ARCHIVE:
            return "not a ZIP archive";
        case MZ_ZIP_INVALID_HEADER_OR_CORRUPTED:
            return "invalid header or archive is corrupted";
        case MZ_ZIP_UNSUPPORTED_MULTIDISK:
            return "unsupported multidisk archive";
        case MZ_ZIP_DECOMPRESSION_FAILED:
            return "decompression failed or archive is corrupted";
        case MZ_ZIP_COMPRESSION_FAILED:
            return "compression failed";
        case MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE:
            return "unexpected decompressed size";
        case MZ_ZIP_CRC_CHECK_FAILED:
            return "CRC-32 check failed";
        case MZ_ZIP_UNSUPPORTED_CDIR_SIZE:
            return "unsupported central directory size";
        case MZ_ZIP_ALLOC_FAILED:
            return "allocation failed";
        case MZ_ZIP_FILE_OPEN_FAILED:
            return "file open failed";
        case
MZ_ZIP_FILE_CREATE_FAILED: return "file create failed"; case MZ_ZIP_FILE_WRITE_FAILED: return "file write failed"; case MZ_ZIP_FILE_READ_FAILED: return "file read failed"; case MZ_ZIP_FILE_CLOSE_FAILED: return "file close failed"; case MZ_ZIP_FILE_SEEK_FAILED: return "file seek failed"; case MZ_ZIP_FILE_STAT_FAILED: return "file stat failed"; case MZ_ZIP_INVALID_PARAMETER: return "invalid parameter"; case MZ_ZIP_INVALID_FILENAME: return "invalid filename"; case MZ_ZIP_BUF_TOO_SMALL: return "buffer too small"; case MZ_ZIP_INTERNAL_ERROR: return "internal error"; case MZ_ZIP_FILE_NOT_FOUND: return "file not found"; case MZ_ZIP_ARCHIVE_TOO_LARGE: return "archive is too large"; case MZ_ZIP_VALIDATION_FAILED: return "validation failed"; case MZ_ZIP_WRITE_CALLBACK_FAILED: return "write calledback failed"; default: break; } return "unknown error"; } /* Note: Just because the archive is not zip64 doesn't necessarily mean it doesn't have Zip64 extended information extra field, argh. */ mz_bool mz_zip_is_zip64(mz_zip_archive *pZip) { if ((!pZip) || (!pZip->m_pState)) return MZ_FALSE; return pZip->m_pState->m_zip64; } size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip) { if ((!pZip) || (!pZip->m_pState)) return 0; return pZip->m_pState->m_central_dir.m_size; } mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { return pZip ? 
        pZip->m_total_files : 0;
}

/* Returns pZip->m_archive_size, or 0 when pZip is NULL. */
mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip)
{
    if (!pZip)
        return 0;
    return pZip->m_archive_size;
}

/* Returns the state's m_file_archive_start_ofs, or 0 when pZip or its state is NULL. */
mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip)
{
    if ((!pZip) || (!pZip->m_pState))
        return 0;
    return pZip->m_pState->m_file_archive_start_ofs;
}

/* Returns the state's m_pFile handle, or a null pointer when pZip or its state is NULL. */
MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip)
{
    if ((!pZip) || (!pZip->m_pState))
        return 0;
    return pZip->m_pState->m_pFile;
}

/* Reads n bytes at archive offset file_ofs into pBuf through the archive's m_pRead callback and returns the callback's result. */
/* On a NULL archive, state, buffer or callback it records MZ_ZIP_INVALID_PARAMETER and returns the value of mz_zip_set_error() converted to size_t. */
size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n)
{
    if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    return pZip->m_pRead(pZip->m_pIO_opaque, file_ofs, pBuf, n);
}

/* Copies the file name of entry file_index into pFilename, truncated to filename_buf_size - 1 bytes and always NUL-terminated. */
/* Returns the number of bytes written including the terminator. When filename_buf_size is 0 nothing is written and the stored name length + 1 is returned. */
/* Returns 0 (and records MZ_ZIP_INVALID_PARAMETER) when mz_zip_get_cdh() yields no header for file_index. */
mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size)
{
    mz_uint n;
    const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index);
    if (!p)
    {
        /* Hand back an empty string when the caller supplied a buffer. */
        if (filename_buf_size)
            pFilename[0] = '\0';
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return 0;
    }
    n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    if (filename_buf_size)
    {
        /* Reserve one byte for the terminator. */
        n = MZ_MIN(n, filename_buf_size - 1);
        /* The name is read from directly after the fixed-size central directory header. */
        memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
        pFilename[n] = '\0';
    }
    return n + 1;
}

/* Fills *pStat for entry file_index by forwarding to mz_zip_file_stat_internal() with the entry's central directory header. */
mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat)
{
    return mz_zip_file_stat_internal(pZip, file_index, mz_zip_get_cdh(pZip, file_index), pStat, NULL);
}

/* Ends the archive according to its mode: mz_zip_reader_end() when reading, mz_zip_writer_end() when writing or finalized. */
/* Returns MZ_FALSE for a NULL archive or any other mode. */
mz_bool mz_zip_end(mz_zip_archive *pZip)
{
    if (!pZip)
        return MZ_FALSE;

    if (pZip->m_zip_mode == MZ_ZIP_MODE_READING)
        return mz_zip_reader_end(pZip);
#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
    else if ((pZip->m_zip_mode == MZ_ZIP_MODE_WRITING) || (pZip->m_zip_mode == MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))
        return mz_zip_writer_end(pZip);
#endif

    return MZ_FALSE;
}

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_ARCHIVE_APIS*/

================================================
FILE: 3rdparty/miniz/miniz.h
================================================
/************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ /* miniz.c 2.1.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing See "unlicense" statement at the end of this file. Rich Geldreich , last updated Oct. 13, 2013 Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). * Low-level Deflate/Inflate implementation notes: Compression: Use the "tdefl" API's. 
The compressor supports raw, static, and dynamic blocks, lazy or greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses approximately as well as zlib. Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory block large enough to hold the entire file. The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. * zlib-style API notes: miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in zlib replacement in many apps: The z_stream struct, optional memory allocation callbacks deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound inflateInit/inflateInit2/inflate/inflateReset/inflateEnd compress, compress2, compressBound, uncompress CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. Supports raw deflate streams or standard zlib streams with adler-32 checking. Limitations: The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but there are no guarantees that miniz.c pulls this off perfectly. * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by Alex Evans. Supports 1-4 bytes/pixel images. * ZIP archive API notes: The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to get the job done with minimal fuss. There are simple API's to retrieve file information, read files from existing archives, create new archives, append new files to existing archives, or clone archive data from one archive to another. 
It supports archives located in memory or the heap, on disk (using stdio.h), or you can specify custom file read/write callbacks. - Archive reading: Just call this function to read a single file from a disk archive: void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags); For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); The locate operation can optionally check file comments too, which (as one example) can be used to identify multiple versions of the same file in an archive. This function uses a simple linear search through the central directory, so it's not very fast. Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and retrieve detailed info on each file by calling mz_zip_reader_file_stat(). - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data to disk and builds an exact image of the central directory in memory. The central directory image is written all at once at the end of the archive file when the archive is finalized. The archive writer can optionally align each file's local header and file data to any power of 2 alignment, which can be useful when the archive will be read from optical media. Also, the writer supports placing arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still readable by any ZIP tool. 
- Archive appending: The simple way to add a single file to an archive is to call this function: mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); The archive will be created if it doesn't already exist, otherwise it'll be appended to. Note the appending is done in-place and is not an atomic operation, so if something goes wrong during the operation it's possible the archive could be left without a central directory (although the local file headers and file data will be fine, so the archive will be recoverable). For more complex archive modification scenarios: 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and you're done. This is safe but requires a bunch of temporary disk space or heap memory. 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), append new files as needed, then finalize the archive which will write an updated central directory to the original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a possibility that the archive's central directory could be lost with this method if anything goes wrong, though. - ZIP archive support limitations: No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. Requires streams capable of seeking. * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. * Important: For best perf. 
be sure to customize the below macros for your target platform: #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 #define MINIZ_LITTLE_ENDIAN 1 #define MINIZ_HAS_64BIT_REGISTERS 1 * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). */ #pragma once /* Defines to completely disable specific portions of miniz.c: If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ /* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ /*#define MINIZ_NO_STDIO */ /* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ /* get/set file times, and the C run-time funcs that get/set times won't be called. */ /* The current downside is the times written to your archives will be from 1979. */ /*#define MINIZ_NO_TIME */ /* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ /*#define MINIZ_NO_ARCHIVE_APIS */ /* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ /*#define MINIZ_NO_ARCHIVE_WRITING_APIS */ /* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ /*#define MINIZ_NO_ZLIB_APIS */ /* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib names, to prevent conflicts against stock zlib. */ /*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ /* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. 
*/ /*#define MINIZ_NO_MALLOC */ #if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) /* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ #define MINIZ_NO_TIME #endif #include #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) #include #endif #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) /* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ #define MINIZ_X86_OR_X64_CPU 1 #else #define MINIZ_X86_OR_X64_CPU 0 #endif #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU /* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ #define MINIZ_LITTLE_ENDIAN 1 #else #define MINIZ_LITTLE_ENDIAN 0 #endif /* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */ #if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES) #if MINIZ_X86_OR_X64_CPU /* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 #define MINIZ_UNALIGNED_USE_MEMCPY #else #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 #endif #endif #if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) /* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ #define MINIZ_HAS_64BIT_REGISTERS 1 #else #define MINIZ_HAS_64BIT_REGISTERS 0 #endif #ifdef __cplusplus extern "C" { #endif /* ------------------- zlib-style API Definitions. */ /* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! 
*/
typedef unsigned long mz_ulong;

/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */
void mz_free(void *p);

#define MZ_ADLER32_INIT (1)
/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);

#define MZ_CRC32_INIT (0)
/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */
mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);

/* Compression strategies. */
enum
{
    MZ_DEFAULT_STRATEGY = 0,
    MZ_FILTERED = 1,
    MZ_HUFFMAN_ONLY = 2,
    MZ_RLE = 3,
    MZ_FIXED = 4
};

/* Method */
#define MZ_DEFLATED 8

/* Heap allocation callbacks.
Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long. */
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
typedef void (*mz_free_func)(void *opaque, void *address);
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);

/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */
enum
{
    MZ_NO_COMPRESSION = 0,
    MZ_BEST_SPEED = 1,
    MZ_BEST_COMPRESSION = 9,
    MZ_UBER_COMPRESSION = 10,
    MZ_DEFAULT_LEVEL = 6,
    MZ_DEFAULT_COMPRESSION = -1
};

#define MZ_VERSION "10.1.0"
#define MZ_VERNUM 0xA100
#define MZ_VER_MAJOR 10
#define MZ_VER_MINOR 1
#define MZ_VER_REVISION 0
#define MZ_VER_SUBREVISION 0

#ifndef MINIZ_NO_ZLIB_APIS

/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */
enum
{
    MZ_NO_FLUSH = 0,
    MZ_PARTIAL_FLUSH = 1,
    MZ_SYNC_FLUSH = 2,
    MZ_FULL_FLUSH = 3,
    MZ_FINISH = 4,
    MZ_BLOCK = 5
};

/* Return status codes. MZ_PARAM_ERROR is non-standard.
*/ enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; /* Window bits */ #define MZ_DEFAULT_WINDOW_BITS 15 struct mz_internal_state; /* Compression/decompression stream struct. */ typedef struct mz_stream_s { const unsigned char *next_in; /* pointer to next byte to read */ unsigned int avail_in; /* number of bytes available at next_in */ mz_ulong total_in; /* total number of bytes consumed so far */ unsigned char *next_out; /* pointer to next byte to write */ unsigned int avail_out; /* number of bytes that can be written to next_out */ mz_ulong total_out; /* total number of bytes produced so far */ char *msg; /* error msg (unused) */ struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ mz_free_func zfree; /* optional heap free function (defaults to free) */ void *opaque; /* heap alloc function user pointer */ int data_type; /* data_type (unused) */ mz_ulong adler; /* adler32 of the source or uncompressed data */ mz_ulong reserved; /* not used */ } mz_stream; typedef mz_stream *mz_streamp; /* Returns the version string of miniz.c. */ const char *mz_version(void); /* mz_deflateInit() initializes a compressor with default options: */ /* Parameters: */ /* pStream must point to an initialized mz_stream struct. */ /* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ /* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */ /* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ /* Return values: */ /* MZ_OK on success. */ /* MZ_STREAM_ERROR if the stream is bogus. */ /* MZ_PARAM_ERROR if the input parameters are bogus. */ /* MZ_MEM_ERROR on out of memory. 
*/
int mz_deflateInit(mz_streamp pStream, int level);

/* mz_deflateInit2() is like mz_deflateInit(), except with more control: */
/* Additional parameters: */
/*   method must be MZ_DEFLATED */
/*   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */
/*   mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);

/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */
int mz_deflateReset(mz_streamp pStream);

/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */
/* Parameters: */
/*   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/*   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */
/* Return values: */
/*   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */
/*   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */
/*   MZ_STREAM_ERROR if the stream is bogus. */
/*   MZ_PARAM_ERROR if one of the parameters is invalid. */
/*   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */
int mz_deflate(mz_streamp pStream, int flush);

/* mz_deflateEnd() deinitializes a compressor: */
/* Return values: */
/*  MZ_OK on success. */
/*  MZ_STREAM_ERROR if the stream is bogus.
*/
int mz_deflateEnd(mz_streamp pStream);

/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);

/* Single-call compression functions mz_compress() and mz_compress2(): */
/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);

/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */
mz_ulong mz_compressBound(mz_ulong source_len);

/* Initializes a decompressor. */
int mz_inflateInit(mz_streamp pStream);

/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */
/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */
int mz_inflateInit2(mz_streamp pStream, int window_bits);

/* Quickly resets a decompressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */
int mz_inflateReset(mz_streamp pStream);

/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */
/* Parameters: */
/*   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/*   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
*/ /* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ /* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ /* Return values: */ /* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */ /* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ /* MZ_STREAM_ERROR if the stream is bogus. */ /* MZ_DATA_ERROR if the deflate stream is invalid. */ /* MZ_PARAM_ERROR if one of the parameters is invalid. */ /* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ /* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ int mz_inflate(mz_streamp pStream, int flush); /* Deinitializes a decompressor. */ int mz_inflateEnd(mz_streamp pStream); /* Single-call decompression. */ /* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); /* Returns a string description of the specified error code, or NULL if the error code is invalid. */ const char *mz_error(int err); /* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */ /* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. 
*/ #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES typedef unsigned char Byte; typedef unsigned int uInt; typedef mz_ulong uLong; typedef Byte Bytef; typedef uInt uIntf; typedef char charf; typedef int intf; typedef void *voidpf; typedef uLong uLongf; typedef void *voidp; typedef void *const voidpc; #define Z_NULL 0 #define Z_NO_FLUSH MZ_NO_FLUSH #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH #define Z_SYNC_FLUSH MZ_SYNC_FLUSH #define Z_FULL_FLUSH MZ_FULL_FLUSH #define Z_FINISH MZ_FINISH #define Z_BLOCK MZ_BLOCK #define Z_OK MZ_OK #define Z_STREAM_END MZ_STREAM_END #define Z_NEED_DICT MZ_NEED_DICT #define Z_ERRNO MZ_ERRNO #define Z_STREAM_ERROR MZ_STREAM_ERROR #define Z_DATA_ERROR MZ_DATA_ERROR #define Z_MEM_ERROR MZ_MEM_ERROR #define Z_BUF_ERROR MZ_BUF_ERROR #define Z_VERSION_ERROR MZ_VERSION_ERROR #define Z_PARAM_ERROR MZ_PARAM_ERROR #define Z_NO_COMPRESSION MZ_NO_COMPRESSION #define Z_BEST_SPEED MZ_BEST_SPEED #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY #define Z_FILTERED MZ_FILTERED #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY #define Z_RLE MZ_RLE #define Z_FIXED MZ_FIXED #define Z_DEFLATED MZ_DEFLATED #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS #define alloc_func mz_alloc_func #define free_func mz_free_func #define internal_state mz_internal_state #define z_stream mz_stream #define deflateInit mz_deflateInit #define deflateInit2 mz_deflateInit2 #define deflateReset mz_deflateReset #define deflate mz_deflate #define deflateEnd mz_deflateEnd #define deflateBound mz_deflateBound #define compress mz_compress #define compress2 mz_compress2 #define compressBound mz_compressBound #define inflateInit mz_inflateInit #define inflateInit2 mz_inflateInit2 #define inflateReset mz_inflateReset #define inflate mz_inflate #define inflateEnd mz_inflateEnd #define uncompress mz_uncompress #define crc32 mz_crc32 #define adler32 mz_adler32 #define MAX_WBITS 15 #define MAX_MEM_LEVEL 9 #define 
zError mz_error #define ZLIB_VERSION MZ_VERSION #define ZLIB_VERNUM MZ_VERNUM #define ZLIB_VER_MAJOR MZ_VER_MAJOR #define ZLIB_VER_MINOR MZ_VER_MINOR #define ZLIB_VER_REVISION MZ_VER_REVISION #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION #define zlibVersion mz_version #define zlib_version mz_version() #endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ #endif /* MINIZ_NO_ZLIB_APIS */ #ifdef __cplusplus } #endif #pragma once #include #include #include #include /* ------------------- Types and macros */ typedef unsigned char mz_uint8; typedef signed short mz_int16; typedef unsigned short mz_uint16; typedef unsigned int mz_uint32; typedef unsigned int mz_uint; typedef int64_t mz_int64; typedef uint64_t mz_uint64; typedef int mz_bool; #define MZ_FALSE (0) #define MZ_TRUE (1) /* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */ #ifdef _MSC_VER #define MZ_MACRO_END while (0, 0) #else #define MZ_MACRO_END while (0) #endif #ifdef MINIZ_NO_STDIO #define MZ_FILE void * #else #include #define MZ_FILE FILE #endif /* #ifdef MINIZ_NO_STDIO */ #ifdef MINIZ_NO_TIME typedef struct mz_dummy_time_t_tag { int m_dummy; } mz_dummy_time_t; #define MZ_TIME_T mz_dummy_time_t #else #define MZ_TIME_T time_t #endif #define MZ_ASSERT(x) assert(x) #ifdef MINIZ_NO_MALLOC #define MZ_MALLOC(x) NULL #define MZ_FREE(x) (void)x, ((void)0) #define MZ_REALLOC(p, x) NULL #else #define MZ_MALLOC(x) malloc(x) #define MZ_FREE(x) free(x) #define MZ_REALLOC(p, x) realloc(p, x) #endif #define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MZ_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) #define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) #else #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) #endif #define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) #ifdef _MSC_VER #define MZ_FORCEINLINE __forceinline #elif defined(__GNUC__) #define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) #else #define MZ_FORCEINLINE inline #endif #ifdef __cplusplus extern "C" { #endif extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); extern void miniz_def_free_func(void *opaque, void *address); extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); #define MZ_UINT16_MAX (0xFFFFU) #define MZ_UINT32_MAX (0xFFFFFFFFU) #ifdef __cplusplus } #endif #pragma once #ifdef __cplusplus extern "C" { #endif /* ------------------- Low-level Compression API Definitions */ /* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ #define TDEFL_LESS_MEMORY 0 /* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ /* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). 
*/ enum { TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF }; /* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ /* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ /* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ /* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ /* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ /* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ /* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ /* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ /* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). */ enum { TDEFL_WRITE_ZLIB_HEADER = 0x01000, TDEFL_COMPUTE_ADLER32 = 0x02000, TDEFL_GREEDY_PARSING_FLAG = 0x04000, TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, TDEFL_RLE_MATCHES = 0x10000, TDEFL_FILTER_MATCHES = 0x20000, TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 }; /* High level compression functions: */ /* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ /* On entry: */ /* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ /* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ /* On return: */ /* Function returns a pointer to the compressed data, or NULL on failure. 
*/ /* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ /* The caller must free() the returned block when it's no longer needed. */ void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); /* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ /* Returns 0 on failure. */ size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); /* Compresses an image to a compressed PNG file in memory. */ /* On entry: */ /* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ /* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ /* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ /* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ /* On return: */ /* Function returns a pointer to the compressed data, or NULL on failure. */ /* *pLen_out will be set to the size of the PNG image file. */ /* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */ void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); /* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); /* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
*/ mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; /* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ #if TDEFL_LESS_MEMORY enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; #else enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; #endif /* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ typedef enum { TDEFL_STATUS_BAD_PARAM = -2, TDEFL_STATUS_PUT_BUF_FAILED = -1, TDEFL_STATUS_OKAY = 0, TDEFL_STATUS_DONE = 1 } tdefl_status; /* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ typedef enum { TDEFL_NO_FLUSH = 0, TDEFL_SYNC_FLUSH = 2, TDEFL_FULL_FLUSH = 3, TDEFL_FINISH = 4 } tdefl_flush; /* tdefl's compression state structure. 
*/ typedef struct { tdefl_put_buf_func_ptr m_pPut_buf_func; void *m_pPut_buf_user; mz_uint m_flags, m_max_probes[2]; int m_greedy_parsing; mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; tdefl_status m_prev_return_status; const void *m_pIn_buf; void *m_pOut_buf; size_t *m_pIn_buf_size, *m_pOut_buf_size; tdefl_flush m_flush; const mz_uint8 *m_pSrc; size_t m_src_buf_left, m_out_buf_ofs; mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; } tdefl_compressor; /* Initializes the compressor. */ /* There is no corresponding deinit() function because the tdefl APIs do not dynamically allocate memory. */ /* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ /* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */ /* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */ tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); /* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. 
*/ tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); /* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ /* tdefl_compress_buffer() always consumes the entire input buffer. */ tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); mz_uint32 tdefl_get_adler32(tdefl_compressor *d); /* Create tdefl_compress() flags given zlib-style compression parameters. */ /* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ /* window_bits may be -15 (raw deflate) or 15 (zlib) */ /* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); #ifndef MINIZ_NO_MALLOC /* Allocate the tdefl_compressor structure in C so that */ /* non-C language bindings to tdefl_ API don't need to worry about */ /* structure size and allocation mechanism. */ tdefl_compressor *tdefl_compressor_alloc(void); void tdefl_compressor_free(tdefl_compressor *pComp); #endif #ifdef __cplusplus } #endif #pragma once /* ------------------- Low-level Decompression API Definitions */ #ifdef __cplusplus extern "C" { #endif /* Decompression flags used by tinfl_decompress(). */ /* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */ /* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. 
*/ /* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ /* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */ enum { TINFL_FLAG_PARSE_ZLIB_HEADER = 1, TINFL_FLAG_HAS_MORE_INPUT = 2, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, TINFL_FLAG_COMPUTE_ADLER32 = 8 }; /* High level decompression functions: */ /* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */ /* On entry: */ /* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ /* On return: */ /* Function returns a pointer to the decompressed data, or NULL on failure. */ /* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ /* The caller must call mz_free() on the returned block when it's no longer needed. */ void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); /* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ /* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ #define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); /* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */ /* Returns 1 on success or 0 on failure. 
*/ typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; #ifndef MINIZ_NO_MALLOC /* Allocate the tinfl_decompressor structure in C so that */ /* non-C language bindings to tinfl_ API don't need to worry about */ /* structure size and allocation mechanism. */ tinfl_decompressor *tinfl_decompressor_alloc(void); void tinfl_decompressor_free(tinfl_decompressor *pDecomp); #endif /* Max size of LZ dictionary. */ #define TINFL_LZ_DICT_SIZE 32768 /* Return status. */ typedef enum { /* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */ TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ TINFL_STATUS_BAD_PARAM = -3, /* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ TINFL_STATUS_ADLER32_MISMATCH = -2, /* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it'll just keep on returning the same status failure code. */ TINFL_STATUS_FAILED = -1, /* Any status code less than TINFL_STATUS_DONE must indicate a failure. 
*/ /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ TINFL_STATUS_DONE = 0, /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ TINFL_STATUS_NEEDS_MORE_INPUT = 1, /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */ /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */ /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ /* so I may need to add some code to address this. */ TINFL_STATUS_HAS_MORE_OUTPUT = 2 } tinfl_status; /* Initializes the decompressor to its initial state. */ #define tinfl_init(r) \ do \ { \ (r)->m_state = 0; \ } \ MZ_MACRO_END #define tinfl_get_adler32(r) (r)->m_check_adler32 /* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ /* This is a universal API, i.e. 
it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */ tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); /* Internal/private bits follow. */ enum { TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS }; typedef struct { mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; } tinfl_huff_table; #if MINIZ_HAS_64BIT_REGISTERS #define TINFL_USE_64BIT_BITBUF 1 #else #define TINFL_USE_64BIT_BITBUF 0 #endif #if TINFL_USE_64BIT_BITBUF typedef mz_uint64 tinfl_bit_buf_t; #define TINFL_BITBUF_SIZE (64) #else typedef mz_uint32 tinfl_bit_buf_t; #define TINFL_BITBUF_SIZE (32) #endif struct tinfl_decompressor_tag { mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; tinfl_bit_buf_t m_bit_buf; size_t m_dist_from_out_buf_start; tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; }; #ifdef __cplusplus } #endif #pragma once /* ------------------- ZIP archive reading/writing */ #ifndef MINIZ_NO_ARCHIVE_APIS #ifdef __cplusplus extern "C" { #endif enum { /* Note: These enums can be reduced as needed to save memory or stack space - they are pretty conservative. */ MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 }; typedef struct { /* Central directory file index. 
*/ mz_uint32 m_file_index; /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */ mz_uint64 m_central_dir_ofs; /* These fields are copied directly from the zip's central dir. */ mz_uint16 m_version_made_by; mz_uint16 m_version_needed; mz_uint16 m_bit_flag; mz_uint16 m_method; #ifndef MINIZ_NO_TIME MZ_TIME_T m_time; #endif /* CRC-32 of uncompressed data. */ mz_uint32 m_crc32; /* File's compressed size. */ mz_uint64 m_comp_size; /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */ mz_uint64 m_uncomp_size; /* Zip internal and external file attributes. */ mz_uint16 m_internal_attr; mz_uint32 m_external_attr; /* Entry's local header file offset in bytes. */ mz_uint64 m_local_header_ofs; /* Size of comment in bytes. */ mz_uint32 m_comment_size; /* MZ_TRUE if the entry appears to be a directory. */ mz_bool m_is_directory; /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */ mz_bool m_is_encrypted; /* MZ_TRUE if the file is not encrypted, is not a patch file, and uses a compression method we support. */ mz_bool m_is_supported; /* Filename. If string ends in '/' it's a subdirectory entry. */ /* Guaranteed to be zero terminated, may be truncated to fit. */ char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; /* Comment field. */ /* Guaranteed to be zero terminated, may be truncated to fit. 
*/ char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; } mz_zip_archive_file_stat; typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque); struct mz_zip_internal_state_tag; typedef struct mz_zip_internal_state_tag mz_zip_internal_state; typedef enum { MZ_ZIP_MODE_INVALID = 0, MZ_ZIP_MODE_READING = 1, MZ_ZIP_MODE_WRITING = 2, MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 } mz_zip_mode; typedef enum { MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as its validated to ensure the func finds the file in the central dir (intended for testing) */ MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000, /* validate the local headers, but don't decompress the entire file and check the crc32 */ MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000, /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */ MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000 } mz_zip_flags; typedef enum { MZ_ZIP_TYPE_INVALID = 0, MZ_ZIP_TYPE_USER, MZ_ZIP_TYPE_MEMORY, MZ_ZIP_TYPE_HEAP, MZ_ZIP_TYPE_FILE, MZ_ZIP_TYPE_CFILE, MZ_ZIP_TOTAL_TYPES } mz_zip_type; /* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum. 
*/ typedef enum { MZ_ZIP_NO_ERROR = 0, MZ_ZIP_UNDEFINED_ERROR, MZ_ZIP_TOO_MANY_FILES, MZ_ZIP_FILE_TOO_LARGE, MZ_ZIP_UNSUPPORTED_METHOD, MZ_ZIP_UNSUPPORTED_ENCRYPTION, MZ_ZIP_UNSUPPORTED_FEATURE, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, MZ_ZIP_NOT_AN_ARCHIVE, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, MZ_ZIP_UNSUPPORTED_MULTIDISK, MZ_ZIP_DECOMPRESSION_FAILED, MZ_ZIP_COMPRESSION_FAILED, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, MZ_ZIP_CRC_CHECK_FAILED, MZ_ZIP_UNSUPPORTED_CDIR_SIZE, MZ_ZIP_ALLOC_FAILED, MZ_ZIP_FILE_OPEN_FAILED, MZ_ZIP_FILE_CREATE_FAILED, MZ_ZIP_FILE_WRITE_FAILED, MZ_ZIP_FILE_READ_FAILED, MZ_ZIP_FILE_CLOSE_FAILED, MZ_ZIP_FILE_SEEK_FAILED, MZ_ZIP_FILE_STAT_FAILED, MZ_ZIP_INVALID_PARAMETER, MZ_ZIP_INVALID_FILENAME, MZ_ZIP_BUF_TOO_SMALL, MZ_ZIP_INTERNAL_ERROR, MZ_ZIP_FILE_NOT_FOUND, MZ_ZIP_ARCHIVE_TOO_LARGE, MZ_ZIP_VALIDATION_FAILED, MZ_ZIP_WRITE_CALLBACK_FAILED, MZ_ZIP_TOTAL_ERRORS } mz_zip_error; typedef struct { mz_uint64 m_archive_size; mz_uint64 m_central_directory_file_ofs; /* We only support up to UINT32_MAX files in zip64 mode. */ mz_uint32 m_total_files; mz_zip_mode m_zip_mode; mz_zip_type m_zip_type; mz_zip_error m_last_error; mz_uint64 m_file_offset_alignment; mz_alloc_func m_pAlloc; mz_free_func m_pFree; mz_realloc_func m_pRealloc; void *m_pAlloc_opaque; mz_file_read_func m_pRead; mz_file_write_func m_pWrite; mz_file_needs_keepalive m_pNeeds_keepalive; void *m_pIO_opaque; mz_zip_internal_state *m_pState; } mz_zip_archive; typedef struct { mz_zip_archive *pZip; mz_uint flags; int status; #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS mz_uint file_crc32; #endif mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs; mz_zip_archive_file_stat file_stat; void *pRead_buf; void *pWrite_buf; size_t out_blk_remain; tinfl_decompressor inflator; } mz_zip_reader_extract_iter_state; /* -------- ZIP reading */ /* Inits a ZIP archive reader. */ /* These functions read and validate the archive's central directory. 
*/ mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags); mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags); #ifndef MINIZ_NO_STDIO /* Read an archive from a disk file. */ /* file_start_ofs is the file offset where the archive actually begins, or 0. */ /* archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */ mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size); /* Read an archive from an already opened FILE, beginning at the current file position. */ /* The archive is assumed to be archive_size bytes long. NOTE(review): this comment used to say a value < 0 selects the entire rest of the file, but archive_size is an unsigned mz_uint64 and can never be negative; presumably 0 is the intended "rest of the file" sentinel - TODO confirm against the implementation. */ /* The FILE will NOT be closed when mz_zip_reader_end() is called. */ mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags); #endif /* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */ mz_bool mz_zip_reader_end(mz_zip_archive *pZip); /* -------- ZIP reading or writing */ /* Clears a mz_zip_archive struct to all zeros. */ /* Important: This must be done before passing the struct to any mz_zip functions. */ void mz_zip_zero_struct(mz_zip_archive *pZip); mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip); mz_zip_type mz_zip_get_type(mz_zip_archive *pZip); /* Returns the total number of files in the archive. 
*/ mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip); mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip); MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip); /* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */ size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n); /* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */ /* Note that the m_last_error functionality is not thread safe. */ mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num); mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip); mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip); mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip); const char *mz_zip_get_error_string(mz_zip_error mz_err); /* MZ_TRUE if the archive file entry is a directory entry. */ mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); /* MZ_TRUE if the file is encrypted/strong encrypted. */ mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); /* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */ mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index); /* Retrieves the filename of an archive file entry. */ /* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */ mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); /* Attempts to locate a file in the archive's central directory. */ /* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ /* Returns -1 if the file cannot be found. 
*/ int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); int mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index); /* Returns detailed information about an archive file entry. */ mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); /* MZ_TRUE if the file is in zip64 format. */ /* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */ mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); /* Returns the total central directory size in bytes. */ /* The current max supported size is <= MZ_UINT32_MAX. */ size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); /* Extracts an archive file to a memory buffer using no memory allocation. */ /* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); /* Extracts an archive file to a memory buffer. */ mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); /* Extracts a archive file to a dynamically allocated heap buffer. */ /* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */ /* Returns NULL and sets the last error on failure. 
*/ void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); /* Extracts an archive file using a callback function to output the file's data. */ mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); /* Extract a file iteratively */ mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); #ifndef MINIZ_NO_STDIO /* Extracts an archive file to a disk file and sets its last accessed and modified times. */ /* This function only extracts files, not archive directory records. */ mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); /* Extracts a archive file starting at the current position in the destination FILE stream. 
*/ mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); #endif #if 0 /* TODO */ typedef void *mz_zip_streaming_extract_state_ptr; mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); uint64_t mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); uint64_t mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, uint64_t new_ofs); size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); #endif /* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ /* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */ mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); /* Validates an entire archive by calling mz_zip_validate_file() on each file. */ mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); /* Misc utils/helpers, valid for ZIP reading or writing */ mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); /* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). 
*/ mz_bool mz_zip_end(mz_zip_archive *pZip); /* -------- ZIP writing */ #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS /* Inits a ZIP archive writer. */ /*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ /*The output is streamable, i.e. file_ofs in mz_file_write_func always increases only by n*/ mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); #ifndef MINIZ_NO_STDIO mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); #endif /* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */ /* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ /* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ /* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. 
*/ /* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ /* the archive is finalized the file's central directory will be hosed. */ mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); /* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ /* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); /* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ /* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. */ mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); /* Adds the contents of a file to an archive. This function also records the disk file's modified time into the archive. 
*/ /* File data is supplied via a read callback function. Use mz_zip_writer_add_(c)file to add a file directly.*/ mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); #ifndef MINIZ_NO_STDIO /* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); /* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. */ mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); #endif /* Adds a file to an archive by fully cloning the data from another archive. */ /* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data.
*/ mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); /* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ /* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ /* An archive must be manually finalized by calling this function for it to be valid. */ mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); /* Finalizes a heap archive, returning a pointer to the heap block and its size. */ /* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); /* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ /* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */ mz_bool mz_zip_writer_end(mz_zip_archive *pZip); /* -------- Misc. high-level helper functions: */ /* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ /* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ /* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong.
*/ mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); /* Reads a single file from an archive into a heap block. */ /* If pComment is not NULL, only the file with the specified comment will be extracted. */ /* Returns NULL on failure. */ void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ #ifdef __cplusplus } #endif #endif /* MINIZ_NO_ARCHIVE_APIS */ ================================================ FILE: 3rdparty/mshadow/.gitignore ================================================ # Compiled Object files *.slo *.lo *.o # Compiled Dynamic libraries *.so *.dylib # Compiled Static libraries *.lai *.la *.a *~ doc/html doc/latex rabit dmlc-core *.db *.bak build ================================================ FILE: 3rdparty/mshadow/.travis.yml ================================================ # disable sudo to use container based build sudo: false # Use Build Matrix to do lint and build separately env: matrix: - TASK=lint LINT_LANG=cpp - TASK=doc - TASK=build CXX=g++ # dependent apt packages addons: apt: packages: - doxygen - wget - unzip - libblas-dev - python3-pip before_install: - git clone https://github.com/dmlc/dmlc-core - export TRAVIS=dmlc-core/scripts/travis - source ${TRAVIS}/travis_setup_env.sh install: - pip3 install --upgrade pip --user - pip3 install --user cpplint pylint script:
scripts/travis_script.sh before_cache: - ${TRAVIS}/travis_before_cache.sh cache: directories: - ${HOME}/.cache/usr notifications: email: on_success: change on_failure: always ================================================ FILE: 3rdparty/mshadow/CHANGES.md ================================================ Change Log ===== mshadow-1.0 ===== * Initial release mshadow-2.0: in progress ===== * Support multiple data type * Great refactoring of code * Parameter server interface for MultiGPU and distributed learning ================================================ FILE: 3rdparty/mshadow/CMakeLists.txt ================================================
# Build script for mshadow: declares the user-facing options and the
# header-only `mshadow` INTERFACE target together with its warning flags.
cmake_minimum_required(VERSION 3.13)
project(mshadow C CXX)

include(CMakeDependentOption)

# ---- Build options ---------------------------------------------------------
option(USE_CUDA "Build with CUDA support" ON)
# option() has the signature option(<variable> "<help_text>" [value]).
# The former spelling `option(USE_CUDNN ON)` passed "ON" as the help text and
# left the default at OFF. The effective default (OFF) is spelled out here so
# that existing builds keep behaving the same.
option(USE_CUDNN "Build with cuDNN support" OFF)
cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON
  "CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64" OFF)
cmake_dependent_option(USE_F16C "Build with x86 F16C instruction support" ON
  "CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64" OFF)  # autodetects support if ON
option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
# Same situation as USE_CUDNN: `option(MSHADOW_IN_CXX11 ON)` defaulted to OFF.
option(MSHADOW_IN_CXX11 "Add the MSHADOW_IN_CXX11 compile definition for users of mshadow" OFF)

# ---- Header-only target ----------------------------------------------------
add_library(mshadow INTERFACE)
file(GLOB_RECURSE MSHADOWSOURCE "mshadow/*.h")
target_include_directories(mshadow INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
target_sources(mshadow INTERFACE ${MSHADOWSOURCE})

if(UNIX)
  # Every flag is guarded by a generator expression so that it is only passed
  # to the compiler/language combination named in its condition.
  target_compile_options(mshadow INTERFACE
    "$<$<AND:$<CXX_COMPILER_ID:Clang>,$<COMPILE_LANGUAGE:CXX>>:-Wno-braced-scalar-init>"
    "$<$<AND:$<CXX_COMPILER_ID:Clang>,$<COMPILE_LANGUAGE:CXX>>:-Wno-pass-failed>"
    # TODO Replace Wno-unused-lambda-capture with [[maybe_unused]] annotation once requiring C++17
    "$<$<AND:$<CXX_COMPILER_ID:Clang>,$<COMPILE_LANGUAGE:CXX>>:-Wno-unused-lambda-capture>"
    # TODO Fixing the warning leads to compile error on 4.8; fix once 4.8 support is dropped
    "$<$<AND:$<CXX_COMPILER_ID:Clang>,$<COMPILE_LANGUAGE:CXX>>:-Wno-undefined-var-template>"
    "$<$<COMPILE_LANGUAGE:CXX>:-Wno-unused-parameter>"
    "$<$<COMPILE_LANGUAGE:CXX>:-Wno-unknown-pragmas>"
    "$<$<COMPILE_LANGUAGE:CXX>:-Wno-unused-local-typedefs>"
    "$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>")
endif()
# ---- CUDA --------------------------------------------------------------------
# With CUDA enabled, the .cuh headers are attached to the target and
# MSHADOW_USE_CUDA=1 / MSHADOW_FORCE_STREAM are defined for consumers.
if(USE_CUDA)
  enable_language(CUDA)
  file(GLOB_RECURSE MSHADOW_CUDASOURCE "mshadow/*.cuh")
  target_sources(mshadow INTERFACE ${MSHADOW_CUDASOURCE})
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_CUDA=1
                             MSHADOW_FORCE_STREAM)
else()
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_CUDA=0)
endif()

# ---- SSE ---------------------------------------------------------------------
if(USE_SSE)
  # For cross compilation, we can't rely on the compiler checks, but mshadow
  # will add platform specific includes not available in other arches
  include(CheckCXXCompilerFlag)
  check_cxx_compiler_flag("-msse3" SUPPORT_MSSE3)
  check_cxx_compiler_flag("-msse2" SUPPORT_MSSE2)
  # Prefer SSE3, fall back to SSE2; the flag is only applied to C++ sources.
  if(SUPPORT_MSSE3)
    target_compile_definitions(mshadow INTERFACE MSHADOW_USE_SSE)
    target_compile_options(mshadow INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-msse3>)
  elseif(SUPPORT_MSSE2)
    target_compile_definitions(mshadow INTERFACE MSHADOW_USE_SSE)
    target_compile_options(mshadow INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-msse2>)
  else()
    target_compile_definitions(mshadow INTERFACE MSHADOW_USE_SSE=0)
  endif()
else()
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_SSE=0)
endif()

# ---- Optional feature definitions ---------------------------------------------
if(USE_CUDNN)
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_CUDNN)
endif()
# NOTE(review): USE_CUTENSOR is tested here but is presumably declared by the
# including project or passed on the command line - confirm.
if(USE_CUTENSOR)
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_CUTENSOR)
endif()
if(MSHADOW_IN_CXX11)
  target_compile_definitions(mshadow INTERFACE MSHADOW_IN_CXX11)
endif()

# ---- F16C --------------------------------------------------------------------
if(USE_F16C)
  # Determine if hardware supports F16C instruction set
  message(STATUS "Determining F16C support")
  include(cmake/AutoDetectF16C.cmake)
  if(SUPPORT_F16C)
    target_compile_options(mshadow INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-mf16c>)
  else()
    target_compile_definitions(mshadow INTERFACE MSHADOW_USE_F16C=0)
  endif()
else()
  target_compile_definitions(mshadow INTERFACE MSHADOW_USE_F16C=0)
endif()

# ---- Tensor size type ----------------------------------------------------------
if(USE_INT64_TENSOR_SIZE)
  message(STATUS "Using 64-bit integer for tensor size")
  target_compile_definitions(mshadow INTERFACE MSHADOW_INT64_TENSOR_SIZE=1)
else()
  target_compile_definitions(mshadow INTERFACE MSHADOW_INT64_TENSOR_SIZE=0)
endif()

# ---- Lint support ----------------------------------------------------------------
set(mshadow_LINT_DIRS mshadow mshadow-ps)
find_package(Python3)
add_custom_target(mshadow_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${Python3_EXECUTABLE} -DLINT_DIRS=${mshadow_LINT_DIRS} -DPROJECT_SOURCE_DIR=${PROJECT_SOURCE_DIR} -DPROJECT_NAME=mshadow -P ${PROJECT_SOURCE_DIR}/../dmlc-core/cmake/lint.cmake) ================================================ FILE: 3rdparty/mshadow/LICENSE ================================================ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: 3rdparty/mshadow/README.md ================================================ mshadow: Matrix Shadow ====== [![Build Status](https://travis-ci.org/dmlc/mshadow.svg?branch=master)](https://travis-ci.org/dmlc/mshadow) MShadow is a lightweight CPU/GPU Matrix/Tensor Template Library in C++/CUDA. The goal of mshadow is to provide an ***efficient***, ***device invariant*** and ***simple*** tensor library for machine learning projects that aim for maximum performance and control, while also emphasizing simplicity. MShadow also provides an interface that allows writing Multi-GPU and distributed deep learning programs in an easy and unified way.
* [Contributors](https://github.com/tqchen/mshadow/graphs/contributors) * [Tutorial](guide) * [Documentation](doc) * [Parameter Server Interface for GPU Tensor](guide/mshadow-ps) Features -------- * Efficient: all the expressions you write will be lazily evaluated and compiled into optimized code - No temporary memory allocation will happen for the expressions you write - mshadow will generate a specific kernel for every expression you write at compile time. * Device invariant: you can write the code once and it will run on both CPU and GPU * Simple: mshadow allows you to write machine learning code using expressions. * Whitebox: put a float* into the Tensor struct and take the benefit of the package, no memory allocation happens unless explicitly requested * Lightweight library: light amount of code to support frequently used functions in machine learning * Extendable: users can write simple functions that plug into mshadow and run on GPU/CPU, no experience in CUDA is required. * MultiGPU and Distributed ML: mshadow-ps interface allows users to write efficient MultiGPU and distributed programs in a unified way. Version ------- * This version is mshadow-2.x; there are a lot of changes in the interface and it is not backward compatible with mshadow-1.0 - If you use an older version of cxxnet, you will need to use the legacy mshadow code * For legacy code, refer to [Here](https://github.com/tqchen/mshadow/releases/tag/v1.1) * Change log in [CHANGES.md](CHANGES.md) Projects Using MShadow ---------------------- * [MXNet: Efficient and Flexible Distributed Deep Learning Framework](https://github.com/apache/mxnet) * [CXXNet: A lightweight C++ based deep learning framework](https://github.com/dmlc/cxxnet) ================================================ FILE: 3rdparty/mshadow/cmake/AutoDetectF16C.cmake ================================================ # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Determines whether hardware and compiler support F16C
# instruction set
#
# The following are set after configuration is done:
# SUPPORT_F16C

# Guard against running the detection more than once.
if(AUTO_DETECT_F16_CMAKE_INCLUDED)
  return()
endif()
set(AUTO_DETECT_F16_CMAKE_INCLUDED True)

# Pessimistic default; only flipped to TRUE at the very end.
set(SUPPORT_F16C False)

if(MSVC)
  message(STATUS "F16C instruction set is not yet supported for MSVC")
  return()
endif()

# Compiler side: does the C++ compiler accept -mf16c?
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)

# CPU side: look for the f16c feature flag of the machine running CMake.
# CPU_SUPPORT_F16C holds the matching grep output, so it is empty (false)
# when the flag is missing, and stays unset on other systems.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  execute_process(COMMAND cat /proc/cpuinfo
                  COMMAND grep flags
                  COMMAND grep f16c
                  OUTPUT_VARIABLE CPU_SUPPORT_F16C)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
  execute_process(COMMAND sysctl -a
                  COMMAND grep machdep.cpu.features
                  COMMAND grep F16C
                  OUTPUT_VARIABLE CPU_SUPPORT_F16C)
endif()

if(NOT CPU_SUPPORT_F16C)
  message(STATUS "CPU does not support F16C instructions")
  return()
endif()

# Both the CPU and the compiler must support F16C.
if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
  set(SUPPORT_F16C TRUE)
endif()
================================================ FILE: 3rdparty/mshadow/doc/Doxyfile ================================================ # Doxyfile 1.8.8 # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership.
The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. 
# The default value is: My Project. PROJECT_NAME = "mshadow" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels # and the maximum width should not exceed 200 pixels. Doxygen will copy the logo # to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = doc # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. 
ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. 
If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. 
Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that Rational Rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a # new page for each member.
If set to NO, the documentation of a member will be # part of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. 
Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the latter case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word # or globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES.
AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. 
DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. 
This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined # locally in source files will be included in the documentation. 
If set to NO # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespaces # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO these classes will be included in the various overviews. This option has # no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = YES # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO these # blocks will be appended to the function's detailed documentation block. 
# The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. 
If set to NO the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. 
SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the # todo list. This list is created by putting \todo commands in the # documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the # test list. This list is created by putting \test commands in the # documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if <section_label> ... \endif and \cond <section_label> # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. 
The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES the list # will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. 
You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. 
WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO doxygen will only warn about wrong or incomplete parameter # documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = YES # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. # Note: If this tag is empty the current directory is searched. 
INPUT = mshadow \ mshadow-ps # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank the # following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, # *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, # *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, # *.qsf, *.as and *.js. FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
# # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = *-inl.* \ utils.h \ thread_util.h \ thread.h \ kv_array.h # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = mshadow::expr::Plan* \ mshadow::expr::*Engine* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. 
Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. 
USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. 
Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # compiled with the --with-libclang option. # The default value is: NO. 
CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. 
# The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. 
It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra stylesheet files is of importance (e.g. the last # stylesheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the stylesheet and background images according to # this color. 
Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 is # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to NO can help when comparing the output of multiple runs. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. 
This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. 
# This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler ( hhc.exe). If non-empty # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( # YES) or that it should be included in the master .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated ( # YES) or a normal table of contents ( NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. 
The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. 
# This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). 
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using prerendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. 
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://www.mathjax.org/mathjax # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use <access key> + S # (what the <access key> is depends on the OS and browser, but it is typically # <CTRL>, <ALT>/<option>