Repository: mozilla/DeepSpeech Branch: master Commit: 6913ae817bc0 Files: 2177 Total size: 18.9 MB Directory structure: gitextract_e4csqy6p/ ├── .cardboardlint.yml ├── .compute ├── .gitattributes ├── .github/ │ ├── actions/ │ │ ├── build-tensorflow/ │ │ │ └── action.yml │ │ ├── check_artifact_exists/ │ │ │ ├── README.md │ │ │ ├── action.yml │ │ │ ├── dist/ │ │ │ │ ├── index.js │ │ │ │ └── licenses.txt │ │ │ ├── main.js │ │ │ └── package.json │ │ ├── chroot-bind-mount/ │ │ │ └── action.yml │ │ ├── get_cache_key/ │ │ │ ├── README.md │ │ │ └── action.yml │ │ ├── host-build/ │ │ │ └── action.yml │ │ ├── install-python-upstream/ │ │ │ └── action.yml │ │ ├── install-xldd/ │ │ │ └── action.yml │ │ ├── multistrap/ │ │ │ └── action.yml │ │ ├── node-build/ │ │ │ └── action.yml │ │ ├── node-install/ │ │ │ └── action.yml │ │ ├── numpy_vers/ │ │ │ ├── README.md │ │ │ └── action.yml │ │ ├── package/ │ │ │ └── action.yml │ │ ├── package-tensorflow/ │ │ │ └── action.yml │ │ ├── python-build/ │ │ │ └── action.yml │ │ ├── run-tests/ │ │ │ └── action.yml │ │ ├── select-xcode/ │ │ │ └── action.yml │ │ ├── setup-tensorflow/ │ │ │ └── action.yml │ │ └── win-install-sox/ │ │ └── action.yml │ ├── lock.yml │ └── workflows/ │ ├── .git-keep-empty-folder │ ├── build-and-test.yml │ ├── docker.yml │ └── lint.yml ├── .gitignore ├── .gitmodules ├── .isort.cfg ├── .pylintrc ├── .readthedocs.yml ├── BIBLIOGRAPHY.md ├── CODE_OF_CONDUCT.md ├── CODE_OWNERS.rst ├── CONTRIBUTING.rst ├── DeepSpeech.py ├── Dockerfile.build.tmpl ├── Dockerfile.train.tmpl ├── ISSUE_TEMPLATE.md ├── LICENSE ├── Makefile ├── README.rst ├── RELEASE.rst ├── SUPPORT.rst ├── bazel.patch ├── bin/ │ ├── README.rst │ ├── compare_samples.py │ ├── data_set_tool.py │ ├── graphdef_binary_to_text.py │ ├── import_aidatatang.py │ ├── import_aishell.py │ ├── import_ccpmf.py │ ├── import_cv.py │ ├── import_cv2.py │ ├── import_fisher.py │ ├── import_freestmandarin.py │ ├── import_gram_vaani.py │ ├── import_ldc93s1.py │ ├── import_librivox.py │ 
├── import_lingua_libre.py │ ├── import_m-ailabs.py │ ├── import_magicdata.py │ ├── import_primewords.py │ ├── import_slr57.py │ ├── import_swb.py │ ├── import_swc.py │ ├── import_ted.py │ ├── import_timit.py │ ├── import_ts.py │ ├── import_tuda.py │ ├── import_vctk.py │ ├── import_voxforge.py │ ├── ops_in_graph.py │ ├── play.py │ ├── run-ci-graph_augmentations.sh │ ├── run-ci-ldc93s1_checkpoint.sh │ ├── run-ci-ldc93s1_checkpoint_bytes.sh │ ├── run-ci-ldc93s1_checkpoint_sdb.sh │ ├── run-ci-ldc93s1_new.sh │ ├── run-ci-ldc93s1_new_bytes.sh │ ├── run-ci-ldc93s1_new_bytes_tflite.sh │ ├── run-ci-ldc93s1_new_metrics.sh │ ├── run-ci-ldc93s1_new_sdb.sh │ ├── run-ci-ldc93s1_new_sdb_csv.sh │ ├── run-ci-ldc93s1_singleshotinference.sh │ ├── run-ci-ldc93s1_tflite.sh │ ├── run-ci-sample_augmentations.sh │ ├── run-ci-transfer.sh │ └── run-ldc93s1.sh ├── build-python-wheel.yml-DISABLED_ENABLE_ME_TO_REBUILD_DURING_PR ├── ci_scripts/ │ ├── aarch64-build.sh │ ├── all-utils.sh │ ├── all-vars.sh │ ├── armv7-build.sh │ ├── asserts.sh │ ├── build-utils.sh │ ├── cpp-bytes-tests.sh │ ├── cpp-tests-prod.sh │ ├── cpp-tests.sh │ ├── cpp_tflite-tests-prod.sh │ ├── cpp_tflite-tests.sh │ ├── cpp_tflite_basic-tests.sh │ ├── cppwin-tests.sh │ ├── cppwin_tflite-tests.sh │ ├── docs-requirements.txt │ ├── electronjs-tests-prod.sh │ ├── electronjs-tests.sh │ ├── electronjs_tflite-tests-prod.sh │ ├── electronjs_tflite-tests.sh │ ├── host-build.sh │ ├── node-tests-prod.sh │ ├── node-tests.sh │ ├── node_tflite-tests-prod.sh │ ├── node_tflite-tests.sh │ ├── package-utils.sh │ ├── package.sh │ ├── python-tests-prod.sh │ ├── python-tests.sh │ ├── python_tflite-tests-prod.sh │ ├── python_tflite-tests.sh │ ├── tf-build.sh │ ├── tf-package.sh │ ├── tf-setup.sh │ └── tf-vars.sh ├── data/ │ ├── README.rst │ ├── alphabet.txt │ ├── lm/ │ │ └── generate_lm.py │ ├── smoke_test/ │ │ ├── LDC93S1.txt │ │ ├── pruned_lm.bytes.scorer │ │ ├── pruned_lm.scorer │ │ ├── russian_sample_data/ │ │ │ ├── alphabet.ru │ │ │ └── 
ru.csv │ │ ├── vocab.pruned.bytes.txt │ │ ├── vocab.pruned.txt │ │ └── vocab.txt │ └── ted/ │ └── .gitkeep ├── doc/ │ ├── BUILDING.rst │ ├── BUILDING_DotNet.rst │ ├── C-API.rst │ ├── C-Examples.rst │ ├── Contributed-Examples.rst │ ├── Decoder.rst │ ├── DeepSpeech.rst │ ├── DotNet-API.rst │ ├── DotNet-Examples.rst │ ├── Error-Codes.rst │ ├── Flags.rst │ ├── Geometry.rst │ ├── HotWordBoosting-Examples.rst │ ├── Java-API.rst │ ├── Java-Examples.rst │ ├── Makefile │ ├── NodeJS-API.rst │ ├── NodeJS-Examples.rst │ ├── ParallelOptimization.rst │ ├── Python-API.rst │ ├── Python-Examples.rst │ ├── SUPPORTED_PLATFORMS.rst │ ├── Scorer.rst │ ├── Structs.rst │ ├── TRAINING.rst │ ├── USING.rst │ ├── conf.py │ ├── doxygen-c.conf │ ├── doxygen-dotnet.conf │ ├── doxygen-java.conf │ ├── index.rst │ └── make.bat ├── ds_generic.supp ├── ds_lib.supp ├── ds_openfst.supp ├── ds_sox.supp ├── evaluate.py ├── evaluate_tflite.py ├── examples/ │ └── README.rst ├── lm_optimizer.py ├── native_client/ │ ├── Android.mk │ ├── BUILD │ ├── CODINGSTYLE.md │ ├── Makefile │ ├── alphabet.cc │ ├── alphabet.h │ ├── args.h │ ├── bazel_workspace_status_cmd.sh │ ├── client.cc │ ├── ctcdecode/ │ │ ├── COPYING │ │ ├── LICENSE.paddlepaddle │ │ ├── LICENSE.parlance │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── build_archive.py │ │ ├── ctc_beam_search_decoder.cpp │ │ ├── ctc_beam_search_decoder.h │ │ ├── decoder_utils.cpp │ │ ├── decoder_utils.h │ │ ├── numpy.i │ │ ├── output.h │ │ ├── path_trie.cpp │ │ ├── path_trie.h │ │ ├── scorer.cpp │ │ ├── scorer.h │ │ ├── setup.cfg │ │ ├── setup.py │ │ ├── swigwrapper.i │ │ └── third_party/ │ │ ├── ThreadPool/ │ │ │ ├── COPYING │ │ │ ├── README.md │ │ │ ├── ThreadPool.h │ │ │ └── example.cpp │ │ ├── object_pool/ │ │ │ ├── README.mozilla │ │ │ ├── object_pool.h │ │ │ └── unique_ptr.h │ │ ├── openfst-1.6.7/ │ │ │ ├── AUTHORS │ │ │ ├── COPYING │ │ │ ├── INSTALL │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── NEWS │ │ │ ├── README │ │ │ ├── aclocal.m4 │ │ │ ├── ar-lib │ 
│ │ ├── compile │ │ │ ├── config.guess │ │ │ ├── config.h.in │ │ │ ├── config.sub │ │ │ ├── configure │ │ │ ├── configure.ac │ │ │ ├── depcomp │ │ │ ├── install-sh │ │ │ ├── ltmain.sh │ │ │ ├── m4/ │ │ │ │ ├── ac_python_devel.m4 │ │ │ │ ├── libtool.m4 │ │ │ │ ├── ltoptions.m4 │ │ │ │ ├── ltsugar.m4 │ │ │ │ ├── ltversion.m4 │ │ │ │ └── lt~obsolete.m4 │ │ │ ├── missing │ │ │ ├── src/ │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── bin/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── fstarcsort-main.cc │ │ │ │ │ ├── fstarcsort.cc │ │ │ │ │ ├── fstclosure-main.cc │ │ │ │ │ ├── fstclosure.cc │ │ │ │ │ ├── fstcompile-main.cc │ │ │ │ │ ├── fstcompile.cc │ │ │ │ │ ├── fstcompose-main.cc │ │ │ │ │ ├── fstcompose.cc │ │ │ │ │ ├── fstconcat-main.cc │ │ │ │ │ ├── fstconcat.cc │ │ │ │ │ ├── fstconnect-main.cc │ │ │ │ │ ├── fstconnect.cc │ │ │ │ │ ├── fstconvert-main.cc │ │ │ │ │ ├── fstconvert.cc │ │ │ │ │ ├── fstdeterminize-main.cc │ │ │ │ │ ├── fstdeterminize.cc │ │ │ │ │ ├── fstdifference-main.cc │ │ │ │ │ ├── fstdifference.cc │ │ │ │ │ ├── fstdisambiguate-main.cc │ │ │ │ │ ├── fstdisambiguate.cc │ │ │ │ │ ├── fstdraw-main.cc │ │ │ │ │ ├── fstdraw.cc │ │ │ │ │ ├── fstencode-main.cc │ │ │ │ │ ├── fstencode.cc │ │ │ │ │ ├── fstepsnormalize-main.cc │ │ │ │ │ ├── fstepsnormalize.cc │ │ │ │ │ ├── fstequal-main.cc │ │ │ │ │ ├── fstequal.cc │ │ │ │ │ ├── fstequivalent-main.cc │ │ │ │ │ ├── fstequivalent.cc │ │ │ │ │ ├── fstinfo-main.cc │ │ │ │ │ ├── fstinfo.cc │ │ │ │ │ ├── fstintersect-main.cc │ │ │ │ │ ├── fstintersect.cc │ │ │ │ │ ├── fstinvert-main.cc │ │ │ │ │ ├── fstinvert.cc │ │ │ │ │ ├── fstisomorphic-main.cc │ │ │ │ │ ├── fstisomorphic.cc │ │ │ │ │ ├── fstmap-main.cc │ │ │ │ │ ├── fstmap.cc │ │ │ │ │ ├── fstminimize-main.cc │ │ │ │ │ ├── fstminimize.cc │ │ │ │ │ ├── fstprint-main.cc │ │ │ │ │ ├── fstprint.cc │ │ │ │ │ ├── fstproject-main.cc │ │ │ │ │ ├── fstproject.cc │ │ │ │ │ ├── fstprune-main.cc │ │ │ │ │ ├── fstprune.cc │ │ │ │ │ ├── 
fstpush-main.cc │ │ │ │ │ ├── fstpush.cc │ │ │ │ │ ├── fstrandgen-main.cc │ │ │ │ │ ├── fstrandgen.cc │ │ │ │ │ ├── fstrelabel-main.cc │ │ │ │ │ ├── fstrelabel.cc │ │ │ │ │ ├── fstreplace-main.cc │ │ │ │ │ ├── fstreplace.cc │ │ │ │ │ ├── fstreverse-main.cc │ │ │ │ │ ├── fstreverse.cc │ │ │ │ │ ├── fstreweight-main.cc │ │ │ │ │ ├── fstreweight.cc │ │ │ │ │ ├── fstrmepsilon-main.cc │ │ │ │ │ ├── fstrmepsilon.cc │ │ │ │ │ ├── fstshortestdistance-main.cc │ │ │ │ │ ├── fstshortestdistance.cc │ │ │ │ │ ├── fstshortestpath-main.cc │ │ │ │ │ ├── fstshortestpath.cc │ │ │ │ │ ├── fstsymbols-main.cc │ │ │ │ │ ├── fstsymbols.cc │ │ │ │ │ ├── fstsynchronize-main.cc │ │ │ │ │ ├── fstsynchronize.cc │ │ │ │ │ ├── fsttopsort-main.cc │ │ │ │ │ ├── fsttopsort.cc │ │ │ │ │ ├── fstunion-main.cc │ │ │ │ │ └── fstunion.cc │ │ │ │ ├── extensions/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── compact/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── compact16_acceptor-fst.cc │ │ │ │ │ │ ├── compact16_string-fst.cc │ │ │ │ │ │ ├── compact16_unweighted-fst.cc │ │ │ │ │ │ ├── compact16_unweighted_acceptor-fst.cc │ │ │ │ │ │ ├── compact16_weighted_string-fst.cc │ │ │ │ │ │ ├── compact64_acceptor-fst.cc │ │ │ │ │ │ ├── compact64_string-fst.cc │ │ │ │ │ │ ├── compact64_unweighted-fst.cc │ │ │ │ │ │ ├── compact64_unweighted_acceptor-fst.cc │ │ │ │ │ │ ├── compact64_weighted_string-fst.cc │ │ │ │ │ │ ├── compact8_acceptor-fst.cc │ │ │ │ │ │ ├── compact8_string-fst.cc │ │ │ │ │ │ ├── compact8_unweighted-fst.cc │ │ │ │ │ │ ├── compact8_unweighted_acceptor-fst.cc │ │ │ │ │ │ └── compact8_weighted_string-fst.cc │ │ │ │ │ ├── compress/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── compress-script.cc │ │ │ │ │ │ ├── fstcompress.cc │ │ │ │ │ │ └── fstrandmod.cc │ │ │ │ │ ├── const/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── const16-fst.cc │ │ │ │ │ │ ├── const64-fst.cc │ │ │ │ │ │ └── const8-fst.cc │ │ │ │ │ 
├── far/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── far-class.cc │ │ │ │ │ │ ├── farcompilestrings.cc │ │ │ │ │ │ ├── farcreate.cc │ │ │ │ │ │ ├── farequal.cc │ │ │ │ │ │ ├── farextract.cc │ │ │ │ │ │ ├── farinfo.cc │ │ │ │ │ │ ├── farisomorphic.cc │ │ │ │ │ │ ├── farprintstrings.cc │ │ │ │ │ │ ├── farscript.cc │ │ │ │ │ │ ├── getters.cc │ │ │ │ │ │ ├── script-impl.cc │ │ │ │ │ │ ├── stlist.cc │ │ │ │ │ │ ├── strings.cc │ │ │ │ │ │ └── sttable.cc │ │ │ │ │ ├── linear/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── fstlinear.cc │ │ │ │ │ │ ├── fstloglinearapply.cc │ │ │ │ │ │ ├── linear-classifier-fst.cc │ │ │ │ │ │ ├── linear-tagger-fst.cc │ │ │ │ │ │ └── linearscript.cc │ │ │ │ │ ├── lookahead/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── arc_lookahead-fst.cc │ │ │ │ │ │ ├── ilabel_lookahead-fst.cc │ │ │ │ │ │ └── olabel_lookahead-fst.cc │ │ │ │ │ ├── mpdt/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── mpdtcompose.cc │ │ │ │ │ │ ├── mpdtexpand.cc │ │ │ │ │ │ ├── mpdtinfo.cc │ │ │ │ │ │ ├── mpdtreverse.cc │ │ │ │ │ │ └── mpdtscript.cc │ │ │ │ │ ├── ngram/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── bitmap-index.cc │ │ │ │ │ │ ├── ngram-fst.cc │ │ │ │ │ │ └── nthbit.cc │ │ │ │ │ ├── pdt/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── getters.cc │ │ │ │ │ │ ├── pdtcompose.cc │ │ │ │ │ │ ├── pdtexpand.cc │ │ │ │ │ │ ├── pdtinfo.cc │ │ │ │ │ │ ├── pdtreplace.cc │ │ │ │ │ │ ├── pdtreverse.cc │ │ │ │ │ │ ├── pdtscript.cc │ │ │ │ │ │ └── pdtshortestpath.cc │ │ │ │ │ ├── python/ │ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ │ ├── Makefile.in │ │ │ │ │ │ ├── basictypes.pxd │ │ │ │ │ │ ├── fst.pxd │ │ │ │ │ │ ├── ios.pxd │ │ │ │ │ │ ├── memory.pxd │ │ │ │ │ │ ├── pywrapfst.cc │ │ │ │ │ │ ├── pywrapfst.pxd │ │ │ │ │ │ └── pywrapfst.pyx │ │ │ │ │ └── special/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── 
phi-fst.cc │ │ │ │ │ ├── rho-fst.cc │ │ │ │ │ └── sigma-fst.cc │ │ │ │ ├── include/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ └── fst/ │ │ │ │ │ ├── accumulator.h │ │ │ │ │ ├── add-on.h │ │ │ │ │ ├── arc-arena.h │ │ │ │ │ ├── arc-map.h │ │ │ │ │ ├── arc.h │ │ │ │ │ ├── arcfilter.h │ │ │ │ │ ├── arcsort.h │ │ │ │ │ ├── bi-table.h │ │ │ │ │ ├── cache.h │ │ │ │ │ ├── closure.h │ │ │ │ │ ├── compact-fst.h │ │ │ │ │ ├── compat.h │ │ │ │ │ ├── complement.h │ │ │ │ │ ├── compose-filter.h │ │ │ │ │ ├── compose.h │ │ │ │ │ ├── concat.h │ │ │ │ │ ├── config.h │ │ │ │ │ ├── config.h.in │ │ │ │ │ ├── connect.h │ │ │ │ │ ├── const-fst.h │ │ │ │ │ ├── determinize.h │ │ │ │ │ ├── dfs-visit.h │ │ │ │ │ ├── difference.h │ │ │ │ │ ├── disambiguate.h │ │ │ │ │ ├── edit-fst.h │ │ │ │ │ ├── encode.h │ │ │ │ │ ├── epsnormalize.h │ │ │ │ │ ├── equal.h │ │ │ │ │ ├── equivalent.h │ │ │ │ │ ├── expanded-fst.h │ │ │ │ │ ├── expectation-weight.h │ │ │ │ │ ├── extensions/ │ │ │ │ │ │ ├── compress/ │ │ │ │ │ │ │ ├── compress-script.h │ │ │ │ │ │ │ ├── compress.h │ │ │ │ │ │ │ ├── elias.h │ │ │ │ │ │ │ ├── gzfile.h │ │ │ │ │ │ │ └── randmod.h │ │ │ │ │ │ ├── far/ │ │ │ │ │ │ │ ├── compile-strings.h │ │ │ │ │ │ │ ├── create.h │ │ │ │ │ │ │ ├── equal.h │ │ │ │ │ │ │ ├── extract.h │ │ │ │ │ │ │ ├── far-class.h │ │ │ │ │ │ │ ├── far.h │ │ │ │ │ │ │ ├── farlib.h │ │ │ │ │ │ │ ├── farscript.h │ │ │ │ │ │ │ ├── getters.h │ │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ │ ├── isomorphic.h │ │ │ │ │ │ │ ├── print-strings.h │ │ │ │ │ │ │ ├── script-impl.h │ │ │ │ │ │ │ ├── stlist.h │ │ │ │ │ │ │ └── sttable.h │ │ │ │ │ │ ├── linear/ │ │ │ │ │ │ │ ├── linear-fst-data-builder.h │ │ │ │ │ │ │ ├── linear-fst-data.h │ │ │ │ │ │ │ ├── linear-fst.h │ │ │ │ │ │ │ ├── linearscript.h │ │ │ │ │ │ │ ├── loglinear-apply.h │ │ │ │ │ │ │ └── trie.h │ │ │ │ │ │ ├── mpdt/ │ │ │ │ │ │ │ ├── compose.h │ │ │ │ │ │ │ ├── expand.h │ │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ │ ├── mpdt.h │ │ │ │ │ │ │ ├── mpdtlib.h │ │ │ │ │ │ 
│ ├── mpdtscript.h │ │ │ │ │ │ │ ├── read_write_utils.h │ │ │ │ │ │ │ └── reverse.h │ │ │ │ │ │ ├── ngram/ │ │ │ │ │ │ │ ├── bitmap-index.h │ │ │ │ │ │ │ ├── ngram-fst.h │ │ │ │ │ │ │ └── nthbit.h │ │ │ │ │ │ ├── pdt/ │ │ │ │ │ │ │ ├── collection.h │ │ │ │ │ │ │ ├── compose.h │ │ │ │ │ │ │ ├── expand.h │ │ │ │ │ │ │ ├── getters.h │ │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ │ ├── paren.h │ │ │ │ │ │ │ ├── pdt.h │ │ │ │ │ │ │ ├── pdtlib.h │ │ │ │ │ │ │ ├── pdtscript.h │ │ │ │ │ │ │ ├── replace.h │ │ │ │ │ │ │ ├── reverse.h │ │ │ │ │ │ │ └── shortest-path.h │ │ │ │ │ │ └── special/ │ │ │ │ │ │ ├── phi-fst.h │ │ │ │ │ │ ├── rho-fst.h │ │ │ │ │ │ └── sigma-fst.h │ │ │ │ │ ├── factor-weight.h │ │ │ │ │ ├── filter-state.h │ │ │ │ │ ├── flags.h │ │ │ │ │ ├── float-weight.h │ │ │ │ │ ├── fst-decl.h │ │ │ │ │ ├── fst.h │ │ │ │ │ ├── fstlib.h │ │ │ │ │ ├── generic-register.h │ │ │ │ │ ├── heap.h │ │ │ │ │ ├── icu.h │ │ │ │ │ ├── intersect.h │ │ │ │ │ ├── interval-set.h │ │ │ │ │ ├── invert.h │ │ │ │ │ ├── isomorphic.h │ │ │ │ │ ├── label-reachable.h │ │ │ │ │ ├── lexicographic-weight.h │ │ │ │ │ ├── lock.h │ │ │ │ │ ├── log.h │ │ │ │ │ ├── lookahead-filter.h │ │ │ │ │ ├── lookahead-matcher.h │ │ │ │ │ ├── map.h │ │ │ │ │ ├── mapped-file.h │ │ │ │ │ ├── matcher-fst.h │ │ │ │ │ ├── matcher.h │ │ │ │ │ ├── memory.h │ │ │ │ │ ├── minimize.h │ │ │ │ │ ├── mutable-fst.h │ │ │ │ │ ├── pair-weight.h │ │ │ │ │ ├── partition.h │ │ │ │ │ ├── power-weight.h │ │ │ │ │ ├── product-weight.h │ │ │ │ │ ├── project.h │ │ │ │ │ ├── properties.h │ │ │ │ │ ├── prune.h │ │ │ │ │ ├── push.h │ │ │ │ │ ├── queue.h │ │ │ │ │ ├── randequivalent.h │ │ │ │ │ ├── randgen.h │ │ │ │ │ ├── rational.h │ │ │ │ │ ├── register.h │ │ │ │ │ ├── relabel.h │ │ │ │ │ ├── replace-util.h │ │ │ │ │ ├── replace.h │ │ │ │ │ ├── reverse.h │ │ │ │ │ ├── reweight.h │ │ │ │ │ ├── rmepsilon.h │ │ │ │ │ ├── rmfinalepsilon.h │ │ │ │ │ ├── script/ │ │ │ │ │ │ ├── arc-class.h │ │ │ │ │ │ ├── arciterator-class.h │ │ │ │ │ │ ├── arcsort.h 
│ │ │ │ │ │ ├── arg-packs.h │ │ │ │ │ │ ├── closure.h │ │ │ │ │ │ ├── compile-impl.h │ │ │ │ │ │ ├── compile.h │ │ │ │ │ │ ├── compose.h │ │ │ │ │ │ ├── concat.h │ │ │ │ │ │ ├── connect.h │ │ │ │ │ │ ├── convert.h │ │ │ │ │ │ ├── decode.h │ │ │ │ │ │ ├── determinize.h │ │ │ │ │ │ ├── difference.h │ │ │ │ │ │ ├── disambiguate.h │ │ │ │ │ │ ├── draw-impl.h │ │ │ │ │ │ ├── draw.h │ │ │ │ │ │ ├── encode.h │ │ │ │ │ │ ├── encodemapper-class.h │ │ │ │ │ │ ├── epsnormalize.h │ │ │ │ │ │ ├── equal.h │ │ │ │ │ │ ├── equivalent.h │ │ │ │ │ │ ├── fst-class.h │ │ │ │ │ │ ├── fstscript-decl.h │ │ │ │ │ │ ├── fstscript.h │ │ │ │ │ │ ├── getters.h │ │ │ │ │ │ ├── info-impl.h │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ ├── intersect.h │ │ │ │ │ │ ├── invert.h │ │ │ │ │ │ ├── isomorphic.h │ │ │ │ │ │ ├── map.h │ │ │ │ │ │ ├── minimize.h │ │ │ │ │ │ ├── print-impl.h │ │ │ │ │ │ ├── print.h │ │ │ │ │ │ ├── project.h │ │ │ │ │ │ ├── prune.h │ │ │ │ │ │ ├── push.h │ │ │ │ │ │ ├── randequivalent.h │ │ │ │ │ │ ├── randgen.h │ │ │ │ │ │ ├── register.h │ │ │ │ │ │ ├── relabel.h │ │ │ │ │ │ ├── replace.h │ │ │ │ │ │ ├── reverse.h │ │ │ │ │ │ ├── reweight.h │ │ │ │ │ │ ├── rmepsilon.h │ │ │ │ │ │ ├── script-impl.h │ │ │ │ │ │ ├── shortest-distance.h │ │ │ │ │ │ ├── shortest-path.h │ │ │ │ │ │ ├── stateiterator-class.h │ │ │ │ │ │ ├── synchronize.h │ │ │ │ │ │ ├── text-io.h │ │ │ │ │ │ ├── topsort.h │ │ │ │ │ │ ├── union.h │ │ │ │ │ │ ├── verify.h │ │ │ │ │ │ └── weight-class.h │ │ │ │ │ ├── set-weight.h │ │ │ │ │ ├── shortest-distance.h │ │ │ │ │ ├── shortest-path.h │ │ │ │ │ ├── signed-log-weight.h │ │ │ │ │ ├── sparse-power-weight.h │ │ │ │ │ ├── sparse-tuple-weight.h │ │ │ │ │ ├── state-map.h │ │ │ │ │ ├── state-reachable.h │ │ │ │ │ ├── state-table.h │ │ │ │ │ ├── statesort.h │ │ │ │ │ ├── string-weight.h │ │ │ │ │ ├── string.h │ │ │ │ │ ├── symbol-table-ops.h │ │ │ │ │ ├── symbol-table.h │ │ │ │ │ ├── synchronize.h │ │ │ │ │ ├── test-properties.h │ │ │ │ │ ├── topsort.h │ │ │ │ │ ├── 
tuple-weight.h │ │ │ │ │ ├── types.h │ │ │ │ │ ├── union-find.h │ │ │ │ │ ├── union-weight.h │ │ │ │ │ ├── union.h │ │ │ │ │ ├── util.h │ │ │ │ │ ├── vector-fst.h │ │ │ │ │ ├── verify.h │ │ │ │ │ ├── visit.h │ │ │ │ │ └── weight.h │ │ │ │ ├── lib/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── compat.cc │ │ │ │ │ ├── flags.cc │ │ │ │ │ ├── fst-types.cc │ │ │ │ │ ├── fst.cc │ │ │ │ │ ├── mapped-file.cc │ │ │ │ │ ├── properties.cc │ │ │ │ │ ├── symbol-table-ops.cc │ │ │ │ │ ├── symbol-table.cc │ │ │ │ │ ├── util.cc │ │ │ │ │ └── weight.cc │ │ │ │ ├── script/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── arciterator-class.cc │ │ │ │ │ ├── arcsort.cc │ │ │ │ │ ├── closure.cc │ │ │ │ │ ├── compile.cc │ │ │ │ │ ├── compose.cc │ │ │ │ │ ├── concat.cc │ │ │ │ │ ├── connect.cc │ │ │ │ │ ├── convert.cc │ │ │ │ │ ├── decode.cc │ │ │ │ │ ├── determinize.cc │ │ │ │ │ ├── difference.cc │ │ │ │ │ ├── disambiguate.cc │ │ │ │ │ ├── draw.cc │ │ │ │ │ ├── encode.cc │ │ │ │ │ ├── encodemapper-class.cc │ │ │ │ │ ├── epsnormalize.cc │ │ │ │ │ ├── equal.cc │ │ │ │ │ ├── equivalent.cc │ │ │ │ │ ├── fst-class.cc │ │ │ │ │ ├── getters.cc │ │ │ │ │ ├── info-impl.cc │ │ │ │ │ ├── info.cc │ │ │ │ │ ├── intersect.cc │ │ │ │ │ ├── invert.cc │ │ │ │ │ ├── isomorphic.cc │ │ │ │ │ ├── map.cc │ │ │ │ │ ├── minimize.cc │ │ │ │ │ ├── print.cc │ │ │ │ │ ├── project.cc │ │ │ │ │ ├── prune.cc │ │ │ │ │ ├── push.cc │ │ │ │ │ ├── randequivalent.cc │ │ │ │ │ ├── randgen.cc │ │ │ │ │ ├── relabel.cc │ │ │ │ │ ├── replace.cc │ │ │ │ │ ├── reverse.cc │ │ │ │ │ ├── reweight.cc │ │ │ │ │ ├── rmepsilon.cc │ │ │ │ │ ├── shortest-distance.cc │ │ │ │ │ ├── shortest-path.cc │ │ │ │ │ ├── stateiterator-class.cc │ │ │ │ │ ├── synchronize.cc │ │ │ │ │ ├── text-io.cc │ │ │ │ │ ├── topsort.cc │ │ │ │ │ ├── union.cc │ │ │ │ │ ├── verify.cc │ │ │ │ │ └── weight-class.cc │ │ │ │ └── test/ │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── algo_test.cc │ │ │ │ ├── algo_test.h │ │ 
│ │ ├── fst_test.cc │ │ │ │ ├── fst_test.h │ │ │ │ ├── rand-fst.h │ │ │ │ ├── weight-tester.h │ │ │ │ └── weight_test.cc │ │ │ └── test-driver │ │ └── openfst-1.6.9-win/ │ │ ├── .gitignore │ │ ├── AUTHORS │ │ ├── CMakeLists.txt │ │ ├── COPYING │ │ ├── INSTALL │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── NEWS │ │ ├── README │ │ ├── README.md │ │ ├── README.mozilla │ │ ├── aclocal.m4 │ │ ├── ar-lib │ │ ├── compile │ │ ├── config.guess │ │ ├── config.h.in │ │ ├── config.sub │ │ ├── configure │ │ ├── configure.ac │ │ ├── depcomp │ │ ├── install-sh │ │ ├── ltmain.sh │ │ ├── m4/ │ │ │ ├── ac_python_devel.m4 │ │ │ ├── libtool.m4 │ │ │ ├── ltoptions.m4 │ │ │ ├── ltsugar.m4 │ │ │ ├── ltversion.m4 │ │ │ └── lt~obsolete.m4 │ │ ├── missing │ │ ├── openfst.sln │ │ ├── package.cmd │ │ ├── src/ │ │ │ ├── CMakeLists.txt │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── bin/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── bin.vcxproj │ │ │ │ ├── fstarcsort-main.cc │ │ │ │ ├── fstarcsort.cc │ │ │ │ ├── fstclosure-main.cc │ │ │ │ ├── fstclosure.cc │ │ │ │ ├── fstcompile-main.cc │ │ │ │ ├── fstcompile.cc │ │ │ │ ├── fstcompose-main.cc │ │ │ │ ├── fstcompose.cc │ │ │ │ ├── fstconcat-main.cc │ │ │ │ ├── fstconcat.cc │ │ │ │ ├── fstconnect-main.cc │ │ │ │ ├── fstconnect.cc │ │ │ │ ├── fstconvert-main.cc │ │ │ │ ├── fstconvert.cc │ │ │ │ ├── fstdeterminize-main.cc │ │ │ │ ├── fstdeterminize.cc │ │ │ │ ├── fstdifference-main.cc │ │ │ │ ├── fstdifference.cc │ │ │ │ ├── fstdisambiguate-main.cc │ │ │ │ ├── fstdisambiguate.cc │ │ │ │ ├── fstdraw-main.cc │ │ │ │ ├── fstdraw.cc │ │ │ │ ├── fstencode-main.cc │ │ │ │ ├── fstencode.cc │ │ │ │ ├── fstepsnormalize-main.cc │ │ │ │ ├── fstepsnormalize.cc │ │ │ │ ├── fstequal-main.cc │ │ │ │ ├── fstequal.cc │ │ │ │ ├── fstequivalent-main.cc │ │ │ │ ├── fstequivalent.cc │ │ │ │ ├── fstinfo-main.cc │ │ │ │ ├── fstinfo.cc │ │ │ │ ├── fstintersect-main.cc │ │ │ │ ├── fstintersect.cc │ │ │ │ ├── fstinvert-main.cc 
│ │ │ │ ├── fstinvert.cc │ │ │ │ ├── fstisomorphic-main.cc │ │ │ │ ├── fstisomorphic.cc │ │ │ │ ├── fstmap-main.cc │ │ │ │ ├── fstmap.cc │ │ │ │ ├── fstminimize-main.cc │ │ │ │ ├── fstminimize.cc │ │ │ │ ├── fstprint-main.cc │ │ │ │ ├── fstprint.cc │ │ │ │ ├── fstproject-main.cc │ │ │ │ ├── fstproject.cc │ │ │ │ ├── fstprune-main.cc │ │ │ │ ├── fstprune.cc │ │ │ │ ├── fstpush-main.cc │ │ │ │ ├── fstpush.cc │ │ │ │ ├── fstrandgen-main.cc │ │ │ │ ├── fstrandgen.cc │ │ │ │ ├── fstrelabel-main.cc │ │ │ │ ├── fstrelabel.cc │ │ │ │ ├── fstreplace-main.cc │ │ │ │ ├── fstreplace.cc │ │ │ │ ├── fstreverse-main.cc │ │ │ │ ├── fstreverse.cc │ │ │ │ ├── fstreweight-main.cc │ │ │ │ ├── fstreweight.cc │ │ │ │ ├── fstrmepsilon-main.cc │ │ │ │ ├── fstrmepsilon.cc │ │ │ │ ├── fstshortestdistance-main.cc │ │ │ │ ├── fstshortestdistance.cc │ │ │ │ ├── fstshortestpath-main.cc │ │ │ │ ├── fstshortestpath.cc │ │ │ │ ├── fstsymbols-main.cc │ │ │ │ ├── fstsymbols.cc │ │ │ │ ├── fstsynchronize-main.cc │ │ │ │ ├── fstsynchronize.cc │ │ │ │ ├── fsttopsort-main.cc │ │ │ │ ├── fsttopsort.cc │ │ │ │ ├── fstunion-main.cc │ │ │ │ └── fstunion.cc │ │ │ ├── extensions/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── compact/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── compact16_acceptor-fst.cc │ │ │ │ │ ├── compact16_string-fst.cc │ │ │ │ │ ├── compact16_unweighted-fst.cc │ │ │ │ │ ├── compact16_unweighted_acceptor-fst.cc │ │ │ │ │ ├── compact16_weighted_string-fst.cc │ │ │ │ │ ├── compact64_acceptor-fst.cc │ │ │ │ │ ├── compact64_string-fst.cc │ │ │ │ │ ├── compact64_unweighted-fst.cc │ │ │ │ │ ├── compact64_unweighted_acceptor-fst.cc │ │ │ │ │ ├── compact64_weighted_string-fst.cc │ │ │ │ │ ├── compact8_acceptor-fst.cc │ │ │ │ │ ├── compact8_string-fst.cc │ │ │ │ │ ├── compact8_unweighted-fst.cc │ │ │ │ │ ├── compact8_unweighted_acceptor-fst.cc │ │ │ │ │ └── compact8_weighted_string-fst.cc │ │ │ │ ├── 
compress/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── compress-script.cc │ │ │ │ │ ├── fstcompress.cc │ │ │ │ │ └── fstrandmod.cc │ │ │ │ ├── const/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── const16-fst.cc │ │ │ │ │ ├── const64-fst.cc │ │ │ │ │ └── const8-fst.cc │ │ │ │ ├── far/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── far-class.cc │ │ │ │ │ ├── farcompilestrings.cc │ │ │ │ │ ├── farcreate.cc │ │ │ │ │ ├── farequal.cc │ │ │ │ │ ├── farextract.cc │ │ │ │ │ ├── farinfo.cc │ │ │ │ │ ├── farisomorphic.cc │ │ │ │ │ ├── farprintstrings.cc │ │ │ │ │ ├── farscript.cc │ │ │ │ │ ├── getters.cc │ │ │ │ │ ├── script-impl.cc │ │ │ │ │ ├── stlist.cc │ │ │ │ │ ├── strings.cc │ │ │ │ │ └── sttable.cc │ │ │ │ ├── linear/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── fstlinear.cc │ │ │ │ │ ├── fstloglinearapply.cc │ │ │ │ │ ├── linear-classifier-fst.cc │ │ │ │ │ ├── linear-tagger-fst.cc │ │ │ │ │ └── linearscript.cc │ │ │ │ ├── lookahead/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── arc_lookahead-fst.cc │ │ │ │ │ ├── ilabel_lookahead-fst.cc │ │ │ │ │ └── olabel_lookahead-fst.cc │ │ │ │ ├── mpdt/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── mpdtcompose.cc │ │ │ │ │ ├── mpdtexpand.cc │ │ │ │ │ ├── mpdtinfo.cc │ │ │ │ │ ├── mpdtreverse.cc │ │ │ │ │ └── mpdtscript.cc │ │ │ │ ├── ngram/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── bitmap-index.cc │ │ │ │ │ ├── ngram-fst.cc │ │ │ │ │ └── nthbit.cc │ │ │ │ ├── pdt/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── getters.cc │ │ │ │ │ ├── pdtcompose.cc │ │ │ │ │ ├── pdtexpand.cc │ │ │ │ │ ├── pdtinfo.cc │ │ │ │ │ ├── pdtreplace.cc │ │ │ │ │ ├── 
pdtreverse.cc │ │ │ │ │ ├── pdtscript.cc │ │ │ │ │ └── pdtshortestpath.cc │ │ │ │ ├── python/ │ │ │ │ │ ├── Makefile.am │ │ │ │ │ ├── Makefile.in │ │ │ │ │ ├── basictypes.pxd │ │ │ │ │ ├── fst.pxd │ │ │ │ │ ├── ios.pxd │ │ │ │ │ ├── memory.pxd │ │ │ │ │ ├── pywrapfst.cc │ │ │ │ │ ├── pywrapfst.pxd │ │ │ │ │ └── pywrapfst.pyx │ │ │ │ └── special/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── fstspecial.cc │ │ │ │ ├── phi-fst.cc │ │ │ │ ├── rho-fst.cc │ │ │ │ └── sigma-fst.cc │ │ │ ├── include/ │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ └── fst/ │ │ │ │ ├── accumulator.h │ │ │ │ ├── add-on.h │ │ │ │ ├── algo_test.h │ │ │ │ ├── arc-arena.h │ │ │ │ ├── arc-map.h │ │ │ │ ├── arc.h │ │ │ │ ├── arcfilter.h │ │ │ │ ├── arcsort.h │ │ │ │ ├── bi-table.h │ │ │ │ ├── cache.h │ │ │ │ ├── closure.h │ │ │ │ ├── collection.h │ │ │ │ ├── compact-fst.h │ │ │ │ ├── compat.h │ │ │ │ ├── complement.h │ │ │ │ ├── compose (1).h │ │ │ │ ├── compose (2).h │ │ │ │ ├── compose-filter.h │ │ │ │ ├── compose.h │ │ │ │ ├── concat.h │ │ │ │ ├── config.h │ │ │ │ ├── config.h.in │ │ │ │ ├── connect.h │ │ │ │ ├── const-fst.h │ │ │ │ ├── determinize.h │ │ │ │ ├── dfs-visit.h │ │ │ │ ├── difference.h │ │ │ │ ├── disambiguate.h │ │ │ │ ├── edit-fst.h │ │ │ │ ├── encode.h │ │ │ │ ├── epsnormalize.h │ │ │ │ ├── equal.h │ │ │ │ ├── equivalent.h │ │ │ │ ├── expand.h │ │ │ │ ├── expanded-fst.h │ │ │ │ ├── expectation-weight.h │ │ │ │ ├── extensions/ │ │ │ │ │ ├── compress/ │ │ │ │ │ │ ├── compress-script.h │ │ │ │ │ │ ├── compress.h │ │ │ │ │ │ ├── elias.h │ │ │ │ │ │ ├── gzfile.h │ │ │ │ │ │ └── randmod.h │ │ │ │ │ ├── far/ │ │ │ │ │ │ ├── compile-strings.h │ │ │ │ │ │ ├── create.h │ │ │ │ │ │ ├── equal.h │ │ │ │ │ │ ├── extract.h │ │ │ │ │ │ ├── far-class.h │ │ │ │ │ │ ├── far.h │ │ │ │ │ │ ├── farlib.h │ │ │ │ │ │ ├── farscript.h │ │ │ │ │ │ ├── getters.h │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ ├── isomorphic.h │ │ │ │ │ │ ├── print-strings.h │ │ │ │ │ │ 
├── script-impl.h │ │ │ │ │ │ ├── stlist.h │ │ │ │ │ │ └── sttable.h │ │ │ │ │ ├── linear/ │ │ │ │ │ │ ├── linear-fst-data-builder.h │ │ │ │ │ │ ├── linear-fst-data.h │ │ │ │ │ │ ├── linear-fst.h │ │ │ │ │ │ ├── linearscript.h │ │ │ │ │ │ ├── loglinear-apply.h │ │ │ │ │ │ └── trie.h │ │ │ │ │ ├── mpdt/ │ │ │ │ │ │ ├── compose.h │ │ │ │ │ │ ├── expand.h │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ ├── mpdt.h │ │ │ │ │ │ ├── mpdtlib.h │ │ │ │ │ │ ├── mpdtscript.h │ │ │ │ │ │ ├── read_write_utils.h │ │ │ │ │ │ └── reverse.h │ │ │ │ │ ├── ngram/ │ │ │ │ │ │ ├── bitmap-index.h │ │ │ │ │ │ ├── ngram-fst.h │ │ │ │ │ │ └── nthbit.h │ │ │ │ │ ├── pdt/ │ │ │ │ │ │ ├── collection.h │ │ │ │ │ │ ├── compose.h │ │ │ │ │ │ ├── expand.h │ │ │ │ │ │ ├── getters.h │ │ │ │ │ │ ├── info.h │ │ │ │ │ │ ├── paren.h │ │ │ │ │ │ ├── pdt.h │ │ │ │ │ │ ├── pdtlib.h │ │ │ │ │ │ ├── pdtscript.h │ │ │ │ │ │ ├── replace.h │ │ │ │ │ │ ├── reverse.h │ │ │ │ │ │ └── shortest-path.h │ │ │ │ │ └── special/ │ │ │ │ │ ├── phi-fst.h │ │ │ │ │ ├── rho-fst.h │ │ │ │ │ └── sigma-fst.h │ │ │ │ ├── factor-weight.h │ │ │ │ ├── filter-state.h │ │ │ │ ├── flags.h │ │ │ │ ├── float-weight.h │ │ │ │ ├── fst-decl.h │ │ │ │ ├── fst.h │ │ │ │ ├── fstlib.h │ │ │ │ ├── generic-register.h │ │ │ │ ├── heap.h │ │ │ │ ├── icu.h │ │ │ │ ├── intersect.h │ │ │ │ ├── interval-set.h │ │ │ │ ├── invert.h │ │ │ │ ├── isomorphic.h │ │ │ │ ├── label-reachable.h │ │ │ │ ├── lexicographic-weight.h │ │ │ │ ├── linear-fst.h │ │ │ │ ├── lock.h │ │ │ │ ├── log.h │ │ │ │ ├── lookahead-filter.h │ │ │ │ ├── lookahead-matcher.h │ │ │ │ ├── map.h │ │ │ │ ├── mapped-file.h │ │ │ │ ├── matcher-fst.h │ │ │ │ ├── matcher.h │ │ │ │ ├── memory.h │ │ │ │ ├── minimize.h │ │ │ │ ├── mpdt.h │ │ │ │ ├── mutable-fst.h │ │ │ │ ├── ngram-fst.h │ │ │ │ ├── pair-weight.h │ │ │ │ ├── paren.h │ │ │ │ ├── partition.h │ │ │ │ ├── pdt.h │ │ │ │ ├── power-weight.h │ │ │ │ ├── product-weight.h │ │ │ │ ├── project.h │ │ │ │ ├── properties.h │ │ │ │ ├── prune.h │ │ │ │ ├── 
push.h │ │ │ │ ├── queue.h │ │ │ │ ├── randequivalent.h │ │ │ │ ├── randgen.h │ │ │ │ ├── rational.h │ │ │ │ ├── register.h │ │ │ │ ├── relabel.h │ │ │ │ ├── replace-util.h │ │ │ │ ├── replace.h │ │ │ │ ├── reverse.h │ │ │ │ ├── reweight.h │ │ │ │ ├── rmepsilon.h │ │ │ │ ├── rmfinalepsilon.h │ │ │ │ ├── script/ │ │ │ │ │ ├── arc-class.h │ │ │ │ │ ├── arciterator-class.h │ │ │ │ │ ├── arcsort.h │ │ │ │ │ ├── arg-packs.h │ │ │ │ │ ├── closure.h │ │ │ │ │ ├── compile-impl.h │ │ │ │ │ ├── compile.h │ │ │ │ │ ├── compose.h │ │ │ │ │ ├── concat.h │ │ │ │ │ ├── connect.h │ │ │ │ │ ├── convert.h │ │ │ │ │ ├── decode.h │ │ │ │ │ ├── determinize.h │ │ │ │ │ ├── difference.h │ │ │ │ │ ├── disambiguate.h │ │ │ │ │ ├── draw-impl.h │ │ │ │ │ ├── draw.h │ │ │ │ │ ├── encode.h │ │ │ │ │ ├── encodemapper-class.h │ │ │ │ │ ├── epsnormalize.h │ │ │ │ │ ├── equal.h │ │ │ │ │ ├── equivalent.h │ │ │ │ │ ├── fst-class.h │ │ │ │ │ ├── fstscript-decl.h │ │ │ │ │ ├── fstscript.h │ │ │ │ │ ├── getters.h │ │ │ │ │ ├── info-impl.h │ │ │ │ │ ├── info.h │ │ │ │ │ ├── intersect.h │ │ │ │ │ ├── invert.h │ │ │ │ │ ├── isomorphic.h │ │ │ │ │ ├── map.h │ │ │ │ │ ├── minimize.h │ │ │ │ │ ├── print-impl.h │ │ │ │ │ ├── print.h │ │ │ │ │ ├── project.h │ │ │ │ │ ├── prune.h │ │ │ │ │ ├── push.h │ │ │ │ │ ├── randequivalent.h │ │ │ │ │ ├── randgen.h │ │ │ │ │ ├── register.h │ │ │ │ │ ├── relabel.h │ │ │ │ │ ├── replace.h │ │ │ │ │ ├── reverse.h │ │ │ │ │ ├── reweight.h │ │ │ │ │ ├── rmepsilon.h │ │ │ │ │ ├── script-impl.h │ │ │ │ │ ├── shortest-distance.h │ │ │ │ │ ├── shortest-path.h │ │ │ │ │ ├── stateiterator-class.h │ │ │ │ │ ├── synchronize.h │ │ │ │ │ ├── text-io.h │ │ │ │ │ ├── topsort.h │ │ │ │ │ ├── union.h │ │ │ │ │ ├── verify.h │ │ │ │ │ └── weight-class.h │ │ │ │ ├── set-weight.h │ │ │ │ ├── shortest-distance.h │ │ │ │ ├── shortest-path.h │ │ │ │ ├── signed-log-weight.h │ │ │ │ ├── sparse-power-weight.h │ │ │ │ ├── sparse-tuple-weight.h │ │ │ │ ├── state-map.h │ │ │ │ ├── state-reachable.h │ │ 
│ │ ├── state-table.h │ │ │ │ ├── statesort.h │ │ │ │ ├── string-weight.h │ │ │ │ ├── string.h │ │ │ │ ├── symbol-table-ops.h │ │ │ │ ├── symbol-table.h │ │ │ │ ├── synchronize.h │ │ │ │ ├── test/ │ │ │ │ │ ├── algo_test.h │ │ │ │ │ ├── fst_test.h │ │ │ │ │ ├── rand-fst.h │ │ │ │ │ └── weight-tester.h │ │ │ │ ├── test-properties.h │ │ │ │ ├── topsort.h │ │ │ │ ├── tuple-weight.h │ │ │ │ ├── types.h │ │ │ │ ├── union-find.h │ │ │ │ ├── union-weight.h │ │ │ │ ├── union.h │ │ │ │ ├── util.h │ │ │ │ ├── vector-fst.h │ │ │ │ ├── verify.h │ │ │ │ ├── visit.h │ │ │ │ └── weight.h │ │ │ ├── lib/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── compat.cc │ │ │ │ ├── flags.cc │ │ │ │ ├── fst-types.cc │ │ │ │ ├── fst.cc │ │ │ │ ├── libfst.vcxproj │ │ │ │ ├── libfst.vcxproj.filters │ │ │ │ ├── mapped-file.cc │ │ │ │ ├── properties.cc │ │ │ │ ├── symbol-table-ops.cc │ │ │ │ ├── symbol-table.cc │ │ │ │ ├── util.cc │ │ │ │ └── weight.cc │ │ │ ├── openfst-multibin.targets │ │ │ ├── openfst.props │ │ │ ├── openfst.targets │ │ │ ├── openfst.user.props │ │ │ ├── script/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── arciterator-class.cc │ │ │ │ ├── arcsort.cc │ │ │ │ ├── closure.cc │ │ │ │ ├── compile.cc │ │ │ │ ├── compose.cc │ │ │ │ ├── concat.cc │ │ │ │ ├── connect.cc │ │ │ │ ├── convert.cc │ │ │ │ ├── decode.cc │ │ │ │ ├── determinize.cc │ │ │ │ ├── difference.cc │ │ │ │ ├── disambiguate.cc │ │ │ │ ├── draw.cc │ │ │ │ ├── encode.cc │ │ │ │ ├── encodemapper-class.cc │ │ │ │ ├── epsnormalize.cc │ │ │ │ ├── equal.cc │ │ │ │ ├── equivalent.cc │ │ │ │ ├── fst-class.cc │ │ │ │ ├── getters.cc │ │ │ │ ├── info-impl.cc │ │ │ │ ├── info.cc │ │ │ │ ├── intersect.cc │ │ │ │ ├── invert.cc │ │ │ │ ├── isomorphic.cc │ │ │ │ ├── libfstscript.vcxproj │ │ │ │ ├── libfstscript.vcxproj.filters │ │ │ │ ├── map.cc │ │ │ │ ├── minimize.cc │ │ │ │ ├── print.cc │ │ │ │ ├── project.cc │ │ │ │ ├── prune.cc │ │ │ │ ├── push.cc │ │ 
│ │ ├── randequivalent.cc │ │ │ │ ├── randgen.cc │ │ │ │ ├── relabel.cc │ │ │ │ ├── replace.cc │ │ │ │ ├── reverse.cc │ │ │ │ ├── reweight.cc │ │ │ │ ├── rmepsilon.cc │ │ │ │ ├── shortest-distance.cc │ │ │ │ ├── shortest-path.cc │ │ │ │ ├── stateiterator-class.cc │ │ │ │ ├── synchronize.cc │ │ │ │ ├── text-io.cc │ │ │ │ ├── topsort.cc │ │ │ │ ├── union.cc │ │ │ │ ├── verify.cc │ │ │ │ └── weight-class.cc │ │ │ └── test/ │ │ │ ├── CMakeLists.txt │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── algo_test.cc │ │ │ ├── fst_test.cc │ │ │ └── weight_test.cc │ │ └── test-driver │ ├── deepspeech.cc │ ├── deepspeech.h │ ├── deepspeech_errors.cc │ ├── definitions.mk │ ├── dotnet/ │ │ ├── .gitignore │ │ ├── DeepSpeech.sln │ │ ├── DeepSpeechClient/ │ │ │ ├── DeepSpeech.cs │ │ │ ├── DeepSpeechClient.csproj │ │ │ ├── Enums/ │ │ │ │ └── ErrorCodes.cs │ │ │ ├── Extensions/ │ │ │ │ └── NativeExtensions.cs │ │ │ ├── Interfaces/ │ │ │ │ └── IDeepSpeech.cs │ │ │ ├── Models/ │ │ │ │ ├── CandidateTranscript.cs │ │ │ │ ├── DeepSpeechStream.cs │ │ │ │ ├── Metadata.cs │ │ │ │ └── TokenMetadata.cs │ │ │ ├── NativeImp.cs │ │ │ └── Structs/ │ │ │ ├── CandidateTranscript.cs │ │ │ ├── Metadata.cs │ │ │ └── TokenMetadata.cs │ │ ├── DeepSpeechConsole/ │ │ │ ├── App.config │ │ │ ├── DeepSpeechConsole.csproj │ │ │ ├── Program.cs │ │ │ ├── Properties/ │ │ │ │ └── AssemblyInfo.cs │ │ │ └── packages.config │ │ ├── DeepSpeechWPF/ │ │ │ ├── .gitignore │ │ │ ├── App.config │ │ │ ├── App.xaml │ │ │ ├── App.xaml.cs │ │ │ ├── DeepSpeech.WPF.csproj │ │ │ ├── DeepSpeech.WPF.sln │ │ │ ├── MainWindow.xaml │ │ │ ├── MainWindow.xaml.cs │ │ │ ├── Properties/ │ │ │ │ ├── AssemblyInfo.cs │ │ │ │ ├── Resources.Designer.cs │ │ │ │ ├── Resources.resx │ │ │ │ ├── Settings.Designer.cs │ │ │ │ └── Settings.settings │ │ │ ├── ViewModels/ │ │ │ │ ├── BindableBase.cs │ │ │ │ └── MainWindowViewModel.cs │ │ │ └── packages.config │ │ └── nupkg/ │ │ ├── build/ │ │ │ ├── .gitpreserve │ │ │ └── DeepSpeech.targets │ │ ├── 
deepspeech.nuspec.in │ │ ├── lib/ │ │ │ ├── net45/ │ │ │ │ └── .gitpreserve │ │ │ ├── net46/ │ │ │ │ └── .gitpreserve │ │ │ └── net47/ │ │ │ └── .gitpreserve │ │ └── tools/ │ │ └── .gitpreserve │ ├── enumerate_kenlm_vocabulary.cpp │ ├── gen_workspace_status.sh │ ├── generate_scorer_package.cpp │ ├── getopt_win.h │ ├── java/ │ │ ├── .gitignore │ │ ├── .idea/ │ │ │ ├── codeStyles/ │ │ │ │ └── Project.xml │ │ │ ├── gradle.xml │ │ │ ├── misc.xml │ │ │ └── runConfigurations.xml │ │ ├── Makefile │ │ ├── README.md │ │ ├── app/ │ │ │ ├── .gitignore │ │ │ ├── build.gradle │ │ │ ├── proguard-rules.pro │ │ │ └── src/ │ │ │ ├── androidTest/ │ │ │ │ └── java/ │ │ │ │ └── org/ │ │ │ │ └── deepspeech/ │ │ │ │ └── ExampleInstrumentedTest.java │ │ │ ├── main/ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ ├── java/ │ │ │ │ │ └── org/ │ │ │ │ │ └── deepspeech/ │ │ │ │ │ └── DeepSpeechActivity.java │ │ │ │ └── res/ │ │ │ │ ├── drawable/ │ │ │ │ │ └── ic_launcher_background.xml │ │ │ │ ├── drawable-v24/ │ │ │ │ │ └── ic_launcher_foreground.xml │ │ │ │ ├── layout/ │ │ │ │ │ └── activity_deep_speech.xml │ │ │ │ ├── mipmap-anydpi-v26/ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ └── values/ │ │ │ │ ├── colors.xml │ │ │ │ ├── strings.xml │ │ │ │ └── styles.xml │ │ │ └── test/ │ │ │ └── java/ │ │ │ └── org/ │ │ │ └── deepspeech/ │ │ │ └── ExampleUnitTest.java │ │ ├── build.gradle │ │ ├── gradle/ │ │ │ └── wrapper/ │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── gradlew │ │ ├── gradlew.bat │ │ ├── jni/ │ │ │ └── deepspeech.i │ │ ├── libdeepspeech/ │ │ │ ├── .gitignore │ │ │ ├── CMakeLists.txt │ │ │ ├── build.gradle │ │ │ ├── gradle.properties │ │ │ ├── libs/ │ │ │ │ └── .gitignore │ │ │ ├── proguard-rules.pro │ │ │ └── src/ │ │ │ ├── androidTest/ │ │ │ │ └── java/ │ │ │ │ └── org/ │ │ │ │ └── deepspeech/ │ │ │ │ └── libdeepspeech/ │ │ │ │ └── test/ │ │ │ │ └── BasicTest.java │ │ │ ├── main/ │ │ │ │ ├── 
AndroidManifest.xml │ │ │ │ ├── java/ │ │ │ │ │ └── org/ │ │ │ │ │ └── deepspeech/ │ │ │ │ │ ├── libdeepspeech/ │ │ │ │ │ │ ├── DeepSpeechModel.java │ │ │ │ │ │ └── DeepSpeechStreamingState.java │ │ │ │ │ └── libdeepspeech_doc/ │ │ │ │ │ ├── CandidateTranscript.java │ │ │ │ │ ├── DeepSpeech_Error_Codes.java │ │ │ │ │ ├── Metadata.java │ │ │ │ │ ├── README.rst │ │ │ │ │ └── TokenMetadata.java │ │ │ │ └── res/ │ │ │ │ └── values/ │ │ │ │ └── strings.xml │ │ │ └── test/ │ │ │ └── java/ │ │ │ └── org/ │ │ │ └── deepspeech/ │ │ │ └── libdeepspeech/ │ │ │ └── ExampleUnitTest.java │ │ └── settings.gradle │ ├── javascript/ │ │ ├── Makefile │ │ ├── README.md │ │ ├── abi_crosswalk_priv.json │ │ ├── binding.gyp │ │ ├── client.ts │ │ ├── deepspeech.i │ │ ├── index.ts │ │ ├── node-pre-gyp.d.ts │ │ ├── package.json.in │ │ └── tsconfig.json │ ├── kenlm/ │ │ ├── .gitignore │ │ ├── BUILDING │ │ ├── CMakeLists.txt │ │ ├── COPYING │ │ ├── COPYING.3 │ │ ├── COPYING.LESSER.3 │ │ ├── Doxyfile │ │ ├── GIT_REVISION │ │ ├── LICENSE │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── README.mozilla │ │ ├── clean_query_only.sh │ │ ├── cmake/ │ │ │ ├── KenLMFunctions.cmake │ │ │ └── modules/ │ │ │ └── FindEigen3.cmake │ │ ├── compile_query_only.sh │ │ ├── lm/ │ │ │ ├── CMakeLists.txt │ │ │ ├── bhiksha.cc │ │ │ ├── bhiksha.hh │ │ │ ├── binary_format.cc │ │ │ ├── binary_format.hh │ │ │ ├── blank.hh │ │ │ ├── build_binary_main.cc │ │ │ ├── common/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── compare.hh │ │ │ │ ├── joint_order.hh │ │ │ │ ├── model_buffer.cc │ │ │ │ ├── model_buffer.hh │ │ │ │ ├── model_buffer_test.cc │ │ │ │ ├── ngram.hh │ │ │ │ ├── ngram_stream.hh │ │ │ │ ├── print.cc │ │ │ │ ├── print.hh │ │ │ │ ├── renumber.cc │ │ │ │ ├── renumber.hh │ │ │ │ ├── size_option.cc │ │ │ │ ├── size_option.hh │ │ │ │ ├── special.hh │ │ │ │ └── test_data/ │ │ │ │ ├── generate.sh │ │ │ │ ├── toy0.1 │ │ │ │ ├── toy0.2 │ │ │ │ ├── toy0.3 │ │ │ │ ├── toy0.arpa │ │ │ │ ├── toy0.kenlm_intermediate │ │ │ │ ├── 
toy0.vocab │ │ │ │ ├── toy1.1 │ │ │ │ ├── toy1.2 │ │ │ │ ├── toy1.3 │ │ │ │ ├── toy1.arpa │ │ │ │ ├── toy1.kenlm_intermediate │ │ │ │ └── toy1.vocab │ │ │ ├── config.cc │ │ │ ├── config.hh │ │ │ ├── enumerate_vocab.hh │ │ │ ├── facade.hh │ │ │ ├── fragment_main.cc │ │ │ ├── interpolate/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── backoff_matrix.hh │ │ │ │ ├── backoff_reunification.cc │ │ │ │ ├── backoff_reunification.hh │ │ │ │ ├── backoff_reunification_test.cc │ │ │ │ ├── bounded_sequence_encoding.cc │ │ │ │ ├── bounded_sequence_encoding.hh │ │ │ │ ├── bounded_sequence_encoding_test.cc │ │ │ │ ├── interpolate_info.hh │ │ │ │ ├── interpolate_main.cc │ │ │ │ ├── merge_probabilities.cc │ │ │ │ ├── merge_probabilities.hh │ │ │ │ ├── merge_test/ │ │ │ │ │ ├── test1 │ │ │ │ │ ├── test2 │ │ │ │ │ ├── test3 │ │ │ │ │ ├── test_bad_order │ │ │ │ │ └── test_no_unk │ │ │ │ ├── merge_vocab.cc │ │ │ │ ├── merge_vocab.hh │ │ │ │ ├── merge_vocab_test.cc │ │ │ │ ├── normalize.cc │ │ │ │ ├── normalize.hh │ │ │ │ ├── normalize_test.cc │ │ │ │ ├── pipeline.cc │ │ │ │ ├── pipeline.hh │ │ │ │ ├── split_worker.cc │ │ │ │ ├── split_worker.hh │ │ │ │ ├── streaming_example_main.cc │ │ │ │ ├── tune_derivatives.cc │ │ │ │ ├── tune_derivatives.hh │ │ │ │ ├── tune_derivatives_test.cc │ │ │ │ ├── tune_instances.cc │ │ │ │ ├── tune_instances.hh │ │ │ │ ├── tune_instances_test.cc │ │ │ │ ├── tune_matrix.hh │ │ │ │ ├── tune_weights.cc │ │ │ │ ├── tune_weights.hh │ │ │ │ ├── universal_vocab.cc │ │ │ │ └── universal_vocab.hh │ │ │ ├── kenlm_benchmark_main.cc │ │ │ ├── left.hh │ │ │ ├── left_test.cc │ │ │ ├── lm_exception.cc │ │ │ ├── lm_exception.hh │ │ │ ├── max_order.hh │ │ │ ├── model.cc │ │ │ ├── model.hh │ │ │ ├── model_test.cc │ │ │ ├── model_type.hh │ │ │ ├── ngram_query.hh │ │ │ ├── partial.hh │ │ │ ├── partial_test.cc │ │ │ ├── quantize.cc │ │ │ ├── quantize.hh │ │ │ ├── query_main.cc │ │ │ ├── read_arpa.cc │ │ │ ├── read_arpa.hh │ │ │ ├── return.hh │ │ │ ├── search_hashed.cc │ │ │ ├── 
search_hashed.hh │ │ │ ├── search_trie.cc │ │ │ ├── search_trie.hh │ │ │ ├── sizes.cc │ │ │ ├── sizes.hh │ │ │ ├── state.hh │ │ │ ├── test.arpa │ │ │ ├── test_nounk.arpa │ │ │ ├── trie.cc │ │ │ ├── trie.hh │ │ │ ├── trie_sort.cc │ │ │ ├── trie_sort.hh │ │ │ ├── value.hh │ │ │ ├── value_build.cc │ │ │ ├── value_build.hh │ │ │ ├── virtual_interface.cc │ │ │ ├── virtual_interface.hh │ │ │ ├── vocab.cc │ │ │ ├── vocab.hh │ │ │ ├── weights.hh │ │ │ ├── word_index.hh │ │ │ └── wrappers/ │ │ │ ├── README │ │ │ ├── nplm.cc │ │ │ └── nplm.hh │ │ ├── setup.py │ │ └── util/ │ │ ├── CMakeLists.txt │ │ ├── bit_packing.cc │ │ ├── bit_packing.hh │ │ ├── bit_packing_test.cc │ │ ├── cat_compressed_main.cc │ │ ├── double-conversion/ │ │ │ ├── CMakeLists.txt │ │ │ ├── LICENSE │ │ │ ├── bignum-dtoa.cc │ │ │ ├── bignum-dtoa.h │ │ │ ├── bignum.cc │ │ │ ├── bignum.h │ │ │ ├── cached-powers.cc │ │ │ ├── cached-powers.h │ │ │ ├── diy-fp.cc │ │ │ ├── diy-fp.h │ │ │ ├── double-conversion.cc │ │ │ ├── double-conversion.h │ │ │ ├── fast-dtoa.cc │ │ │ ├── fast-dtoa.h │ │ │ ├── fixed-dtoa.cc │ │ │ ├── fixed-dtoa.h │ │ │ ├── ieee.h │ │ │ ├── strtod.cc │ │ │ ├── strtod.h │ │ │ └── utils.h │ │ ├── ersatz_progress.cc │ │ ├── ersatz_progress.hh │ │ ├── exception.cc │ │ ├── exception.hh │ │ ├── fake_ostream.hh │ │ ├── file.cc │ │ ├── file.hh │ │ ├── file_piece.cc │ │ ├── file_piece.hh │ │ ├── file_piece_test.cc │ │ ├── file_stream.hh │ │ ├── fixed_array.hh │ │ ├── float_to_string.cc │ │ ├── float_to_string.hh │ │ ├── have.hh │ │ ├── integer_to_string.cc │ │ ├── integer_to_string.hh │ │ ├── integer_to_string_test.cc │ │ ├── joint_sort.hh │ │ ├── joint_sort_test.cc │ │ ├── mmap.cc │ │ ├── mmap.hh │ │ ├── multi_intersection.hh │ │ ├── multi_intersection_test.cc │ │ ├── murmur_hash.cc │ │ ├── murmur_hash.hh │ │ ├── parallel_read.cc │ │ ├── parallel_read.hh │ │ ├── pcqueue.hh │ │ ├── pcqueue_test.cc │ │ ├── pool.cc │ │ ├── pool.hh │ │ ├── probing_hash_table.hh │ │ ├── probing_hash_table_benchmark_main.cc │ 
│ ├── probing_hash_table_test.cc │ │ ├── proxy_iterator.hh │ │ ├── read_compressed.cc │ │ ├── read_compressed.hh │ │ ├── read_compressed_test.cc │ │ ├── scoped.cc │ │ ├── scoped.hh │ │ ├── sized_iterator.hh │ │ ├── sized_iterator_test.cc │ │ ├── sorted_uniform.hh │ │ ├── sorted_uniform_test.cc │ │ ├── spaces.cc │ │ ├── spaces.hh │ │ ├── string_piece.cc │ │ ├── string_piece.hh │ │ ├── string_piece_hash.hh │ │ ├── string_stream.hh │ │ ├── string_stream_test.cc │ │ ├── thread_pool.hh │ │ ├── tokenize_piece.hh │ │ ├── tokenize_piece_test.cc │ │ ├── usage.cc │ │ └── usage.hh │ ├── modelstate.cc │ ├── modelstate.h │ ├── multistrap_armbian64_buster.conf │ ├── multistrap_raspbian_buster.conf │ ├── python/ │ │ ├── Makefile │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── client.py │ │ ├── impl.i │ │ ├── numpy.i │ │ ├── setup.cfg │ │ └── setup.py │ ├── swift/ │ │ ├── .gitignore │ │ ├── deepspeech-ios.podspec │ │ ├── deepspeech_ios/ │ │ │ ├── DeepSpeech.swift │ │ │ ├── Info.plist │ │ │ ├── deepspeech_ios.h │ │ │ └── deepspeech_ios.modulemap │ │ ├── deepspeech_ios.xcodeproj/ │ │ │ ├── project.pbxproj │ │ │ ├── project.xcworkspace/ │ │ │ │ ├── contents.xcworkspacedata │ │ │ │ └── xcshareddata/ │ │ │ │ └── IDEWorkspaceChecks.plist │ │ │ └── xcshareddata/ │ │ │ └── xcschemes/ │ │ │ └── deepspeech_ios.xcscheme │ │ ├── deepspeech_ios.xcworkspace/ │ │ │ ├── contents.xcworkspacedata │ │ │ └── xcshareddata/ │ │ │ ├── IDEWorkspaceChecks.plist │ │ │ └── WorkspaceSettings.xcsettings │ │ ├── deepspeech_ios_test/ │ │ │ ├── AppDelegate.swift │ │ │ ├── Assets.xcassets/ │ │ │ │ ├── AppIcon.appiconset/ │ │ │ │ │ └── Contents.json │ │ │ │ └── Contents.json │ │ │ ├── AudioContext.swift │ │ │ ├── Base.lproj/ │ │ │ │ └── LaunchScreen.storyboard │ │ │ ├── ContentView.swift │ │ │ ├── Info.plist │ │ │ ├── Preview Content/ │ │ │ │ └── Preview Assets.xcassets/ │ │ │ │ └── Contents.json │ │ │ ├── SceneDelegate.swift │ │ │ └── SpeechRecognitionImpl.swift │ │ ├── deepspeech_ios_test.xcodeproj/ │ │ │ ├── 
project.pbxproj │ │ │ ├── project.xcworkspace/ │ │ │ │ ├── contents.xcworkspacedata │ │ │ │ └── xcshareddata/ │ │ │ │ └── IDEWorkspaceChecks.plist │ │ │ └── xcshareddata/ │ │ │ └── xcschemes/ │ │ │ └── deepspeech_ios_test.xcscheme │ │ ├── deepspeech_ios_testTests/ │ │ │ ├── Info.plist │ │ │ └── deepspeech_ios_testTests.swift │ │ └── deepspeech_ios_testUITests/ │ │ ├── Info.plist │ │ └── deepspeech_ios_testUITests.swift │ ├── test/ │ │ └── concurrent_streams.py │ ├── tflitemodelstate.cc │ ├── tflitemodelstate.h │ ├── tfmodelstate.cc │ ├── tfmodelstate.h │ ├── trie_load.cc │ ├── workspace_status.h │ └── xldd ├── parse_valgrind_suppressions.sh ├── requirements_eval_tflite.txt ├── requirements_tests.txt ├── requirements_transcribe.txt ├── setup.py ├── stats.py ├── taskcluster/ │ ├── .build.yml │ ├── .shared.yml │ ├── README.rst │ ├── android-apk-build.sh │ ├── android-apk-package.sh │ ├── android-arm64-cpu-dbg.yml │ ├── android-arm64-cpu-opt.yml │ ├── android-armv7-cpu-dbg.yml │ ├── android-armv7-cpu-opt.yml │ ├── android-build-dbg.sh │ ├── android-build.sh │ ├── android-cache-arm64-v8a-android-24.yml │ ├── android-cache-arm64-v8a-android-25.yml │ ├── android-cache-armeabi-v7a-android-24.yml │ ├── android-cache-armeabi-v7a-android-25.yml │ ├── android-cache-sdk-android-27.yml │ ├── android-cache-x86_64-android-24.yml │ ├── android-cache-x86_64-android-25.yml │ ├── android-cache-x86_64-android-26.yml │ ├── android-cache-x86_64-android-28.yml │ ├── android-cache-x86_64-android-29.yml │ ├── android-cache-x86_64-android-30.yml │ ├── android-java-opt.yml │ ├── android-package.sh │ ├── android-x86_64-cpu-dbg.yml │ ├── android-x86_64-cpu-opt.yml │ ├── android_cache-build.sh │ ├── android_cache-package.sh │ ├── arm64-build-dbg.sh │ ├── arm64-build.sh │ ├── build-python-wheel.tyml │ ├── cuda-build-dbg.sh │ ├── cuda-build.sh │ ├── darwin-amd64-cpu-opt.yml │ ├── darwin-amd64-ctc-opt.yml │ ├── darwin-amd64-tflite-opt.yml │ ├── darwin-opt-base.tyml │ ├── decoder-build.sh │ ├── 
decoder-package.sh │ ├── docker-build-base.tyml │ ├── docker-image-build.yml │ ├── docker-image-train.yml │ ├── docs-build.sh │ ├── docs-package.sh │ ├── docs.tyml │ ├── docs.yml │ ├── examples-base.tyml │ ├── examples-electronjs.yml │ ├── examples-ffmpeg_vad_streaming-node10.yml │ ├── examples-ffmpeg_vad_streaming-node12.yml │ ├── examples-mic_vad_streaming-py36.yml │ ├── examples-mic_vad_streaming-py37.yml │ ├── examples-mic_vad_streaming-py38.yml │ ├── examples-mic_vad_streaming-py39.yml.DISABLED_UNTIL_SCIPY_PY39 │ ├── examples-nodejs_wav-node10.yml │ ├── examples-nodejs_wav-node12.yml │ ├── examples-vad_transcriber-py35.yml │ ├── examples-vad_transcriber-py36.yml │ ├── examples-vad_transcriber-py37.yml │ ├── examples-vad_transcriber-py38.yml │ ├── examples-vad_transcriber-py39.yml │ ├── generic_tc_caching-darwin-opt-base.tyml │ ├── generic_tc_caching-linux-opt-base.tyml │ ├── generic_tc_caching-win-opt-base.tyml │ ├── gradle-build.sh │ ├── gradle-cache.yml │ ├── gradle-package.sh │ ├── homebrew-build.sh │ ├── homebrew-package.sh │ ├── homebrew_builds-darwin-amd64.yml │ ├── homebrew_tests-darwin-amd64.yml │ ├── host-build-dbg.sh │ ├── host-build.sh │ ├── ios-arm64-tflite-opt.yml │ ├── ios-build.sh │ ├── ios-package.sh │ ├── ios-x86_64-tflite-opt.yml │ ├── kenlm_android-arm64-cpu-opt.yml │ ├── kenlm_android-armv7-cpu-opt.yml │ ├── kenlm_android-x86_64-cpu-opt.yml │ ├── kenlm_darwin-amd64-cpu-opt.yml │ ├── kenlm_linux-amd64-cpu-opt.yml │ ├── kenlm_linux-arm64-cpu-opt.yml │ ├── kenlm_linux-rpi3-cpu-opt.yml │ ├── kenlm_multistrap_arm64_buster.conf │ ├── kenlm_multistrap_rpi3_buster.conf │ ├── kenlm_tc-build.sh │ ├── kenlm_tc-package.sh │ ├── kenlm_tc-setup.sh │ ├── kenlm_win-amd64-cpu-opt.yml.DISABLED │ ├── linux-amd64-cpu-dbg.yml │ ├── linux-amd64-cpu-opt.yml │ ├── linux-amd64-ctc-opt.yml │ ├── linux-amd64-gpu-dbg.yml.DISABLED │ ├── linux-amd64-gpu-opt.yml │ ├── linux-amd64-tflite-dbg.yml │ ├── linux-amd64-tflite-opt.yml │ ├── linux-arm64-cpu-dbg.yml │ ├── 
linux-arm64-cpu-opt.yml │ ├── linux-opt-base.tyml │ ├── linux-rpi3-cpu-dbg.yml │ ├── linux-rpi3-cpu-opt.yml │ ├── node-build.sh │ ├── node-gyp-cache.yml │ ├── node-gyp-package.sh │ ├── node-gyp-populate.sh │ ├── node-package-cpu.yml │ ├── node-package-gpu.yml │ ├── node-package-opt-base.tyml │ ├── node-package-tflite.yml │ ├── node-package.sh │ ├── package.sh │ ├── pyenv-build.sh │ ├── pyenv-darwin-amd64.yml │ ├── pyenv-linux-amd64.yml │ ├── pyenv-package.sh │ ├── pyenv-win-amd64.yml │ ├── rpi3-build-dbg.sh │ ├── rpi3-build.sh │ ├── scriptworker-task-github.yml │ ├── scriptworker-task-jcenter.yml │ ├── scriptworker-task-npm.yml │ ├── scriptworker-task-nuget.yml │ ├── scriptworker-task-pypi.yml │ ├── scriptworker-task-readthedocs.yml │ ├── simple-task.tyml │ ├── swig-darwin-amd64.yml │ ├── swig-linux-amd64.yml │ ├── swig-win-amd64.yml │ ├── tc-all-utils.sh │ ├── tc-all-vars.sh │ ├── tc-android-apk-tests.sh │ ├── tc-android-ds-tests.sh │ ├── tc-android-utils.sh │ ├── tc-asserts.sh │ ├── tc-augmentation-tests.sh │ ├── tc-build-utils.sh │ ├── tc-cpp-bytes-ds-tests.sh │ ├── tc-cpp-ds-tests-prod.sh │ ├── tc-cpp-ds-tests.sh │ ├── tc-cpp_tflite-ds-tests-prod.sh │ ├── tc-cpp_tflite-ds-tests.sh │ ├── tc-cpp_tflite_basic-ds-tests.sh │ ├── tc-cppwin-ds-tests.sh │ ├── tc-decision.py │ ├── tc-decision_reqs.txt │ ├── tc-dotnet-utils.sh │ ├── tc-electron-tests.sh │ ├── tc-electron_tflite-tests.sh │ ├── tc-evaluate_tflite.sh │ ├── tc-netframework-ds-tests.sh │ ├── tc-node-tests-prod.sh │ ├── tc-node-tests.sh │ ├── tc-node-utils.sh │ ├── tc-node_tflite-tests-prod.sh │ ├── tc-node_tflite-tests.sh │ ├── tc-package.sh │ ├── tc-py-utils.sh │ ├── tc-python-tests-prod.sh │ ├── tc-python-tests.sh │ ├── tc-python_tflite-tests-prod.sh │ ├── tc-python_tflite-tests.sh │ ├── tc-schedule.sh │ ├── tc-scorer-tests.sh │ ├── tc-single-shot-inference.sh │ ├── tc-tests-utils.sh │ ├── tc-train-extra-tests.sh │ ├── tc-train-tests.sh │ ├── tc-train-unittests.sh │ ├── tc-transcribe-tests.sh │ ├── 
tc-transfer-tests.sh │ ├── tc-true.sh │ ├── tc-update-index.sh │ ├── tc-valgrind-cpp.sh │ ├── tc-valgrind-cpp_tflite.sh │ ├── tc-valgrind-utils.sh │ ├── test-android-opt-base.tyml │ ├── test-apk-android-24-x86_64-opt.yml │ ├── test-apk-android-25-x86_64-opt.yml │ ├── test-apk-android-26-x86_64-opt.yml │ ├── test-apk-android-28-x86_64-opt.yml │ ├── test-apk-android-29-x86_64-opt.yml │ ├── test-apk-android-30-x86_64-opt.yml │ ├── test-armbian-opt-base.tyml │ ├── test-augmentations-linux-amd64-py36m-opt.yml │ ├── test-cpp-android-24-arm64-opt.yml │ ├── test-cpp-android-24-armv7-opt.yml │ ├── test-cpp-android-25-arm64-opt.yml │ ├── test-cpp-android-25-armv7-opt.yml │ ├── test-cpp-linux-amd64-prod_pbmodel-opt.yml │ ├── test-cpp_16k-armbian-arm64-opt.yml │ ├── test-cpp_16k-darwin-amd64-opt.yml │ ├── test-cpp_16k-linux-amd64-opt.yml │ ├── test-cpp_16k-raspbian-rpi3-opt.yml │ ├── test-cpp_16k-win-amd64-opt.yml │ ├── test-cpp_16k-win-cuda-opt.yml │ ├── test-cpp_16k_bytes-darwin-amd64-opt.yml │ ├── test-cpp_16k_bytes-linux-amd64-opt.yml │ ├── test-cpp_16k_tflite-darwin-amd64-opt.yml │ ├── test-cpp_16k_tflite-linux-amd64-opt.yml │ ├── test-cpp_16k_tflite-win-amd64-opt.yml │ ├── test-cpp_8k-linux-amd64-opt.yml │ ├── test-cpp_8k_tflite-linux-amd64-opt.yml │ ├── test-cpp_basic_tflite_valgrind-linux-amd64-dbg.yml │ ├── test-cpp_basic_valgrind-linux-amd64-dbg.yml │ ├── test-cpp_metadata_tflite_valgrind-linux-amd64-dbg.yml │ ├── test-cpp_metadata_valgrind-linux-amd64-dbg.yml │ ├── test-cpp_tflite-linux-amd64-prod-opt.yml │ ├── test-darwin-opt-base.tyml │ ├── test-electronjs_v10.0-darwin-amd64-opt.yml │ ├── test-electronjs_v10.0-win-amd64-opt.yml │ ├── test-electronjs_v10.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v10.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v10.0_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v10.0_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v10.0_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v10.1-darwin-amd64-opt.yml │ ├── 
test-electronjs_v10.1-win-amd64-opt.yml │ ├── test-electronjs_v10.1_16k-linux-amd64-opt.yml │ ├── test-electronjs_v10.1_8k-linux-amd64-opt.yml │ ├── test-electronjs_v10.1_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v10.1_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v10.1_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v11.0-darwin-amd64-opt.yml │ ├── test-electronjs_v11.0-win-amd64-opt.yml │ ├── test-electronjs_v11.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v11.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v11.0_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v11.0_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v11.0_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v12.0-darwin-amd64-opt.yml │ ├── test-electronjs_v12.0-win-amd64-opt.yml │ ├── test-electronjs_v12.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v12.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v12.0_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v12.0_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v12.0_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v5.0-darwin-amd64-opt.yml │ ├── test-electronjs_v5.0-win-amd64-opt.yml │ ├── test-electronjs_v5.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v5.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v6.0-darwin-amd64-opt.yml │ ├── test-electronjs_v6.0-win-amd64-opt.yml │ ├── test-electronjs_v6.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v6.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v6.1-darwin-amd64-opt.yml │ ├── test-electronjs_v6.1-win-amd64-opt.yml │ ├── test-electronjs_v6.1_16k-linux-amd64-opt.yml │ ├── test-electronjs_v6.1_8k-linux-amd64-opt.yml │ ├── test-electronjs_v7.0-darwin-amd64-opt.yml │ ├── test-electronjs_v7.0-win-amd64-opt.yml │ ├── test-electronjs_v7.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v7.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v7.1-darwin-amd64-opt.yml │ ├── test-electronjs_v7.1-win-amd64-opt.yml │ ├── test-electronjs_v7.1_16k-linux-amd64-opt.yml │ ├── 
test-electronjs_v7.1_8k-linux-amd64-opt.yml │ ├── test-electronjs_v8.0-darwin-amd64-opt.yml │ ├── test-electronjs_v8.0-win-amd64-opt.yml │ ├── test-electronjs_v8.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v8.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v8.0_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v8.0_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v8.0_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v9.0-darwin-amd64-opt.yml │ ├── test-electronjs_v9.0-win-amd64-opt.yml │ ├── test-electronjs_v9.0_16k-linux-amd64-opt.yml │ ├── test-electronjs_v9.0_8k-linux-amd64-opt.yml │ ├── test-electronjs_v9.0_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v9.0_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v9.0_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v9.1-darwin-amd64-opt.yml │ ├── test-electronjs_v9.1-win-amd64-opt.yml │ ├── test-electronjs_v9.1_16k-linux-amd64-opt.yml │ ├── test-electronjs_v9.1_8k-linux-amd64-opt.yml │ ├── test-electronjs_v9.1_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v9.1_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v9.1_multiarchpkg-win-tflite-opt.yml │ ├── test-electronjs_v9.2-darwin-amd64-opt.yml │ ├── test-electronjs_v9.2-win-amd64-opt.yml │ ├── test-electronjs_v9.2_16k-linux-amd64-opt.yml │ ├── test-electronjs_v9.2_8k-linux-amd64-opt.yml │ ├── test-electronjs_v9.2_multiarchpkg-win-amd64-opt.yml │ ├── test-electronjs_v9.2_multiarchpkg-win-cuda-opt.yml │ ├── test-electronjs_v9.2_multiarchpkg-win-tflite-opt.yml │ ├── test-evaluate_tflite-linux-amd64-py36m-opt.yml │ ├── test-generate_scorer-android-24-arm64-opt.yml │ ├── test-generate_scorer-android-24-armv7-opt.yml │ ├── test-generate_scorer-android-24-x86_64-opt.yml │ ├── test-generate_scorer-android-25-arm64-opt.yml │ ├── test-generate_scorer-android-25-armv7-opt.yml │ ├── test-generate_scorer-android-25-x86_64-opt.yml │ ├── test-generate_scorer-android-26-x86_64-opt.yml │ ├── test-generate_scorer-android-28-x86_64-opt.yml │ ├── 
test-generate_scorer-android-29-x86_64-opt.yml │ ├── test-generate_scorer-android-30-x86_64-opt.yml │ ├── test-generate_scorer-darwin-amd64-opt.yml │ ├── test-generate_scorer-linux-amd64-opt.yml │ ├── test-generate_scorer-linux-arm64-opt.yml │ ├── test-generate_scorer-linux-rpi3-opt.yml │ ├── test-linux-opt-base.tyml │ ├── test-linux-opt-tag-base.tyml │ ├── test-netframework-win-amd64-opt.yml │ ├── test-netframework-win-cuda-opt.yml │ ├── test-netframework-win-tflite-opt.yml │ ├── test-nodejs_10x-armbian-arm64-opt.yml │ ├── test-nodejs_10x-darwin-amd64-opt.yml │ ├── test-nodejs_10x-raspbian-rpi3-opt.yml │ ├── test-nodejs_10x-win-amd64-opt.yml │ ├── test-nodejs_10x_16k-linux-amd64-opt.yml │ ├── test-nodejs_10x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_10x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_10x_8k-linux-amd64-opt.yml │ ├── test-nodejs_10x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_10x_8k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_11x-armbian-arm64-opt.yml │ ├── test-nodejs_11x-darwin-amd64-opt.yml │ ├── test-nodejs_11x-raspbian-rpi3-opt.yml │ ├── test-nodejs_11x-win-amd64-opt.yml │ ├── test-nodejs_11x_16k-linux-amd64-opt.yml │ ├── test-nodejs_11x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_11x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_11x_8k-linux-amd64-opt.yml │ ├── test-nodejs_11x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_11x_8k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_12x-armbian-arm64-opt.yml │ ├── test-nodejs_12x-darwin-amd64-opt.yml │ ├── test-nodejs_12x-raspbian-rpi3-opt.yml │ ├── test-nodejs_12x-win-amd64-opt.yml │ ├── test-nodejs_12x_16k-linux-amd64-opt.yml │ ├── test-nodejs_12x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_12x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_12x_8k-linux-amd64-opt.yml │ ├── test-nodejs_12x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_12x_8k_tflite-linux-amd64-prod-opt.yml │ ├── 
test-nodejs_13x-armbian-arm64-opt.yml │ ├── test-nodejs_13x-darwin-amd64-opt.yml │ ├── test-nodejs_13x-raspbian-rpi3-opt.yml │ ├── test-nodejs_13x-win-amd64-opt.yml │ ├── test-nodejs_13x_16k-linux-amd64-opt.yml │ ├── test-nodejs_13x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_13x_16k_multiarchpkg-linux-amd64-opt.yml │ ├── test-nodejs_13x_16k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_13x_16k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_13x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_13x_8k-linux-amd64-opt.yml │ ├── test-nodejs_13x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_13x_8k_multiarchpkg-linux-amd64-opt.yml │ ├── test-nodejs_13x_8k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_13x_8k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_13x_8k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_13x_multiarchpkg-armbian-arm64-opt.yml │ ├── test-nodejs_13x_multiarchpkg-darwin-amd64-opt.yml │ ├── test-nodejs_13x_multiarchpkg-darwin-tflite-opt.yml │ ├── test-nodejs_13x_multiarchpkg-raspbian-rpi3-opt.yml │ ├── test-nodejs_13x_multiarchpkg-win-amd64-opt.yml │ ├── test-nodejs_13x_multiarchpkg-win-cuda-opt.yml │ ├── test-nodejs_13x_multiarchpkg-win-tflite-opt.yml │ ├── test-nodejs_14x-armbian-arm64-opt.yml │ ├── test-nodejs_14x-darwin-amd64-opt.yml │ ├── test-nodejs_14x-raspbian-rpi3-opt.yml │ ├── test-nodejs_14x-win-amd64-opt.yml │ ├── test-nodejs_14x_16k-linux-amd64-opt.yml │ ├── test-nodejs_14x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_14x_16k_multiarchpkg-linux-amd64-opt.yml │ ├── test-nodejs_14x_16k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_14x_16k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_14x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_14x_8k-linux-amd64-opt.yml │ ├── test-nodejs_14x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_14x_8k_multiarchpkg-linux-amd64-opt.yml │ ├── 
test-nodejs_14x_8k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_14x_8k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_14x_8k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_14x_multiarchpkg-armbian-arm64-opt.yml │ ├── test-nodejs_14x_multiarchpkg-darwin-amd64-opt.yml │ ├── test-nodejs_14x_multiarchpkg-darwin-tflite-opt.yml │ ├── test-nodejs_14x_multiarchpkg-raspbian-rpi3-opt.yml │ ├── test-nodejs_14x_multiarchpkg-win-amd64-opt.yml │ ├── test-nodejs_14x_multiarchpkg-win-cuda-opt.yml │ ├── test-nodejs_14x_multiarchpkg-win-tflite-opt.yml │ ├── test-nodejs_15x-armbian-arm64-opt.yml │ ├── test-nodejs_15x-darwin-amd64-opt.yml │ ├── test-nodejs_15x-raspbian-rpi3-opt.yml │ ├── test-nodejs_15x-win-amd64-opt.yml │ ├── test-nodejs_15x_16k-linux-amd64-opt.yml │ ├── test-nodejs_15x_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_15x_16k_multiarchpkg-linux-amd64-opt.yml │ ├── test-nodejs_15x_16k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_15x_16k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_15x_16k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_15x_8k-linux-amd64-opt.yml │ ├── test-nodejs_15x_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_15x_8k_multiarchpkg-linux-amd64-opt.yml │ ├── test-nodejs_15x_8k_multiarchpkg-linux-amd64-prod_pbmodel-opt.yml │ ├── test-nodejs_15x_8k_multiarchpkg-linux-tflite-opt.yml │ ├── test-nodejs_15x_8k_tflite-linux-amd64-prod-opt.yml │ ├── test-nodejs_15x_multiarchpkg-armbian-arm64-opt.yml │ ├── test-nodejs_15x_multiarchpkg-darwin-amd64-opt.yml │ ├── test-nodejs_15x_multiarchpkg-darwin-tflite-opt.yml │ ├── test-nodejs_15x_multiarchpkg-raspbian-rpi3-opt.yml │ ├── test-nodejs_15x_multiarchpkg-win-amd64-opt.yml │ ├── test-nodejs_15x_multiarchpkg-win-cuda-opt.yml │ ├── test-nodejs_15x_multiarchpkg-win-tflite-opt.yml │ ├── test-python_35-darwin-amd64-opt.yml │ ├── test-python_35-win-amd64-opt.yml │ ├── test-python_35-win-cuda-opt.yml │ ├── test-python_35_16k-linux-amd64-opt.yml │ ├── 
test-python_35_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_35_8k-linux-amd64-opt.yml │ ├── test-python_35_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_35_tflite_16k-darwin-amd64-opt.yml │ ├── test-python_35_tflite_16k-darwin-amd64-prod-opt.yml │ ├── test-python_35_tflite_16k-linux-amd64-opt.yml │ ├── test-python_35_tflite_16k-linux-amd64-prod-opt.yml │ ├── test-python_35_tflite_16k-win-amd64-opt.yml │ ├── test-python_35_tflite_16k-win-amd64-prod-opt.yml │ ├── test-python_35_tflite_8k-linux-amd64-prod-opt.yml │ ├── test-python_36-darwin-amd64-opt.yml │ ├── test-python_36-win-amd64-opt.yml │ ├── test-python_36-win-cuda-opt.yml │ ├── test-python_36_16k-linux-amd64-opt.yml │ ├── test-python_36_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_36_8k-linux-amd64-opt.yml │ ├── test-python_36_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_36_tflite_16k-darwin-amd64-opt.yml │ ├── test-python_36_tflite_16k-darwin-amd64-prod-opt.yml │ ├── test-python_36_tflite_16k-linux-amd64-opt.yml │ ├── test-python_36_tflite_16k-linux-amd64-prod-opt.yml │ ├── test-python_36_tflite_16k-win-amd64-opt.yml │ ├── test-python_36_tflite_16k-win-amd64-prod-opt.yml │ ├── test-python_36_tflite_8k-linux-amd64-prod-opt.yml │ ├── test-python_37-darwin-amd64-opt.yml │ ├── test-python_37-win-amd64-opt.yml │ ├── test-python_37-win-cuda-opt.yml │ ├── test-python_37_16k-linux-amd64-opt.yml │ ├── test-python_37_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_37_8k-linux-amd64-opt.yml │ ├── test-python_37_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_37_tflite_16k-darwin-amd64-opt.yml │ ├── test-python_37_tflite_16k-darwin-amd64-prod-opt.yml │ ├── test-python_37_tflite_16k-linux-amd64-opt.yml │ ├── test-python_37_tflite_16k-linux-amd64-prod-opt.yml │ ├── test-python_37_tflite_16k-win-amd64-opt.yml │ ├── test-python_37_tflite_16k-win-amd64-prod-opt.yml │ ├── test-python_37_tflite_8k-linux-amd64-prod-opt.yml │ ├── test-python_37m-armbian-arm64-opt.yml │ ├── 
test-python_37m-armbian-arm64-prod-opt.yml │ ├── test-python_37m-raspbian-rpi3-opt.yml │ ├── test-python_37m-raspbian-rpi3-prod-opt.yml │ ├── test-python_38-darwin-amd64-opt.yml │ ├── test-python_38-win-amd64-opt.yml │ ├── test-python_38-win-cuda-opt.yml │ ├── test-python_38_16k-linux-amd64-opt.yml │ ├── test-python_38_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_38_8k-linux-amd64-opt.yml │ ├── test-python_38_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_38_tflite_16k-darwin-amd64-opt.yml │ ├── test-python_38_tflite_16k-darwin-amd64-prod-opt.yml │ ├── test-python_38_tflite_16k-linux-amd64-opt.yml │ ├── test-python_38_tflite_16k-linux-amd64-prod-opt.yml │ ├── test-python_38_tflite_16k-win-amd64-opt.yml │ ├── test-python_38_tflite_16k-win-amd64-prod-opt.yml │ ├── test-python_38_tflite_8k-linux-amd64-prod-opt.yml │ ├── test-python_39-darwin-amd64-opt.yml │ ├── test-python_39-win-amd64-opt.yml │ ├── test-python_39-win-cuda-opt.yml │ ├── test-python_39_16k-linux-amd64-opt.yml │ ├── test-python_39_16k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_39_8k-linux-amd64-opt.yml │ ├── test-python_39_8k-linux-amd64-prod_pbmodel-opt.yml │ ├── test-python_39_tflite_16k-darwin-amd64-opt.yml │ ├── test-python_39_tflite_16k-darwin-amd64-prod-opt.yml │ ├── test-python_39_tflite_16k-linux-amd64-opt.yml │ ├── test-python_39_tflite_16k-linux-amd64-prod-opt.yml │ ├── test-python_39_tflite_16k-win-amd64-opt.yml │ ├── test-python_39_tflite_16k-win-amd64-prod-opt.yml │ ├── test-python_39_tflite_8k-linux-amd64-prod-opt.yml │ ├── test-raspbian-opt-base.tyml │ ├── test-singleshotinference-linux-amd64-py36m-opt.yml │ ├── test-training-extra_16k-linux-amd64-py36m-opt.yml │ ├── test-training-extra_16k-linux-amd64-py37m-opt.yml │ ├── test-training-extra_8k-linux-amd64-py36m-opt.yml │ ├── test-training-extra_8k-linux-amd64-py37m-opt.yml │ ├── test-training-pypi_16k-linux-amd64-py36m-opt.yml │ ├── test-training-pypi_16k-linux-amd64-py37m-opt.yml │ ├── 
test-training-pypi_8k-linux-amd64-py36m-opt.yml │ ├── test-training-pypi_8k-linux-amd64-py37m-opt.yml │ ├── test-training-unittests_8k-linux-amd64-py36m-opt.yml │ ├── test-training-unittests_8k-linux-amd64-py37m-opt.yml │ ├── test-training_16k-linux-amd64-py36m-opt.yml │ ├── test-training_16k-linux-amd64-py37m-opt.yml │ ├── test-training_8k-linux-amd64-py36m-opt.yml │ ├── test-training_8k-linux-amd64-py37m-opt.yml │ ├── test-transcribe_16k-linux-amd64-py36m-opt.yml │ ├── test-transcribe_16k-linux-amd64-py37m-opt.yml │ ├── test-transcribe_8k-linux-amd64-py36m-opt.yml │ ├── test-transcribe_8k-linux-amd64-py37m-opt.yml │ ├── test-transfer-linux-amd64-py36m-opt.yml │ ├── test-win-cuda-opt-base.tyml │ ├── test-win-opt-base.tyml │ ├── tf_android-arm64-dbg.yml │ ├── tf_android-arm64-opt.yml │ ├── tf_android-armv7-dbg.yml │ ├── tf_android-armv7-opt.yml │ ├── tf_darwin-amd64-opt.yml │ ├── tf_ios-arm64-opt.yml │ ├── tf_ios-x86_64-opt.yml │ ├── tf_linux-amd64-cpu-opt.yml │ ├── tf_linux-amd64-cpu_gcc9.yml │ ├── tf_linux-amd64-gpu-opt.yml │ ├── tf_linux-amd64-gpu_gcc9.yml.DISABLED │ ├── tf_linux-arm64-cpu-dbg.yml │ ├── tf_linux-arm64-cpu-opt.yml │ ├── tf_linux-rpi3-cpu-dbg.yml │ ├── tf_linux-rpi3-cpu-opt.yml │ ├── tf_tc-build.sh │ ├── tf_tc-package.sh │ ├── tf_tc-pip.sh │ ├── tf_tc-setup.sh │ ├── tf_tc-vars.sh │ ├── tf_win-amd64-cpu-opt.yml │ ├── tf_win-amd64-gpu-opt.yml │ ├── win-amd64-cpu-opt.yml │ ├── win-amd64-ctc-opt.yml │ ├── win-amd64-gpu-opt.yml │ ├── win-amd64-tflite-opt.yml │ ├── win-build.sh │ ├── win-opt-base.tyml │ ├── win-package.sh │ └── worker.cyml ├── taskcluster.disabled.yml ├── tensorflow_full_runtime.supp ├── tensorflow_tflite_runtime.supp ├── tests/ │ ├── __init__.py │ ├── test_data/ │ │ ├── alphabet_macos.txt │ │ ├── alphabet_unix.txt │ │ ├── alphabet_windows.txt │ │ └── validate_locale_fra.py │ ├── test_importers.py │ ├── test_text.py │ └── test_value_range.py ├── training/ │ └── deepspeech_training/ │ ├── GRAPH_VERSION │ ├── VERSION │ ├── __init__.py │ 
├── evaluate.py │ ├── train.py │ └── util/ │ ├── __init__.py │ ├── audio.py │ ├── augmentations.py │ ├── check_characters.py │ ├── checkpoints.py │ ├── config.py │ ├── downloader.py │ ├── evaluate_tools.py │ ├── feeding.py │ ├── flags.py │ ├── gpu.py │ ├── helpers.py │ ├── importers.py │ ├── io.py │ ├── logging.py │ ├── sample_collections.py │ ├── stm.py │ └── text.py └── transcribe.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .cardboardlint.yml ================================================ linters: - pylint: filefilter: ['+ *.py', '+ bin/*.py'] ================================================ FILE: .compute ================================================ #!/bin/bash set -xe apt-get install -y python3-venv libopus0 python3 -m venv /tmp/venv source /tmp/venv/bin/activate pip install -U setuptools wheel pip pip install . pip uninstall -y tensorflow pip install tensorflow-gpu==1.14 mkdir -p ../keep/summaries data="${SHARED_DIR}/data" fis="${data}/LDC/fisher" swb="${data}/LDC/LDC97S62/swb" lbs="${data}/OpenSLR/LibriSpeech/librivox" cv="${data}/mozilla/CommonVoice/en_1087h_2019-06-12/clips" npr="${data}/NPR/WAMU/sets/v0.3" python -u DeepSpeech.py \ --train_files "${npr}/best-train.sdb","${npr}/good-train.sdb","${cv}/train.sdb","${fis}-train.sdb","${swb}-train.sdb","${lbs}-train-clean-100.sdb","${lbs}-train-clean-360.sdb","${lbs}-train-other-500.sdb" \ --dev_files "${lbs}-dev-clean.sdb" \ --test_files "${lbs}-test-clean.sdb" \ --train_batch_size 24 \ --dev_batch_size 48 \ --test_batch_size 48 \ --train_cudnn \ --n_hidden 2048 \ --learning_rate 0.0001 \ --dropout_rate 0.40 \ --epochs 150 \ --noearly_stop \ --feature_cache "../tmp/feature.cache" \ --checkpoint_dir "../keep" \ --summary_dir "../keep/summaries" ================================================ FILE: .gitattributes ================================================ data/lm/kenlm.scorer 
filter=lfs diff=lfs merge=lfs -text .github/actions/check_artifact_exists/dist/index.js binary ================================================ FILE: .github/actions/build-tensorflow/action.yml ================================================ name: "Build TensorFlow" description: "Build TensorFlow Build" inputs: flavor: description: "Build flavor" required: true runs: using: "composite" steps: - run: ./ci_scripts/tf-build.sh ${{ inputs.flavor }} shell: bash ================================================ FILE: .github/actions/check_artifact_exists/README.md ================================================ Building and using a TensorFlow cache: ====================================== The present action will check the existence of an artifact in the list of the repo artifacts. Since we don't always want to download the artifact, we can't rely on the official download-artifact action. Rationale: ---------- Because of the amount of code required to build TensorFlow, the library build is split into two main parts to make it much faster to run PRs: - a TensorFlow prebuild cache - actual code of the library The TensorFlow prebuild cache exists because building tensorflow (even just the `libtensorflow_cpp.so`) is a huge amount of code and it will take several hours even on decent systems. So we perform a cache build of it, because the tensorflow version does not change that often. However, each PR might have changes to the actual library code, so we rebuild this every time. The `tensorflow_opt-macOS` job checks whether such a build cache already exists. Those caches are stored as artifacts because [GitHub Actions cache](https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows) has size limitations. The `build-tensorflow-macOS` job depends on the cache check to know whether it needs to run an actual build or not.
Hacking: -------- For hacking into the action, please follow the [GitHub JavaScript Actions](https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action#commit-tag-and-push-your-action-to-github) and specifically the usage of `ncc`. ``` $ npm install $ npx ncc build main.js --license licenses.txt $ git add dist/ ``` ================================================ FILE: .github/actions/check_artifact_exists/action.yml ================================================ name: "check/download artifacts" description: "Check and download that an artifact exists" inputs: name: description: "Artifact name" required: true github_token: description: "GitHub token" required: false default: ${{ github.token }} download: description: "Should we download?" required: false default: false path: description: "Where to unpack the artifact" required: false default: "./" repo: description: "Repository name with owner (like actions/checkout)" required: false default: ${{ github.repository }} outputs: status: description: "Status string of the artifact: 'missing' or 'found'" runs: using: "node12" main: "dist/index.js" ================================================ FILE: .github/actions/check_artifact_exists/dist/index.js ================================================ module.exports = /******/ (() => { // webpackBootstrap /******/ var __webpack_modules__ = ({ /***/ 5496: /***/ ((__unused_webpack_module, __unused_webpack_exports, __nccwpck_require__) => { const core = __nccwpck_require__(2186); const github = __nccwpck_require__(5438); const AdmZip = __nccwpck_require__(6761); const filesize = __nccwpck_require__(5060); const pathname = __nccwpck_require__(5622); const fs = __nccwpck_require__(5747); const { throttling } = __nccwpck_require__(9968); const { GitHub } = __nccwpck_require__(3030); async function getGoodArtifacts(client, owner, repo, name) { const goodRepoArtifacts = await client.paginate( "GET /repos/{owner}/{repo}/actions/artifacts", { owner: owner, 
repo: repo, per_page: 100, }, (repoArtifacts, done) => { // console.log(" ==> repoArtifacts", repoArtifacts); const goodArtifacts = repoArtifacts.data.filter((a) => { // console.log("==> Artifact check", a); return a.name == name }); if (goodArtifacts.length > 0) { done(); } return goodArtifacts; } ); console.log("==> maybe goodRepoArtifacts:", goodRepoArtifacts); return goodRepoArtifacts; } async function main() { const token = core.getInput("github_token", { required: true }); const [owner, repo] = core.getInput("repo", { required: true }).split("/"); const path = core.getInput("path", { required: true }); const name = core.getInput("name"); const download = core.getInput("download"); const OctokitWithThrottling = GitHub.plugin(throttling); const client = new OctokitWithThrottling({ auth: token, throttle: { onRateLimit: (retryAfter, options) => { console.log( `Request quota exhausted for request ${options.method} ${options.url}` ); // Retry twice after hitting a rate limit error, then give up if (options.request.retryCount <= 2) { console.log(`Retrying after ${retryAfter} seconds!`); return true; } }, onAbuseLimit: (retryAfter, options) => { // does not retry, only logs a warning console.log( `Abuse detected for request ${options.method} ${options.url}` ); }, }, }); console.log("==> Repo:", owner + "/" + repo); const goodArtifacts = await getGoodArtifacts(client, owner, repo, name); console.log("==> goodArtifacts:", goodArtifacts); let artifactStatus = ""; if (goodArtifacts.length === 0) { artifactStatus = "missing"; } else { artifactStatus = "found"; } console.log("==> Artifact", name, artifactStatus); console.log("==> download", download); core.setOutput("status", artifactStatus); if (artifactStatus === "found" && download == "true") { console.log("==> # artifacts:", goodArtifacts.length); let artifact = goodArtifacts[0]; console.log("==> Artifact:", artifact.id) const size = filesize(artifact.size_in_bytes, { base: 10 }) console.log("==> Downloading:", 
artifact.name + ".zip", `(${size})`) const zip = await client.actions.downloadArtifact({ owner: owner, repo: repo, artifact_id: artifact.id, archive_format: "zip", }) const dir = name ? path : pathname.join(path, artifact.name) fs.mkdirSync(dir, { recursive: true }) const adm = new AdmZip(Buffer.from(zip.data)) adm.getEntries().forEach((entry) => { const action = entry.isDirectory ? "creating" : "inflating" const filepath = pathname.join(dir, entry.entryName) console.log(` ${action}: ${filepath}`) }) adm.extractAllTo(dir, true) } if (artifactStatus === "missing" && download == "true") { core.setFailed("Required", name, "that is missing"); } return; } // We have to manually wrap the main function with a try-catch here because // GitHub will ignore uncatched exceptions and continue running the workflow, // leading to harder to diagnose errors downstream from this action. try { main(); } catch (error) { core.setFailed(error.message); } /***/ }), /***/ 7351: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; result["default"] = mod; return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); const os = __importStar(__nccwpck_require__(2087)); const utils_1 = __nccwpck_require__(5278); /** * Commands * * Command Format: * ::name key=value,key=value::message * * Examples: * ::warning::This is the message * ::set-env name=MY_VAR::some value */ function issueCommand(command, properties, message) { const cmd = new Command(command, properties, message); process.stdout.write(cmd.toString() + os.EOL); } exports.issueCommand = issueCommand; function issue(name, message = '') { issueCommand(name, {}, message); } exports.issue = issue; const CMD_STRING = '::'; class Command { constructor(command, 
properties, message) { if (!command) { command = 'missing.command'; } this.command = command; this.properties = properties; this.message = message; } toString() { let cmdStr = CMD_STRING + this.command; if (this.properties && Object.keys(this.properties).length > 0) { cmdStr += ' '; let first = true; for (const key in this.properties) { if (this.properties.hasOwnProperty(key)) { const val = this.properties[key]; if (val) { if (first) { first = false; } else { cmdStr += ','; } cmdStr += `${key}=${escapeProperty(val)}`; } } } } cmdStr += `${CMD_STRING}${escapeData(this.message)}`; return cmdStr; } } function escapeData(s) { return utils_1.toCommandValue(s) .replace(/%/g, '%25') .replace(/\r/g, '%0D') .replace(/\n/g, '%0A'); } function escapeProperty(s) { return utils_1.toCommandValue(s) .replace(/%/g, '%25') .replace(/\r/g, '%0D') .replace(/\n/g, '%0A') .replace(/:/g, '%3A') .replace(/,/g, '%2C'); } //# sourceMappingURL=command.js.map /***/ }), /***/ 2186: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; result["default"] = mod; return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); const command_1 = __nccwpck_require__(7351); const file_command_1 = __nccwpck_require__(717); const utils_1 = __nccwpck_require__(5278); const os = __importStar(__nccwpck_require__(2087)); const path = __importStar(__nccwpck_require__(5622)); /** * The code to exit an action */ var ExitCode; (function (ExitCode) { /** * A code indicating that the action was successful */ ExitCode[ExitCode["Success"] = 0] = "Success"; /** * A code indicating that the action was a failure */ ExitCode[ExitCode["Failure"] = 1] = "Failure"; })(ExitCode = exports.ExitCode || (exports.ExitCode = {})); //----------------------------------------------------------------------- // Variables //----------------------------------------------------------------------- /** * Sets env variable for this action and future actions in the job * @param name the name of the variable to set * @param val the value of the variable. 
Non-string values will be converted to a string via JSON.stringify */ // eslint-disable-next-line @typescript-eslint/no-explicit-any function exportVariable(name, val) { const convertedVal = utils_1.toCommandValue(val); process.env[name] = convertedVal; const filePath = process.env['GITHUB_ENV'] || ''; if (filePath) { const delimiter = '_GitHubActionsFileCommandDelimeter_'; const commandValue = `${name}<<${delimiter}${os.EOL}${convertedVal}${os.EOL}${delimiter}`; file_command_1.issueCommand('ENV', commandValue); } else { command_1.issueCommand('set-env', { name }, convertedVal); } } exports.exportVariable = exportVariable; /** * Registers a secret which will get masked from logs * @param secret value of the secret */ function setSecret(secret) { command_1.issueCommand('add-mask', {}, secret); } exports.setSecret = setSecret; /** * Prepends inputPath to the PATH (for this action and future actions) * @param inputPath */ function addPath(inputPath) { const filePath = process.env['GITHUB_PATH'] || ''; if (filePath) { file_command_1.issueCommand('PATH', inputPath); } else { command_1.issueCommand('add-path', {}, inputPath); } process.env['PATH'] = `${inputPath}${path.delimiter}${process.env['PATH']}`; } exports.addPath = addPath; /** * Gets the value of an input. The value is also trimmed. * * @param name name of the input to get * @param options optional. See InputOptions. * @returns string */ function getInput(name, options) { const val = process.env[`INPUT_${name.replace(/ /g, '_').toUpperCase()}`] || ''; if (options && options.required && !val) { throw new Error(`Input required and not supplied: ${name}`); } return val.trim(); } exports.getInput = getInput; /** * Sets the value of an output. * * @param name name of the output to set * @param value value to store. 
Non-string values will be converted to a string via JSON.stringify */ // eslint-disable-next-line @typescript-eslint/no-explicit-any function setOutput(name, value) { command_1.issueCommand('set-output', { name }, value); } exports.setOutput = setOutput; /** * Enables or disables the echoing of commands into stdout for the rest of the step. * Echoing is disabled by default if ACTIONS_STEP_DEBUG is not set. * */ function setCommandEcho(enabled) { command_1.issue('echo', enabled ? 'on' : 'off'); } exports.setCommandEcho = setCommandEcho; //----------------------------------------------------------------------- // Results //----------------------------------------------------------------------- /** * Sets the action status to failed. * When the action exits it will be with an exit code of 1 * @param message add error issue message */ function setFailed(message) { process.exitCode = ExitCode.Failure; error(message); } exports.setFailed = setFailed; //----------------------------------------------------------------------- // Logging Commands //----------------------------------------------------------------------- /** * Gets whether Actions Step Debug is on or not */ function isDebug() { return process.env['RUNNER_DEBUG'] === '1'; } exports.isDebug = isDebug; /** * Writes debug message to user log * @param message debug message */ function debug(message) { command_1.issueCommand('debug', {}, message); } exports.debug = debug; /** * Adds an error issue * @param message error issue message. Errors will be converted to string via toString() */ function error(message) { command_1.issue('error', message instanceof Error ? message.toString() : message); } exports.error = error; /** * Adds an warning issue * @param message warning issue message. Errors will be converted to string via toString() */ function warning(message) { command_1.issue('warning', message instanceof Error ? 
message.toString() : message); } exports.warning = warning; /** * Writes info to log with console.log. * @param message info message */ function info(message) { process.stdout.write(message + os.EOL); } exports.info = info; /** * Begin an output group. * * Output until the next `groupEnd` will be foldable in this group * * @param name The name of the output group */ function startGroup(name) { command_1.issue('group', name); } exports.startGroup = startGroup; /** * End an output group. */ function endGroup() { command_1.issue('endgroup'); } exports.endGroup = endGroup; /** * Wrap an asynchronous function call in a group. * * Returns the same type as the function itself. * * @param name The name of the group * @param fn The function to wrap in the group */ function group(name, fn) { return __awaiter(this, void 0, void 0, function* () { startGroup(name); let result; try { result = yield fn(); } finally { endGroup(); } return result; }); } exports.group = group; //----------------------------------------------------------------------- // Wrapper action state //----------------------------------------------------------------------- /** * Saves state for current action, the state can only be retrieved by this action's post job execution. * * @param name name of the state to store * @param value value to store. Non-string values will be converted to a string via JSON.stringify */ // eslint-disable-next-line @typescript-eslint/no-explicit-any function saveState(name, value) { command_1.issueCommand('save-state', { name }, value); } exports.saveState = saveState; /** * Gets the value of an state set by this action's main execution. * * @param name name of the state to get * @returns string */ function getState(name) { return process.env[`STATE_${name}`] || ''; } exports.getState = getState; //# sourceMappingURL=core.js.map /***/ }), /***/ 717: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; // For internal use, subject to change. 
var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; result["default"] = mod; return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); // We use any as a valid input type /* eslint-disable @typescript-eslint/no-explicit-any */ const fs = __importStar(__nccwpck_require__(5747)); const os = __importStar(__nccwpck_require__(2087)); const utils_1 = __nccwpck_require__(5278); function issueCommand(command, message) { const filePath = process.env[`GITHUB_${command}`]; if (!filePath) { throw new Error(`Unable to find environment variable for file command ${command}`); } if (!fs.existsSync(filePath)) { throw new Error(`Missing file at path: ${filePath}`); } fs.appendFileSync(filePath, `${utils_1.toCommandValue(message)}${os.EOL}`, { encoding: 'utf8' }); } exports.issueCommand = issueCommand; //# sourceMappingURL=file-command.js.map /***/ }), /***/ 5278: /***/ ((__unused_webpack_module, exports) => { "use strict"; // We use any as a valid input type /* eslint-disable @typescript-eslint/no-explicit-any */ Object.defineProperty(exports, "__esModule", ({ value: true })); /** * Sanitizes an input into a string so it can be passed into issueCommand safely * @param input input to sanitize into a string */ function toCommandValue(input) { if (input === null || input === undefined) { return ''; } else if (typeof input === 'string' || input instanceof String) { return input; } return JSON.stringify(input); } exports.toCommandValue = toCommandValue; //# sourceMappingURL=utils.js.map /***/ }), /***/ 4087: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.Context = void 0; const fs_1 = __nccwpck_require__(5747); const os_1 = __nccwpck_require__(2087); class Context { /** * Hydrate the context 
from the environment */ constructor() { this.payload = {}; if (process.env.GITHUB_EVENT_PATH) { if (fs_1.existsSync(process.env.GITHUB_EVENT_PATH)) { this.payload = JSON.parse(fs_1.readFileSync(process.env.GITHUB_EVENT_PATH, { encoding: 'utf8' })); } else { const path = process.env.GITHUB_EVENT_PATH; process.stdout.write(`GITHUB_EVENT_PATH ${path} does not exist${os_1.EOL}`); } } this.eventName = process.env.GITHUB_EVENT_NAME; this.sha = process.env.GITHUB_SHA; this.ref = process.env.GITHUB_REF; this.workflow = process.env.GITHUB_WORKFLOW; this.action = process.env.GITHUB_ACTION; this.actor = process.env.GITHUB_ACTOR; this.job = process.env.GITHUB_JOB; this.runNumber = parseInt(process.env.GITHUB_RUN_NUMBER, 10); this.runId = parseInt(process.env.GITHUB_RUN_ID, 10); } get issue() { const payload = this.payload; return Object.assign(Object.assign({}, this.repo), { number: (payload.issue || payload.pull_request || payload).number }); } get repo() { if (process.env.GITHUB_REPOSITORY) { const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/'); return { owner, repo }; } if (this.payload.repository) { return { owner: this.payload.repository.owner.login, repo: this.payload.repository.name }; } throw new Error("context.repo requires a GITHUB_REPOSITORY environment variable like 'owner/repo'"); } } exports.Context = Context; //# sourceMappingURL=context.js.map /***/ }), /***/ 5438: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? 
(function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.getOctokit = exports.context = void 0; const Context = __importStar(__nccwpck_require__(4087)); const utils_1 = __nccwpck_require__(3030); exports.context = new Context.Context(); /** * Returns a hydrated octokit ready to use for GitHub Actions * * @param token the repo PAT or GITHUB_TOKEN * @param options other options to set */ function getOctokit(token, options) { return new utils_1.GitHub(utils_1.getOctokitOptions(token, options)); } exports.getOctokit = getOctokit; //# sourceMappingURL=github.js.map /***/ }), /***/ 7914: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? 
(function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.getApiBaseUrl = exports.getProxyAgent = exports.getAuthString = void 0; const httpClient = __importStar(__nccwpck_require__(9925)); function getAuthString(token, options) { if (!token && !options.auth) { throw new Error('Parameter token or opts.auth is required'); } else if (token && options.auth) { throw new Error('Parameters token and opts.auth may not both be specified'); } return typeof options.auth === 'string' ? options.auth : `token ${token}`; } exports.getAuthString = getAuthString; function getProxyAgent(destinationUrl) { const hc = new httpClient.HttpClient(); return hc.getAgent(destinationUrl); } exports.getProxyAgent = getProxyAgent; function getApiBaseUrl() { return process.env['GITHUB_API_URL'] || 'https://api.github.com'; } exports.getApiBaseUrl = getApiBaseUrl; //# sourceMappingURL=utils.js.map /***/ }), /***/ 3030: /***/ (function(__unused_webpack_module, exports, __nccwpck_require__) { "use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? 
(function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.getOctokitOptions = exports.GitHub = exports.context = void 0; const Context = __importStar(__nccwpck_require__(4087)); const Utils = __importStar(__nccwpck_require__(7914)); // octokit + plugins const core_1 = __nccwpck_require__(6762); const plugin_rest_endpoint_methods_1 = __nccwpck_require__(3044); const plugin_paginate_rest_1 = __nccwpck_require__(4193); exports.context = new Context.Context(); const baseUrl = Utils.getApiBaseUrl(); const defaults = { baseUrl, request: { agent: Utils.getProxyAgent(baseUrl) } }; exports.GitHub = core_1.Octokit.plugin(plugin_rest_endpoint_methods_1.restEndpointMethods, plugin_paginate_rest_1.paginateRest).defaults(defaults); /** * Convience function to correctly format Octokit Options to pass into the constructor. 
* * @param token the repo PAT or GITHUB_TOKEN * @param options other options to set */ function getOctokitOptions(token, options) { const opts = Object.assign({}, options || {}); // Shallow clone - don't mutate the object provided by the caller // Auth const auth = Utils.getAuthString(token, opts); if (auth) { opts.auth = auth; } return opts; } exports.getOctokitOptions = getOctokitOptions; //# sourceMappingURL=utils.js.map /***/ }), /***/ 9925: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; Object.defineProperty(exports, "__esModule", ({ value: true })); const http = __nccwpck_require__(8605); const https = __nccwpck_require__(7211); const pm = __nccwpck_require__(6443); let tunnel; var HttpCodes; (function (HttpCodes) { HttpCodes[HttpCodes["OK"] = 200] = "OK"; HttpCodes[HttpCodes["MultipleChoices"] = 300] = "MultipleChoices"; HttpCodes[HttpCodes["MovedPermanently"] = 301] = "MovedPermanently"; HttpCodes[HttpCodes["ResourceMoved"] = 302] = "ResourceMoved"; HttpCodes[HttpCodes["SeeOther"] = 303] = "SeeOther"; HttpCodes[HttpCodes["NotModified"] = 304] = "NotModified"; HttpCodes[HttpCodes["UseProxy"] = 305] = "UseProxy"; HttpCodes[HttpCodes["SwitchProxy"] = 306] = "SwitchProxy"; HttpCodes[HttpCodes["TemporaryRedirect"] = 307] = "TemporaryRedirect"; HttpCodes[HttpCodes["PermanentRedirect"] = 308] = "PermanentRedirect"; HttpCodes[HttpCodes["BadRequest"] = 400] = "BadRequest"; HttpCodes[HttpCodes["Unauthorized"] = 401] = "Unauthorized"; HttpCodes[HttpCodes["PaymentRequired"] = 402] = "PaymentRequired"; HttpCodes[HttpCodes["Forbidden"] = 403] = "Forbidden"; HttpCodes[HttpCodes["NotFound"] = 404] = "NotFound"; HttpCodes[HttpCodes["MethodNotAllowed"] = 405] = "MethodNotAllowed"; HttpCodes[HttpCodes["NotAcceptable"] = 406] = "NotAcceptable"; HttpCodes[HttpCodes["ProxyAuthenticationRequired"] = 407] = "ProxyAuthenticationRequired"; HttpCodes[HttpCodes["RequestTimeout"] = 408] = "RequestTimeout"; HttpCodes[HttpCodes["Conflict"] = 409] = 
"Conflict"; HttpCodes[HttpCodes["Gone"] = 410] = "Gone"; HttpCodes[HttpCodes["TooManyRequests"] = 429] = "TooManyRequests"; HttpCodes[HttpCodes["InternalServerError"] = 500] = "InternalServerError"; HttpCodes[HttpCodes["NotImplemented"] = 501] = "NotImplemented"; HttpCodes[HttpCodes["BadGateway"] = 502] = "BadGateway"; HttpCodes[HttpCodes["ServiceUnavailable"] = 503] = "ServiceUnavailable"; HttpCodes[HttpCodes["GatewayTimeout"] = 504] = "GatewayTimeout"; })(HttpCodes = exports.HttpCodes || (exports.HttpCodes = {})); var Headers; (function (Headers) { Headers["Accept"] = "accept"; Headers["ContentType"] = "content-type"; })(Headers = exports.Headers || (exports.Headers = {})); var MediaTypes; (function (MediaTypes) { MediaTypes["ApplicationJson"] = "application/json"; })(MediaTypes = exports.MediaTypes || (exports.MediaTypes = {})); /** * Returns the proxy URL, depending upon the supplied url and proxy environment variables. * @param serverUrl The server URL where the request will be sent. For example, https://api.github.com */ function getProxyUrl(serverUrl) { let proxyUrl = pm.getProxyUrl(new URL(serverUrl)); return proxyUrl ? 
proxyUrl.href : ''; } exports.getProxyUrl = getProxyUrl; const HttpRedirectCodes = [ HttpCodes.MovedPermanently, HttpCodes.ResourceMoved, HttpCodes.SeeOther, HttpCodes.TemporaryRedirect, HttpCodes.PermanentRedirect ]; const HttpResponseRetryCodes = [ HttpCodes.BadGateway, HttpCodes.ServiceUnavailable, HttpCodes.GatewayTimeout ]; const RetryableHttpVerbs = ['OPTIONS', 'GET', 'DELETE', 'HEAD']; const ExponentialBackoffCeiling = 10; const ExponentialBackoffTimeSlice = 5; class HttpClientError extends Error { constructor(message, statusCode) { super(message); this.name = 'HttpClientError'; this.statusCode = statusCode; Object.setPrototypeOf(this, HttpClientError.prototype); } } exports.HttpClientError = HttpClientError; class HttpClientResponse { constructor(message) { this.message = message; } readBody() { return new Promise(async (resolve, reject) => { let output = Buffer.alloc(0); this.message.on('data', (chunk) => { output = Buffer.concat([output, chunk]); }); this.message.on('end', () => { resolve(output.toString()); }); }); } } exports.HttpClientResponse = HttpClientResponse; function isHttps(requestUrl) { let parsedUrl = new URL(requestUrl); return parsedUrl.protocol === 'https:'; } exports.isHttps = isHttps; class HttpClient { constructor(userAgent, handlers, requestOptions) { this._ignoreSslError = false; this._allowRedirects = true; this._allowRedirectDowngrade = false; this._maxRedirects = 50; this._allowRetries = false; this._maxRetries = 1; this._keepAlive = false; this._disposed = false; this.userAgent = userAgent; this.handlers = handlers || []; this.requestOptions = requestOptions; if (requestOptions) { if (requestOptions.ignoreSslError != null) { this._ignoreSslError = requestOptions.ignoreSslError; } this._socketTimeout = requestOptions.socketTimeout; if (requestOptions.allowRedirects != null) { this._allowRedirects = requestOptions.allowRedirects; } if (requestOptions.allowRedirectDowngrade != null) { this._allowRedirectDowngrade = 
requestOptions.allowRedirectDowngrade; } if (requestOptions.maxRedirects != null) { this._maxRedirects = Math.max(requestOptions.maxRedirects, 0); } if (requestOptions.keepAlive != null) { this._keepAlive = requestOptions.keepAlive; } if (requestOptions.allowRetries != null) { this._allowRetries = requestOptions.allowRetries; } if (requestOptions.maxRetries != null) { this._maxRetries = requestOptions.maxRetries; } } } options(requestUrl, additionalHeaders) { return this.request('OPTIONS', requestUrl, null, additionalHeaders || {}); } get(requestUrl, additionalHeaders) { return this.request('GET', requestUrl, null, additionalHeaders || {}); } del(requestUrl, additionalHeaders) { return this.request('DELETE', requestUrl, null, additionalHeaders || {}); } post(requestUrl, data, additionalHeaders) { return this.request('POST', requestUrl, data, additionalHeaders || {}); } patch(requestUrl, data, additionalHeaders) { return this.request('PATCH', requestUrl, data, additionalHeaders || {}); } put(requestUrl, data, additionalHeaders) { return this.request('PUT', requestUrl, data, additionalHeaders || {}); } head(requestUrl, additionalHeaders) { return this.request('HEAD', requestUrl, null, additionalHeaders || {}); } sendStream(verb, requestUrl, stream, additionalHeaders) { return this.request(verb, requestUrl, stream, additionalHeaders); } /** * Gets a typed object from an endpoint * Be aware that not found returns a null. 
Other errors (4xx, 5xx) reject the promise */ async getJson(requestUrl, additionalHeaders = {}) { additionalHeaders[Headers.Accept] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.Accept, MediaTypes.ApplicationJson); let res = await this.get(requestUrl, additionalHeaders); return this._processResponse(res, this.requestOptions); } async postJson(requestUrl, obj, additionalHeaders = {}) { let data = JSON.stringify(obj, null, 2); additionalHeaders[Headers.Accept] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.Accept, MediaTypes.ApplicationJson); additionalHeaders[Headers.ContentType] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.ContentType, MediaTypes.ApplicationJson); let res = await this.post(requestUrl, data, additionalHeaders); return this._processResponse(res, this.requestOptions); } async putJson(requestUrl, obj, additionalHeaders = {}) { let data = JSON.stringify(obj, null, 2); additionalHeaders[Headers.Accept] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.Accept, MediaTypes.ApplicationJson); additionalHeaders[Headers.ContentType] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.ContentType, MediaTypes.ApplicationJson); let res = await this.put(requestUrl, data, additionalHeaders); return this._processResponse(res, this.requestOptions); } async patchJson(requestUrl, obj, additionalHeaders = {}) { let data = JSON.stringify(obj, null, 2); additionalHeaders[Headers.Accept] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.Accept, MediaTypes.ApplicationJson); additionalHeaders[Headers.ContentType] = this._getExistingOrDefaultHeader(additionalHeaders, Headers.ContentType, MediaTypes.ApplicationJson); let res = await this.patch(requestUrl, data, additionalHeaders); return this._processResponse(res, this.requestOptions); } /** * Makes a raw http request. * All other methods such as get, post, patch, and request ultimately call this. 
* Prefer get, del, post and patch */ async request(verb, requestUrl, data, headers) { if (this._disposed) { throw new Error('Client has already been disposed.'); } let parsedUrl = new URL(requestUrl); let info = this._prepareRequest(verb, parsedUrl, headers); // Only perform retries on reads since writes may not be idempotent. let maxTries = this._allowRetries && RetryableHttpVerbs.indexOf(verb) != -1 ? this._maxRetries + 1 : 1; let numTries = 0; let response; while (numTries < maxTries) { response = await this.requestRaw(info, data); // Check if it's an authentication challenge if (response && response.message && response.message.statusCode === HttpCodes.Unauthorized) { let authenticationHandler; for (let i = 0; i < this.handlers.length; i++) { if (this.handlers[i].canHandleAuthentication(response)) { authenticationHandler = this.handlers[i]; break; } } if (authenticationHandler) { return authenticationHandler.handleAuthentication(this, info, data); } else { // We have received an unauthorized response but have no handlers to handle it. // Let the response return to the caller. return response; } } let redirectsRemaining = this._maxRedirects; while (HttpRedirectCodes.indexOf(response.message.statusCode) != -1 && this._allowRedirects && redirectsRemaining > 0) { const redirectUrl = response.message.headers['location']; if (!redirectUrl) { // if there's no location to redirect to, we won't break; } let parsedRedirectUrl = new URL(redirectUrl); if (parsedUrl.protocol == 'https:' && parsedUrl.protocol != parsedRedirectUrl.protocol && !this._allowRedirectDowngrade) { throw new Error('Redirect from HTTPS to HTTP protocol. This downgrade is not allowed for security reasons. If you want to allow this behavior, set the allowRedirectDowngrade option to true.'); } // we need to finish reading the response before reassigning response // which will leak the open socket. 
await response.readBody(); // strip authorization header if redirected to a different hostname if (parsedRedirectUrl.hostname !== parsedUrl.hostname) { for (let header in headers) { // header names are case insensitive if (header.toLowerCase() === 'authorization') { delete headers[header]; } } } // let's make the request with the new redirectUrl info = this._prepareRequest(verb, parsedRedirectUrl, headers); response = await this.requestRaw(info, data); redirectsRemaining--; } if (HttpResponseRetryCodes.indexOf(response.message.statusCode) == -1) { // If not a retry code, return immediately instead of retrying return response; } numTries += 1; if (numTries < maxTries) { await response.readBody(); await this._performExponentialBackoff(numTries); } } return response; } /** * Needs to be called if keepAlive is set to true in request options. */ dispose() { if (this._agent) { this._agent.destroy(); } this._disposed = true; } /** * Raw request. * @param info * @param data */ requestRaw(info, data) { return new Promise((resolve, reject) => { let callbackForResult = function (err, res) { if (err) { reject(err); } resolve(res); }; this.requestRawWithCallback(info, data, callbackForResult); }); } /** * Raw request with callback. 
/**
 * Raw request with callback.
 *
 * Issues the request described by `info` through `info.httpModule.request`
 * and reports the outcome through `onResult`, which is invoked at most once.
 *
 * @param info     Prepared request info; reads `info.httpModule` and
 *                 `info.options` (and writes `info.options.headers['Content-Length']`
 *                 when `data` is a string).
 * @param data     Request body. A string is written as UTF-8; any other truthy
 *                 value is treated as a stream (`on`/`pipe` are called on it);
 *                 a falsy value sends no body.
 * @param onResult Callback `(err, res)`; `res` is an HttpClientResponse on
 *                 success, `null` on error or timeout.
 */
requestRawWithCallback(info, data, onResult) {
    let socket;
    if (typeof data === 'string') {
        // Byte length, not character count: multi-byte characters matter here.
        info.options.headers['Content-Length'] = Buffer.byteLength(data, 'utf8');
    }
    // Guard so that only the first of response / timeout / error is reported.
    let callbackCalled = false;
    let handleResult = (err, res) => {
        if (!callbackCalled) {
            callbackCalled = true;
            onResult(err, res);
        }
    };
    let req = info.httpModule.request(info.options, (msg) => {
        let res = new HttpClientResponse(msg);
        handleResult(null, res);
    });
    // Remember the socket so the timeout handler below can end it.
    req.on('socket', sock => {
        socket = sock;
    });
    // If we ever get disconnected, we want the socket to timeout eventually
    // (defaults to 3 minutes when no socketTimeout option was supplied).
    req.setTimeout(this._socketTimeout || 3 * 60000, () => {
        if (socket) {
            socket.end();
        }
        handleResult(new Error('Request timeout: ' + info.options.path), null);
    });
    req.on('error', function (err) {
        // err has statusCode property
        // res should have headers
        handleResult(err, null);
    });
    if (data && typeof data === 'string') {
        req.write(data, 'utf8');
    }
    // Note: the `else` below pairs with this second `if`, so string bodies and
    // empty bodies both end the request immediately; only stream bodies are
    // piped, with the request ended when the stream emits 'close'.
    if (data && typeof data !== 'string') {
        data.on('close', function () {
            req.end();
        });
        data.pipe(req);
    }
    else {
        req.end();
    }
}
parseInt(info.parsedUrl.port) : defaultPort; info.options.path = (info.parsedUrl.pathname || '') + (info.parsedUrl.search || ''); info.options.method = method; info.options.headers = this._mergeHeaders(headers); if (this.userAgent != null) { info.options.headers['user-agent'] = this.userAgent; } info.options.agent = this._getAgent(info.parsedUrl); // gives handlers an opportunity to participate if (this.handlers) { this.handlers.forEach(handler => { handler.prepareRequest(info.options); }); } return info; } _mergeHeaders(headers) { const lowercaseKeys = obj => Object.keys(obj).reduce((c, k) => ((c[k.toLowerCase()] = obj[k]), c), {}); if (this.requestOptions && this.requestOptions.headers) { return Object.assign({}, lowercaseKeys(this.requestOptions.headers), lowercaseKeys(headers)); } return lowercaseKeys(headers || {}); } _getExistingOrDefaultHeader(additionalHeaders, header, _default) { const lowercaseKeys = obj => Object.keys(obj).reduce((c, k) => ((c[k.toLowerCase()] = obj[k]), c), {}); let clientHeader; if (this.requestOptions && this.requestOptions.headers) { clientHeader = lowercaseKeys(this.requestOptions.headers)[header]; } return additionalHeaders[header] || clientHeader || _default; } _getAgent(parsedUrl) { let agent; let proxyUrl = pm.getProxyUrl(parsedUrl); let useProxy = proxyUrl && proxyUrl.hostname; if (this._keepAlive && useProxy) { agent = this._proxyAgent; } if (this._keepAlive && !useProxy) { agent = this._agent; } // if agent is already assigned use that agent. 
if (!!agent) { return agent; } const usingSsl = parsedUrl.protocol === 'https:'; let maxSockets = 100; if (!!this.requestOptions) { maxSockets = this.requestOptions.maxSockets || http.globalAgent.maxSockets; } if (useProxy) { // If using proxy, need tunnel if (!tunnel) { tunnel = __nccwpck_require__(4294); } const agentOptions = { maxSockets: maxSockets, keepAlive: this._keepAlive, proxy: { ...((proxyUrl.username || proxyUrl.password) && { proxyAuth: `${proxyUrl.username}:${proxyUrl.password}` }), host: proxyUrl.hostname, port: proxyUrl.port } }; let tunnelAgent; const overHttps = proxyUrl.protocol === 'https:'; if (usingSsl) { tunnelAgent = overHttps ? tunnel.httpsOverHttps : tunnel.httpsOverHttp; } else { tunnelAgent = overHttps ? tunnel.httpOverHttps : tunnel.httpOverHttp; } agent = tunnelAgent(agentOptions); this._proxyAgent = agent; } // if reusing agent across request and tunneling agent isn't assigned create a new agent if (this._keepAlive && !agent) { const options = { keepAlive: this._keepAlive, maxSockets: maxSockets }; agent = usingSsl ? new https.Agent(options) : new http.Agent(options); this._agent = agent; } // if not using private agent and tunnel agent isn't setup then use global agent if (!agent) { agent = usingSsl ? 
https.globalAgent : http.globalAgent; } if (usingSsl && this._ignoreSslError) { // we don't want to set NODE_TLS_REJECT_UNAUTHORIZED=0 since that will affect request for entire process // http.RequestOptions doesn't expose a way to modify RequestOptions.agent.options // we have to cast it to any and change it directly agent.options = Object.assign(agent.options || {}, { rejectUnauthorized: false }); } return agent; } _performExponentialBackoff(retryNumber) { retryNumber = Math.min(ExponentialBackoffCeiling, retryNumber); const ms = ExponentialBackoffTimeSlice * Math.pow(2, retryNumber); return new Promise(resolve => setTimeout(() => resolve(), ms)); } static dateTimeDeserializer(key, value) { if (typeof value === 'string') { let a = new Date(value); if (!isNaN(a.valueOf())) { return a; } } return value; } async _processResponse(res, options) { return new Promise(async (resolve, reject) => { const statusCode = res.message.statusCode; const response = { statusCode: statusCode, result: null, headers: {} }; // not found leads to null obj returned if (statusCode == HttpCodes.NotFound) { resolve(response); } let obj; let contents; // get the result from the body try { contents = await res.readBody(); if (contents && contents.length > 0) { if (options && options.deserializeDates) { obj = JSON.parse(contents, HttpClient.dateTimeDeserializer); } else { obj = JSON.parse(contents); } response.result = obj; } response.headers = res.message.headers; } catch (err) { // Invalid resource (contents not json); leaving result obj null } // note that 3xx redirects are handled by the http layer. 
/**
 * Decides whether a request URL is excluded from proxying by the
 * `no_proxy` / `NO_PROXY` environment variable.
 *
 * The variable is a comma-separated list; each entry is compared
 * case-insensitively against the request hostname and against
 * `hostname:port` (port taken from the URL, else 80 for http: and 443 for
 * https:). Only exact matches count.
 *
 * @param reqUrl Parsed request URL (reads `hostname`, `port`, `protocol`).
 * @returns true when the proxy must be bypassed, false otherwise.
 */
function checkBypass(reqUrl) {
    const hostname = reqUrl.hostname;
    if (!hostname) {
        return false;
    }
    const rawList = process.env['no_proxy'] || process.env['NO_PROXY'] || '';
    if (!rawList) {
        return false;
    }
    // Work out the effective port of the request.
    let port;
    if (reqUrl.port) {
        port = Number(reqUrl.port);
    }
    else {
        switch (reqUrl.protocol) {
            case 'http:':
                port = 80;
                break;
            case 'https:':
                port = 443;
                break;
        }
    }
    // Candidate spellings of the request host: bare, and with the port.
    const bareHost = hostname.toUpperCase();
    const candidates = new Set([bareHost]);
    if (typeof port === 'number') {
        candidates.add(`${bareHost}:${port}`);
    }
    // Any non-empty no_proxy entry equal to a candidate triggers the bypass.
    return rawList
        .split(',')
        .map(entry => entry.trim().toUpperCase())
        .some(entry => entry !== '' && candidates.has(entry));
}
/**
 * Prefix token for usage in the Authorization header.
 *
 * A token made of exactly three dot-separated segments gets the `bearer`
 * scheme; every other token gets the `token` scheme.
 *
 * @param token OAuth token or JSON Web Token
 * @returns The header value `"<scheme> <token>"`.
 */
function withAuthorizationPrefix(token) {
    const segmentCount = token.split(/\./).length;
    const scheme = segmentCount === 3 ? "bearer" : "token";
    return `${scheme} ${token}`;
}
_objectWithoutPropertiesLoose(source, excluded); var key, i; if (Object.getOwnPropertySymbols) { var sourceSymbolKeys = Object.getOwnPropertySymbols(source); for (i = 0; i < sourceSymbolKeys.length; i++) { key = sourceSymbolKeys[i]; if (excluded.indexOf(key) >= 0) continue; if (!Object.prototype.propertyIsEnumerable.call(source, key)) continue; target[key] = source[key]; } } return target; } const VERSION = "3.4.0"; class Octokit { constructor(options = {}) { const hook = new beforeAfterHook.Collection(); const requestDefaults = { baseUrl: request.request.endpoint.DEFAULTS.baseUrl, headers: {}, request: Object.assign({}, options.request, { // @ts-ignore internal usage only, no need to type hook: hook.bind(null, "request") }), mediaType: { previews: [], format: "" } }; // prepend default user agent with `options.userAgent` if set requestDefaults.headers["user-agent"] = [options.userAgent, `octokit-core.js/${VERSION} ${universalUserAgent.getUserAgent()}`].filter(Boolean).join(" "); if (options.baseUrl) { requestDefaults.baseUrl = options.baseUrl; } if (options.previews) { requestDefaults.mediaType.previews = options.previews; } if (options.timeZone) { requestDefaults.headers["time-zone"] = options.timeZone; } this.request = request.request.defaults(requestDefaults); this.graphql = graphql.withCustomRequest(this.request).defaults(requestDefaults); this.log = Object.assign({ debug: () => {}, info: () => {}, warn: console.warn.bind(console), error: console.error.bind(console) }, options.log); this.hook = hook; // (1) If neither `options.authStrategy` nor `options.auth` are set, the `octokit` instance // is unauthenticated. The `this.auth()` method is a no-op and no request hook is registered. // (2) If only `options.auth` is set, use the default token authentication strategy. // (3) If `options.authStrategy` is set then use it and pass in `options.auth`. Always pass own request as many strategies accept a custom request instance. 
// TODO: type `options.auth` based on `options.authStrategy`. if (!options.authStrategy) { if (!options.auth) { // (1) this.auth = async () => ({ type: "unauthenticated" }); } else { // (2) const auth = authToken.createTokenAuth(options.auth); // @ts-ignore ¯\_(ツ)_/¯ hook.wrap("request", auth.hook); this.auth = auth; } } else { const { authStrategy } = options, otherOptions = _objectWithoutProperties(options, ["authStrategy"]); const auth = authStrategy(Object.assign({ request: this.request, log: this.log, // we pass the current octokit instance as well as its constructor options // to allow for authentication strategies that return a new octokit instance // that shares the same internal state as the current one. The original // requirement for this was the "event-octokit" authentication strategy // of https://github.com/probot/octokit-auth-probot. octokit: this, octokitOptions: otherOptions }, options.auth)); // @ts-ignore ¯\_(ツ)_/¯ hook.wrap("request", auth.hook); this.auth = auth; } // apply plugins // https://stackoverflow.com/a/16345172 const classConstructor = this.constructor; classConstructor.plugins.forEach(plugin => { Object.assign(this, plugin(this, options)); }); } static defaults(defaults) { const OctokitWithDefaults = class extends this { constructor(...args) { const options = args[0] || {}; if (typeof defaults === "function") { super(defaults(options)); return; } super(Object.assign({}, defaults, options, options.userAgent && defaults.userAgent ? { userAgent: `${options.userAgent} ${defaults.userAgent}` } : null)); } }; return OctokitWithDefaults; } /** * Attach a plugin (or many) to your Octokit instance. * * @example * const API = Octokit.plugin(plugin1, plugin2, plugin3, ...) 
/**
 * Returns a shallow copy of `object` whose own enumerable string keys are
 * lower-cased. A falsy argument yields an empty object. When two keys differ
 * only by case, the one enumerated later wins.
 */
function lowercaseKeys(object) {
    const lowered = {};
    if (object) {
        for (const name of Object.keys(object)) {
            lowered[name.toLowerCase()] = object[name];
        }
    }
    return lowered;
}
/**
 * Appends `parameters` to `url` as a query string.
 *
 * Uses "&" as the joining separator when `url` already contains a "?",
 * otherwise "?". Values are percent-encoded with encodeURIComponent; the
 * special `q` parameter is encoded term by term so that its literal "+"
 * separators are preserved. With no parameters the URL is returned unchanged.
 */
function addQueryParameters(url, parameters) {
    const separator = /\?/.test(url) ? "&" : "?";
    const names = Object.keys(parameters);
    if (names.length === 0) {
        return url;
    }
    const pairs = [];
    for (const name of names) {
        if (name === "q") {
            const terms = parameters.q.split("+").map(term => encodeURIComponent(term));
            pairs.push("q=" + terms.join("+"));
        }
        else {
            pairs.push(`${name}=${encodeURIComponent(parameters[name])}`);
        }
    }
    return url + separator + pairs.join("&");
}
/**
 * Encodes a string for RFC 6570 "reserved" expansion (`{+var}`, `{#var}`)
 * and for template literals.
 *
 * Existing percent-encoded triplets (`%XX`) are passed through untouched;
 * everything else is run through encodeURI, with "[" and "]" restored.
 *
 * @param str The raw string.
 * @returns The encoded string.
 */
function encodeReserved(str) {
    // Splitting on a capturing group keeps each `%XX` triplet as its own
    // array element, interleaved with the literal text between them.
    return str
        .split(/(%[0-9A-Fa-f]{2})/g)
        .map(function (part) {
            // Only an exact triplet may skip encoding. An unanchored,
            // one-digit test would also match literal text such as "50%a"
            // and leave it (and any spaces in it) completely unencoded.
            if (/^%[0-9A-Fa-f]{2}$/.test(part)) {
                return part;
            }
            return encodeURI(part).replace(/%5B/g, "[").replace(/%5D/g, "]");
        })
        .join("");
}
/**
 * True for every value except `undefined` and `null`
 * (so 0, "", false and NaN all count as defined).
 */
function isDefined(value) {
    if (value === undefined) {
        return false;
    }
    return value !== null;
}
template.replace(/\{([^\{\}]+)\}|([^\{\}]+)/g, function (_, expression, literal) { if (expression) { let operator = ""; const values = []; if (operators.indexOf(expression.charAt(0)) !== -1) { operator = expression.charAt(0); expression = expression.substr(1); } expression.split(/,/g).forEach(function (variable) { var tmp = /([^:\*]*)(?::(\d+)|(\*))?/.exec(variable); values.push(getValues(context, operator, tmp[1], tmp[2] || tmp[3])); }); if (operator && operator !== "+") { var separator = ","; if (operator === "?") { separator = "&"; } else if (operator !== "#") { separator = operator; } return (values.length !== 0 ? operator : "") + values.join(separator); } else { return values.join(","); } } else { return encodeReserved(literal); } }); } function parse(options) { // https://fetch.spec.whatwg.org/#methods let method = options.method.toUpperCase(); // replace :varname with {varname} to make it RFC 6570 compatible let url = (options.url || "/").replace(/:([a-z]\w+)/g, "{$1}"); let headers = Object.assign({}, options.headers); let body; let parameters = omit(options, ["method", "baseUrl", "url", "headers", "request", "mediaType"]); // extract variable names from URL to calculate remaining variables later const urlVariableNames = extractUrlVariableNames(url); url = parseUrl(url).expand(parameters); if (!/^http/.test(url)) { url = options.baseUrl + url; } const omittedParameters = Object.keys(options).filter(option => urlVariableNames.includes(option)).concat("baseUrl"); const remainingParameters = omit(parameters, omittedParameters); const isBinaryRequest = /application\/octet-stream/i.test(headers.accept); if (!isBinaryRequest) { if (options.mediaType.format) { // e.g. 
application/vnd.github.v3+json => application/vnd.github.v3.raw headers.accept = headers.accept.split(/,/).map(preview => preview.replace(/application\/vnd(\.\w+)(\.v3)?(\.\w+)?(\+json)?$/, `application/vnd$1$2.${options.mediaType.format}`)).join(","); } if (options.mediaType.previews.length) { const previewsFromAcceptHeader = headers.accept.match(/[\w-]+(?=-preview)/g) || []; headers.accept = previewsFromAcceptHeader.concat(options.mediaType.previews).map(preview => { const format = options.mediaType.format ? `.${options.mediaType.format}` : "+json"; return `application/vnd.github.${preview}-preview${format}`; }).join(","); } } // for GET/HEAD requests, set URL query parameters from remaining parameters // for PATCH/POST/PUT/DELETE requests, set request body from remaining parameters if (["GET", "HEAD"].includes(method)) { url = addQueryParameters(url, remainingParameters); } else { if ("data" in remainingParameters) { body = remainingParameters.data; } else { if (Object.keys(remainingParameters).length) { body = remainingParameters; } else { headers["content-length"] = 0; } } } // default content-type for JSON if body is set if (!headers["content-type"] && typeof body !== "undefined") { headers["content-type"] = "application/json; charset=utf-8"; } // GitHub expects 'content-length: 0' header for PUT/PATCH requests without body. // fetch does not allow to set `content-length` header, but we can set body to an empty string if (["PATCH", "PUT"].includes(method) && typeof body === "undefined") { body = ""; } // Only return body/request keys if present return Object.assign({ method, url, headers }, typeof body !== "undefined" ? { body } : null, options.request ? 
/**
 * Error thrown for a GraphQL response that carries an `errors` array.
 *
 * The message is taken from the first entry of `response.data.errors`. All
 * properties of `response.data` and the response `headers` are copied onto
 * the error instance, and the originating request options are kept in
 * `request`.
 */
class GraphqlError extends Error {
    constructor(request, response) {
        const firstError = response.data.errors[0];
        super(firstError.message);
        // Copy the payload first and the headers second, so `headers` always
        // refers to the response headers.
        Object.assign(this, response.data, { headers: response.headers });
        this.name = "GraphqlError";
        this.request = request;
        // Maintains proper stack trace (only available on V8)
        /* istanbul ignore next */
        if (Error.captureStackTrace) {
            Error.captureStackTrace(this, this.constructor);
        }
    }
}
(options) { if (typeof query === "string" && "query" in options) { return Promise.reject(new Error(`[@octokit/graphql] "query" cannot be used as variable name`)); } for (const key in options) { if (!FORBIDDEN_VARIABLE_OPTIONS.includes(key)) continue; return Promise.reject(new Error(`[@octokit/graphql] "${key}" cannot be used as variable name`)); } } const parsedOptions = typeof query === "string" ? Object.assign({ query }, options) : query; const requestOptions = Object.keys(parsedOptions).reduce((result, key) => { if (NON_VARIABLE_OPTIONS.includes(key)) { result[key] = parsedOptions[key]; return result; } if (!result.variables) { result.variables = {}; } result.variables[key] = parsedOptions[key]; return result; }, {}); // workaround for GitHub Enterprise baseUrl set with /api/v3 suffix // https://github.com/octokit/auth-app.js/issues/111#issuecomment-657610451 const baseUrl = parsedOptions.baseUrl || request.endpoint.DEFAULTS.baseUrl; if (GHES_V3_SUFFIX_REGEX.test(baseUrl)) { requestOptions.url = baseUrl.replace(GHES_V3_SUFFIX_REGEX, "/api/graphql"); } return request(requestOptions).then(response => { if (response.data.errors) { const headers = {}; for (const key of Object.keys(response.headers)) { headers[key] = response.headers[key]; } throw new GraphqlError(requestOptions, { headers, data: response.data }); } return response.data.data; }); } function withDefaults(request$1, newDefaults) { const newRequest = request$1.defaults(newDefaults); const newApi = (query, options) => { return graphql(newRequest, query, options); }; return Object.assign(newApi, { defaults: withDefaults.bind(null, newRequest), endpoint: request.request.endpoint }); } const graphql$1 = withDefaults(request.request, { headers: { "user-agent": `octokit-graphql.js/${VERSION} ${universalUserAgent.getUserAgent()}` }, method: "POST", url: "/graphql" }); function withCustomRequest(customRequest) { return withDefaults(customRequest, { method: "POST", url: "/graphql" }); } exports.graphql = 
const VERSION = "2.13.3";

/**
 * Some "list" responses that can be paginated have a different response structure.
 *
 * They have a `total_count` key in the response (search also has `incomplete_results`,
 * /installation/repositories also has `repository_selection`), as well as a key with
 * the list of the items whose name varies from endpoint to endpoint.
 *
 * Octokit normalizes these responses so that paginated results are always returned following
 * the same structure. One challenge is that if the list response has only one page, no Link
 * header is provided, so this header alone is not sufficient to check whether a response is
 * paginated or not.
 *
 * We check if a "total_count" key is present in the response data, but also make sure that
 * a "url" property is not, as the "Get the combined status for a specific ref" endpoint would
 * otherwise match: https://developer.github.com/v3/repos/statuses/#get-the-combined-status-for-a-specific-ref
 *
 * @param response Response object with a `data` property.
 * @returns The response with `data` set to the list itself. When normalization
 *          applies, `response` is mutated in place and `total_count` (plus
 *          `incomplete_results` / `repository_selection` when present) is
 *          re-attached as properties of the list.
 */
function normalizePaginatedListResponse(response) {
  // A response without a body (e.g. 204 No Content) has no data to inspect;
  // the `in` operator below would throw a TypeError on it. Treat it as an
  // empty page instead, without mutating the original response.
  if (!response.data) {
    return Object.assign({}, response, {
      data: []
    });
  }

  const responseNeedsNormalization = "total_count" in response.data && !("url" in response.data);
  if (!responseNeedsNormalization) return response;

  // keep the additional properties intact as there is currently no other way
  // to retrieve the same information.
  const incompleteResults = response.data.incomplete_results;
  const repositorySelection = response.data.repository_selection;
  const totalCount = response.data.total_count;
  delete response.data.incomplete_results;
  delete response.data.repository_selection;
  delete response.data.total_count;

  // After removing the known metadata keys, the first remaining key is
  // taken to be the one holding the list of items.
  const namespaceKey = Object.keys(response.data)[0];
  const data = response.data[namespaceKey];
  response.data = data;

  if (typeof incompleteResults !== "undefined") {
    response.data.incomplete_results = incompleteResults;
  }

  if (typeof repositorySelection !== "undefined") {
    response.data.repository_selection = repositorySelection;
  }

  response.data.total_count = totalCount;
  return response;
}

/**
 * Create an async-iterable that yields one normalized response per page.
 *
 * @param octokit    Object exposing `request` and `request.endpoint`.
 * @param route      Either a route passed to `octokit.request.endpoint`, or a
 *                   request function exposing its own `.endpoint()`.
 * @param parameters Parameters handed to the endpoint resolver.
 * @returns An object implementing `Symbol.asyncIterator`.
 */
function iterator(octokit, route, parameters) {
  const options = typeof route === "function" ? route.endpoint(parameters) : octokit.request.endpoint(route, parameters);
  const requestMethod = typeof route === "function" ? route : octokit.request;
  const method = options.method;
  const headers = options.headers;
  let url = options.url;
  return {
    [Symbol.asyncIterator]: () => ({
      async next() {
        if (!url) return {
          done: true
        };
        const response = await requestMethod({
          method,
          url,
          headers
        });
        const normalizedResponse = normalizePaginatedListResponse(response);

        // `response.headers.link` format:
        // '<https://api.github.com/…&page=2>; rel="next", <https://api.github.com/…&page=5>; rel="last"'
        // sets `url` to undefined if "next" URL is not present or `link` header is not set
        url = ((normalizedResponse.headers.link || "").match(/<([^>]+)>;\s*rel="next"/) || [])[1];
        return {
          value: normalizedResponse
        };
      }

    })
  };
}

/**
 * Request every page of a route and collect the results into one array.
 *
 * @param octokit    Object exposing `request` and `request.endpoint`.
 * @param route      Route or request function (see `iterator`).
 * @param parameters Optional parameters; may be omitted, in which case the
 *                   third argument is taken as `mapFn`.
 * @param mapFn      Optional `(response, done) => items`; call `done()` to
 *                   stop after the current page.
 * @returns Promise resolving to the concatenated results.
 */
function paginate(octokit, route, parameters, mapFn) {
  if (typeof parameters === "function") {
    mapFn = parameters;
    parameters = undefined;
  }

  return gather(octokit, [], iterator(octokit, route, parameters)[Symbol.asyncIterator](), mapFn);
}

/**
 * Pull pages from `iterator` one at a time, appending each page's items
 * (or `mapFn`'s return value) to `results`, until the iterator is done or
 * `mapFn` calls its `done` callback.
 */
function gather(octokit, results, iterator, mapFn) {
  return iterator.next().then(result => {
    if (result.done) {
      return results;
    }

    let earlyExit = false;

    function done() {
      earlyExit = true;
    }

    results = results.concat(mapFn ? mapFn(result.value, done) : result.value.data);

    if (earlyExit) {
      return results;
    }

    return gather(octokit, results, iterator, mapFn);
  });
}

// `paginate` with `iterator` attached, for use without the plugin wrapper.
const composePaginateRest = Object.assign(paginate, {
  iterator
});
/orgs/{org}/migrations/{migration_id}/repositories", "GET /orgs/{org}/outside_collaborators", "GET /orgs/{org}/projects", "GET /orgs/{org}/public_members", "GET /orgs/{org}/repos", "GET /orgs/{org}/team-sync/groups", "GET /orgs/{org}/teams", "GET /orgs/{org}/teams/{team_slug}/discussions", "GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments", "GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}/reactions", "GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/reactions", "GET /orgs/{org}/teams/{team_slug}/invitations", "GET /orgs/{org}/teams/{team_slug}/members", "GET /orgs/{org}/teams/{team_slug}/projects", "GET /orgs/{org}/teams/{team_slug}/repos", "GET /orgs/{org}/teams/{team_slug}/team-sync/group-mappings", "GET /orgs/{org}/teams/{team_slug}/teams", "GET /projects/columns/{column_id}/cards", "GET /projects/{project_id}/collaborators", "GET /projects/{project_id}/columns", "GET /repos/{owner}/{repo}/actions/artifacts", "GET /repos/{owner}/{repo}/actions/runners", "GET /repos/{owner}/{repo}/actions/runners/downloads", "GET /repos/{owner}/{repo}/actions/runs", "GET /repos/{owner}/{repo}/actions/runs/{run_id}/artifacts", "GET /repos/{owner}/{repo}/actions/runs/{run_id}/jobs", "GET /repos/{owner}/{repo}/actions/secrets", "GET /repos/{owner}/{repo}/actions/workflows", "GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}/runs", "GET /repos/{owner}/{repo}/assignees", "GET /repos/{owner}/{repo}/branches", "GET /repos/{owner}/{repo}/check-runs/{check_run_id}/annotations", "GET /repos/{owner}/{repo}/check-suites/{check_suite_id}/check-runs", "GET /repos/{owner}/{repo}/code-scanning/alerts", "GET /repos/{owner}/{repo}/code-scanning/alerts/{alert_number}/instances", "GET /repos/{owner}/{repo}/code-scanning/analyses", "GET /repos/{owner}/{repo}/collaborators", "GET /repos/{owner}/{repo}/comments", "GET /repos/{owner}/{repo}/comments/{comment_id}/reactions", "GET /repos/{owner}/{repo}/commits", 
"GET /repos/{owner}/{repo}/commits/{commit_sha}/branches-where-head", "GET /repos/{owner}/{repo}/commits/{commit_sha}/comments", "GET /repos/{owner}/{repo}/commits/{commit_sha}/pulls", "GET /repos/{owner}/{repo}/commits/{ref}/check-runs", "GET /repos/{owner}/{repo}/commits/{ref}/check-suites", "GET /repos/{owner}/{repo}/commits/{ref}/statuses", "GET /repos/{owner}/{repo}/contributors", "GET /repos/{owner}/{repo}/deployments", "GET /repos/{owner}/{repo}/deployments/{deployment_id}/statuses", "GET /repos/{owner}/{repo}/events", "GET /repos/{owner}/{repo}/forks", "GET /repos/{owner}/{repo}/git/matching-refs/{ref}", "GET /repos/{owner}/{repo}/hooks", "GET /repos/{owner}/{repo}/invitations", "GET /repos/{owner}/{repo}/issues", "GET /repos/{owner}/{repo}/issues/comments", "GET /repos/{owner}/{repo}/issues/comments/{comment_id}/reactions", "GET /repos/{owner}/{repo}/issues/events", "GET /repos/{owner}/{repo}/issues/{issue_number}/comments", "GET /repos/{owner}/{repo}/issues/{issue_number}/events", "GET /repos/{owner}/{repo}/issues/{issue_number}/labels", "GET /repos/{owner}/{repo}/issues/{issue_number}/reactions", "GET /repos/{owner}/{repo}/issues/{issue_number}/timeline", "GET /repos/{owner}/{repo}/keys", "GET /repos/{owner}/{repo}/labels", "GET /repos/{owner}/{repo}/milestones", "GET /repos/{owner}/{repo}/milestones/{milestone_number}/labels", "GET /repos/{owner}/{repo}/notifications", "GET /repos/{owner}/{repo}/pages/builds", "GET /repos/{owner}/{repo}/projects", "GET /repos/{owner}/{repo}/pulls", "GET /repos/{owner}/{repo}/pulls/comments", "GET /repos/{owner}/{repo}/pulls/comments/{comment_id}/reactions", "GET /repos/{owner}/{repo}/pulls/{pull_number}/comments", "GET /repos/{owner}/{repo}/pulls/{pull_number}/commits", "GET /repos/{owner}/{repo}/pulls/{pull_number}/files", "GET /repos/{owner}/{repo}/pulls/{pull_number}/requested_reviewers", "GET /repos/{owner}/{repo}/pulls/{pull_number}/reviews", "GET 
/repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}/comments", "GET /repos/{owner}/{repo}/releases", "GET /repos/{owner}/{repo}/releases/{release_id}/assets", "GET /repos/{owner}/{repo}/secret-scanning/alerts", "GET /repos/{owner}/{repo}/stargazers", "GET /repos/{owner}/{repo}/subscribers", "GET /repos/{owner}/{repo}/tags", "GET /repos/{owner}/{repo}/teams", "GET /repositories", "GET /repositories/{repository_id}/environments/{environment_name}/secrets", "GET /scim/v2/enterprises/{enterprise}/Groups", "GET /scim/v2/enterprises/{enterprise}/Users", "GET /scim/v2/organizations/{org}/Users", "GET /search/code", "GET /search/commits", "GET /search/issues", "GET /search/labels", "GET /search/repositories", "GET /search/topics", "GET /search/users", "GET /teams/{team_id}/discussions", "GET /teams/{team_id}/discussions/{discussion_number}/comments", "GET /teams/{team_id}/discussions/{discussion_number}/comments/{comment_number}/reactions", "GET /teams/{team_id}/discussions/{discussion_number}/reactions", "GET /teams/{team_id}/invitations", "GET /teams/{team_id}/members", "GET /teams/{team_id}/projects", "GET /teams/{team_id}/repos", "GET /teams/{team_id}/team-sync/group-mappings", "GET /teams/{team_id}/teams", "GET /user/blocks", "GET /user/emails", "GET /user/followers", "GET /user/following", "GET /user/gpg_keys", "GET /user/installations", "GET /user/installations/{installation_id}/repositories", "GET /user/issues", "GET /user/keys", "GET /user/marketplace_purchases", "GET /user/marketplace_purchases/stubbed", "GET /user/memberships/orgs", "GET /user/migrations", "GET /user/migrations/{migration_id}/repositories", "GET /user/orgs", "GET /user/public_emails", "GET /user/repos", "GET /user/repository_invitations", "GET /user/starred", "GET /user/subscriptions", "GET /user/teams", "GET /users", "GET /users/{username}/events", "GET /users/{username}/events/orgs/{org}", "GET /users/{username}/events/public", "GET /users/{username}/followers", "GET 
/**
 * Tell whether `arg` is one of the known paginating endpoint routes.
 * Anything that is not a string is never a paginating endpoint.
 */
function isPaginatingEndpoint(arg) {
  return typeof arg === "string" ? paginatingEndpoints.includes(arg) : false;
}

/**
 * @param octokit Octokit instance
 * @param options Options passed to Octokit constructor
 * @returns An object whose `paginate` member is `paginate` bound to
 *          `octokit`, carrying an `iterator` member bound the same way.
 */
function paginateRest(octokit) {
  const boundPaginate = paginate.bind(null, octokit);
  boundPaginate.iterator = iterator.bind(null, octokit);
  return {
    paginate: boundPaginate
  };
}

/**
 * Set `obj[key]` to `value` and return `obj`. When the key already exists
 * (own or inherited) it is (re)defined as an enumerable, configurable,
 * writable own data property; otherwise a plain assignment is used.
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }

  Object.defineProperty(obj, key, {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  });
  return obj;
}

/**
 * List the own string keys of `object` followed by its own symbol keys.
 * With `enumerableOnly` set, non-enumerable symbols are left out.
 */
function ownKeys(object, enumerableOnly) {
  const collected = Object.keys(object);

  if (!Object.getOwnPropertySymbols) {
    return collected;
  }

  let symbolKeys = Object.getOwnPropertySymbols(object);

  if (enumerableOnly) {
    symbolKeys = symbolKeys.filter(sym => Object.getOwnPropertyDescriptor(object, sym).enumerable);
  }

  collected.push(...symbolKeys);
  return collected;
}

/**
 * Merge every argument after `target` into `target` and return it.
 *
 * Arguments at odd positions are copied value by value (enumerable keys
 * only); arguments at even positions are copied descriptor by descriptor.
 * `null` / `undefined` arguments are treated as empty objects.
 */
function _objectSpread2(target) {
  let position = 1;

  while (position < arguments.length) {
    const candidate = arguments[position];
    const source = candidate != null ? candidate : {};

    if (position % 2) {
      for (const key of ownKeys(Object(source), true)) {
        _defineProperty(target, key, source[key]);
      }
    } else if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
    } else {
      for (const key of ownKeys(Object(source))) {
        Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
      }
    }

    position += 1;
  }

  return target;
}
deleteWorkflowRunLogs: ["DELETE /repos/{owner}/{repo}/actions/runs/{run_id}/logs"], disableSelectedRepositoryGithubActionsOrganization: ["DELETE /orgs/{org}/actions/permissions/repositories/{repository_id}"], disableWorkflow: ["PUT /repos/{owner}/{repo}/actions/workflows/{workflow_id}/disable"], downloadArtifact: ["GET /repos/{owner}/{repo}/actions/artifacts/{artifact_id}/{archive_format}"], downloadJobLogsForWorkflowRun: ["GET /repos/{owner}/{repo}/actions/jobs/{job_id}/logs"], downloadWorkflowRunLogs: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/logs"], enableSelectedRepositoryGithubActionsOrganization: ["PUT /orgs/{org}/actions/permissions/repositories/{repository_id}"], enableWorkflow: ["PUT /repos/{owner}/{repo}/actions/workflows/{workflow_id}/enable"], getAllowedActionsOrganization: ["GET /orgs/{org}/actions/permissions/selected-actions"], getAllowedActionsRepository: ["GET /repos/{owner}/{repo}/actions/permissions/selected-actions"], getArtifact: ["GET /repos/{owner}/{repo}/actions/artifacts/{artifact_id}"], getEnvironmentPublicKey: ["GET /repositories/{repository_id}/environments/{environment_name}/secrets/public-key"], getEnvironmentSecret: ["GET /repositories/{repository_id}/environments/{environment_name}/secrets/{secret_name}"], getGithubActionsPermissionsOrganization: ["GET /orgs/{org}/actions/permissions"], getGithubActionsPermissionsRepository: ["GET /repos/{owner}/{repo}/actions/permissions"], getJobForWorkflowRun: ["GET /repos/{owner}/{repo}/actions/jobs/{job_id}"], getOrgPublicKey: ["GET /orgs/{org}/actions/secrets/public-key"], getOrgSecret: ["GET /orgs/{org}/actions/secrets/{secret_name}"], getPendingDeploymentsForRun: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/pending_deployments"], getRepoPermissions: ["GET /repos/{owner}/{repo}/actions/permissions", {}, { renamed: ["actions", "getGithubActionsPermissionsRepository"] }], getRepoPublicKey: ["GET /repos/{owner}/{repo}/actions/secrets/public-key"], getRepoSecret: ["GET 
/repos/{owner}/{repo}/actions/secrets/{secret_name}"], getReviewsForRun: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/approvals"], getSelfHostedRunnerForOrg: ["GET /orgs/{org}/actions/runners/{runner_id}"], getSelfHostedRunnerForRepo: ["GET /repos/{owner}/{repo}/actions/runners/{runner_id}"], getWorkflow: ["GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}"], getWorkflowRun: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}"], getWorkflowRunUsage: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/timing"], getWorkflowUsage: ["GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}/timing"], listArtifactsForRepo: ["GET /repos/{owner}/{repo}/actions/artifacts"], listEnvironmentSecrets: ["GET /repositories/{repository_id}/environments/{environment_name}/secrets"], listJobsForWorkflowRun: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/jobs"], listOrgSecrets: ["GET /orgs/{org}/actions/secrets"], listRepoSecrets: ["GET /repos/{owner}/{repo}/actions/secrets"], listRepoWorkflows: ["GET /repos/{owner}/{repo}/actions/workflows"], listRunnerApplicationsForOrg: ["GET /orgs/{org}/actions/runners/downloads"], listRunnerApplicationsForRepo: ["GET /repos/{owner}/{repo}/actions/runners/downloads"], listSelectedReposForOrgSecret: ["GET /orgs/{org}/actions/secrets/{secret_name}/repositories"], listSelectedRepositoriesEnabledGithubActionsOrganization: ["GET /orgs/{org}/actions/permissions/repositories"], listSelfHostedRunnersForOrg: ["GET /orgs/{org}/actions/runners"], listSelfHostedRunnersForRepo: ["GET /repos/{owner}/{repo}/actions/runners"], listWorkflowRunArtifacts: ["GET /repos/{owner}/{repo}/actions/runs/{run_id}/artifacts"], listWorkflowRuns: ["GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}/runs"], listWorkflowRunsForRepo: ["GET /repos/{owner}/{repo}/actions/runs"], reRunWorkflow: ["POST /repos/{owner}/{repo}/actions/runs/{run_id}/rerun"], removeSelectedRepoFromOrgSecret: ["DELETE 
/orgs/{org}/actions/secrets/{secret_name}/repositories/{repository_id}"], reviewPendingDeploymentsForRun: ["POST /repos/{owner}/{repo}/actions/runs/{run_id}/pending_deployments"], setAllowedActionsOrganization: ["PUT /orgs/{org}/actions/permissions/selected-actions"], setAllowedActionsRepository: ["PUT /repos/{owner}/{repo}/actions/permissions/selected-actions"], setGithubActionsPermissionsOrganization: ["PUT /orgs/{org}/actions/permissions"], setGithubActionsPermissionsRepository: ["PUT /repos/{owner}/{repo}/actions/permissions"], setSelectedReposForOrgSecret: ["PUT /orgs/{org}/actions/secrets/{secret_name}/repositories"], setSelectedRepositoriesEnabledGithubActionsOrganization: ["PUT /orgs/{org}/actions/permissions/repositories"] }, activity: { checkRepoIsStarredByAuthenticatedUser: ["GET /user/starred/{owner}/{repo}"], deleteRepoSubscription: ["DELETE /repos/{owner}/{repo}/subscription"], deleteThreadSubscription: ["DELETE /notifications/threads/{thread_id}/subscription"], getFeeds: ["GET /feeds"], getRepoSubscription: ["GET /repos/{owner}/{repo}/subscription"], getThread: ["GET /notifications/threads/{thread_id}"], getThreadSubscriptionForAuthenticatedUser: ["GET /notifications/threads/{thread_id}/subscription"], listEventsForAuthenticatedUser: ["GET /users/{username}/events"], listNotificationsForAuthenticatedUser: ["GET /notifications"], listOrgEventsForAuthenticatedUser: ["GET /users/{username}/events/orgs/{org}"], listPublicEvents: ["GET /events"], listPublicEventsForRepoNetwork: ["GET /networks/{owner}/{repo}/events"], listPublicEventsForUser: ["GET /users/{username}/events/public"], listPublicOrgEvents: ["GET /orgs/{org}/events"], listReceivedEventsForUser: ["GET /users/{username}/received_events"], listReceivedPublicEventsForUser: ["GET /users/{username}/received_events/public"], listRepoEvents: ["GET /repos/{owner}/{repo}/events"], listRepoNotificationsForAuthenticatedUser: ["GET /repos/{owner}/{repo}/notifications"], 
listReposStarredByAuthenticatedUser: ["GET /user/starred"], listReposStarredByUser: ["GET /users/{username}/starred"], listReposWatchedByUser: ["GET /users/{username}/subscriptions"], listStargazersForRepo: ["GET /repos/{owner}/{repo}/stargazers"], listWatchedReposForAuthenticatedUser: ["GET /user/subscriptions"], listWatchersForRepo: ["GET /repos/{owner}/{repo}/subscribers"], markNotificationsAsRead: ["PUT /notifications"], markRepoNotificationsAsRead: ["PUT /repos/{owner}/{repo}/notifications"], markThreadAsRead: ["PATCH /notifications/threads/{thread_id}"], setRepoSubscription: ["PUT /repos/{owner}/{repo}/subscription"], setThreadSubscription: ["PUT /notifications/threads/{thread_id}/subscription"], starRepoForAuthenticatedUser: ["PUT /user/starred/{owner}/{repo}"], unstarRepoForAuthenticatedUser: ["DELETE /user/starred/{owner}/{repo}"] }, apps: { addRepoToInstallation: ["PUT /user/installations/{installation_id}/repositories/{repository_id}"], checkToken: ["POST /applications/{client_id}/token"], createContentAttachment: ["POST /content_references/{content_reference_id}/attachments", { mediaType: { previews: ["corsair"] } }], createFromManifest: ["POST /app-manifests/{code}/conversions"], createInstallationAccessToken: ["POST /app/installations/{installation_id}/access_tokens"], deleteAuthorization: ["DELETE /applications/{client_id}/grant"], deleteInstallation: ["DELETE /app/installations/{installation_id}"], deleteToken: ["DELETE /applications/{client_id}/token"], getAuthenticated: ["GET /app"], getBySlug: ["GET /apps/{app_slug}"], getInstallation: ["GET /app/installations/{installation_id}"], getOrgInstallation: ["GET /orgs/{org}/installation"], getRepoInstallation: ["GET /repos/{owner}/{repo}/installation"], getSubscriptionPlanForAccount: ["GET /marketplace_listing/accounts/{account_id}"], getSubscriptionPlanForAccountStubbed: ["GET /marketplace_listing/stubbed/accounts/{account_id}"], getUserInstallation: ["GET /users/{username}/installation"], 
getWebhookConfigForApp: ["GET /app/hook/config"], listAccountsForPlan: ["GET /marketplace_listing/plans/{plan_id}/accounts"], listAccountsForPlanStubbed: ["GET /marketplace_listing/stubbed/plans/{plan_id}/accounts"], listInstallationReposForAuthenticatedUser: ["GET /user/installations/{installation_id}/repositories"], listInstallations: ["GET /app/installations"], listInstallationsForAuthenticatedUser: ["GET /user/installations"], listPlans: ["GET /marketplace_listing/plans"], listPlansStubbed: ["GET /marketplace_listing/stubbed/plans"], listReposAccessibleToInstallation: ["GET /installation/repositories"], listSubscriptionsForAuthenticatedUser: ["GET /user/marketplace_purchases"], listSubscriptionsForAuthenticatedUserStubbed: ["GET /user/marketplace_purchases/stubbed"], removeRepoFromInstallation: ["DELETE /user/installations/{installation_id}/repositories/{repository_id}"], resetToken: ["PATCH /applications/{client_id}/token"], revokeInstallationAccessToken: ["DELETE /installation/token"], scopeToken: ["POST /applications/{client_id}/token/scoped"], suspendInstallation: ["PUT /app/installations/{installation_id}/suspended"], unsuspendInstallation: ["DELETE /app/installations/{installation_id}/suspended"], updateWebhookConfigForApp: ["PATCH /app/hook/config"] }, billing: { getGithubActionsBillingOrg: ["GET /orgs/{org}/settings/billing/actions"], getGithubActionsBillingUser: ["GET /users/{username}/settings/billing/actions"], getGithubPackagesBillingOrg: ["GET /orgs/{org}/settings/billing/packages"], getGithubPackagesBillingUser: ["GET /users/{username}/settings/billing/packages"], getSharedStorageBillingOrg: ["GET /orgs/{org}/settings/billing/shared-storage"], getSharedStorageBillingUser: ["GET /users/{username}/settings/billing/shared-storage"] }, checks: { create: ["POST /repos/{owner}/{repo}/check-runs"], createSuite: ["POST /repos/{owner}/{repo}/check-suites"], get: ["GET /repos/{owner}/{repo}/check-runs/{check_run_id}"], getSuite: ["GET 
/repos/{owner}/{repo}/check-suites/{check_suite_id}"], listAnnotations: ["GET /repos/{owner}/{repo}/check-runs/{check_run_id}/annotations"], listForRef: ["GET /repos/{owner}/{repo}/commits/{ref}/check-runs"], listForSuite: ["GET /repos/{owner}/{repo}/check-suites/{check_suite_id}/check-runs"], listSuitesForRef: ["GET /repos/{owner}/{repo}/commits/{ref}/check-suites"], rerequestSuite: ["POST /repos/{owner}/{repo}/check-suites/{check_suite_id}/rerequest"], setSuitesPreferences: ["PATCH /repos/{owner}/{repo}/check-suites/preferences"], update: ["PATCH /repos/{owner}/{repo}/check-runs/{check_run_id}"] }, codeScanning: { deleteAnalysis: ["DELETE /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}{?confirm_delete}"], getAlert: ["GET /repos/{owner}/{repo}/code-scanning/alerts/{alert_number}", {}, { renamedParameters: { alert_id: "alert_number" } }], getAnalysis: ["GET /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}"], getSarif: ["GET /repos/{owner}/{repo}/code-scanning/sarifs/{sarif_id}"], listAlertsForRepo: ["GET /repos/{owner}/{repo}/code-scanning/alerts"], listAlertsInstances: ["GET /repos/{owner}/{repo}/code-scanning/alerts/{alert_number}/instances"], listRecentAnalyses: ["GET /repos/{owner}/{repo}/code-scanning/analyses"], updateAlert: ["PATCH /repos/{owner}/{repo}/code-scanning/alerts/{alert_number}"], uploadSarif: ["POST /repos/{owner}/{repo}/code-scanning/sarifs"] }, codesOfConduct: { getAllCodesOfConduct: ["GET /codes_of_conduct", { mediaType: { previews: ["scarlet-witch"] } }], getConductCode: ["GET /codes_of_conduct/{key}", { mediaType: { previews: ["scarlet-witch"] } }], getForRepo: ["GET /repos/{owner}/{repo}/community/code_of_conduct", { mediaType: { previews: ["scarlet-witch"] } }] }, emojis: { get: ["GET /emojis"] }, enterpriseAdmin: { disableSelectedOrganizationGithubActionsEnterprise: ["DELETE /enterprises/{enterprise}/actions/permissions/organizations/{org_id}"], enableSelectedOrganizationGithubActionsEnterprise: ["PUT 
/enterprises/{enterprise}/actions/permissions/organizations/{org_id}"], getAllowedActionsEnterprise: ["GET /enterprises/{enterprise}/actions/permissions/selected-actions"], getGithubActionsPermissionsEnterprise: ["GET /enterprises/{enterprise}/actions/permissions"], listSelectedOrganizationsEnabledGithubActionsEnterprise: ["GET /enterprises/{enterprise}/actions/permissions/organizations"], setAllowedActionsEnterprise: ["PUT /enterprises/{enterprise}/actions/permissions/selected-actions"], setGithubActionsPermissionsEnterprise: ["PUT /enterprises/{enterprise}/actions/permissions"], setSelectedOrganizationsEnabledGithubActionsEnterprise: ["PUT /enterprises/{enterprise}/actions/permissions/organizations"] }, gists: { checkIsStarred: ["GET /gists/{gist_id}/star"], create: ["POST /gists"], createComment: ["POST /gists/{gist_id}/comments"], delete: ["DELETE /gists/{gist_id}"], deleteComment: ["DELETE /gists/{gist_id}/comments/{comment_id}"], fork: ["POST /gists/{gist_id}/forks"], get: ["GET /gists/{gist_id}"], getComment: ["GET /gists/{gist_id}/comments/{comment_id}"], getRevision: ["GET /gists/{gist_id}/{sha}"], list: ["GET /gists"], listComments: ["GET /gists/{gist_id}/comments"], listCommits: ["GET /gists/{gist_id}/commits"], listForUser: ["GET /users/{username}/gists"], listForks: ["GET /gists/{gist_id}/forks"], listPublic: ["GET /gists/public"], listStarred: ["GET /gists/starred"], star: ["PUT /gists/{gist_id}/star"], unstar: ["DELETE /gists/{gist_id}/star"], update: ["PATCH /gists/{gist_id}"], updateComment: ["PATCH /gists/{gist_id}/comments/{comment_id}"] }, git: { createBlob: ["POST /repos/{owner}/{repo}/git/blobs"], createCommit: ["POST /repos/{owner}/{repo}/git/commits"], createRef: ["POST /repos/{owner}/{repo}/git/refs"], createTag: ["POST /repos/{owner}/{repo}/git/tags"], createTree: ["POST /repos/{owner}/{repo}/git/trees"], deleteRef: ["DELETE /repos/{owner}/{repo}/git/refs/{ref}"], getBlob: ["GET /repos/{owner}/{repo}/git/blobs/{file_sha}"], getCommit: 
["GET /repos/{owner}/{repo}/git/commits/{commit_sha}"], getRef: ["GET /repos/{owner}/{repo}/git/ref/{ref}"], getTag: ["GET /repos/{owner}/{repo}/git/tags/{tag_sha}"], getTree: ["GET /repos/{owner}/{repo}/git/trees/{tree_sha}"], listMatchingRefs: ["GET /repos/{owner}/{repo}/git/matching-refs/{ref}"], updateRef: ["PATCH /repos/{owner}/{repo}/git/refs/{ref}"] }, gitignore: { getAllTemplates: ["GET /gitignore/templates"], getTemplate: ["GET /gitignore/templates/{name}"] }, interactions: { getRestrictionsForAuthenticatedUser: ["GET /user/interaction-limits"], getRestrictionsForOrg: ["GET /orgs/{org}/interaction-limits"], getRestrictionsForRepo: ["GET /repos/{owner}/{repo}/interaction-limits"], getRestrictionsForYourPublicRepos: ["GET /user/interaction-limits", {}, { renamed: ["interactions", "getRestrictionsForAuthenticatedUser"] }], removeRestrictionsForAuthenticatedUser: ["DELETE /user/interaction-limits"], removeRestrictionsForOrg: ["DELETE /orgs/{org}/interaction-limits"], removeRestrictionsForRepo: ["DELETE /repos/{owner}/{repo}/interaction-limits"], removeRestrictionsForYourPublicRepos: ["DELETE /user/interaction-limits", {}, { renamed: ["interactions", "removeRestrictionsForAuthenticatedUser"] }], setRestrictionsForAuthenticatedUser: ["PUT /user/interaction-limits"], setRestrictionsForOrg: ["PUT /orgs/{org}/interaction-limits"], setRestrictionsForRepo: ["PUT /repos/{owner}/{repo}/interaction-limits"], setRestrictionsForYourPublicRepos: ["PUT /user/interaction-limits", {}, { renamed: ["interactions", "setRestrictionsForAuthenticatedUser"] }] }, issues: { addAssignees: ["POST /repos/{owner}/{repo}/issues/{issue_number}/assignees"], addLabels: ["POST /repos/{owner}/{repo}/issues/{issue_number}/labels"], checkUserCanBeAssigned: ["GET /repos/{owner}/{repo}/assignees/{assignee}"], create: ["POST /repos/{owner}/{repo}/issues"], createComment: ["POST /repos/{owner}/{repo}/issues/{issue_number}/comments"], createLabel: ["POST /repos/{owner}/{repo}/labels"], 
createMilestone: ["POST /repos/{owner}/{repo}/milestones"], deleteComment: ["DELETE /repos/{owner}/{repo}/issues/comments/{comment_id}"], deleteLabel: ["DELETE /repos/{owner}/{repo}/labels/{name}"], deleteMilestone: ["DELETE /repos/{owner}/{repo}/milestones/{milestone_number}"], get: ["GET /repos/{owner}/{repo}/issues/{issue_number}"], getComment: ["GET /repos/{owner}/{repo}/issues/comments/{comment_id}"], getEvent: ["GET /repos/{owner}/{repo}/issues/events/{event_id}"], getLabel: ["GET /repos/{owner}/{repo}/labels/{name}"], getMilestone: ["GET /repos/{owner}/{repo}/milestones/{milestone_number}"], list: ["GET /issues"], listAssignees: ["GET /repos/{owner}/{repo}/assignees"], listComments: ["GET /repos/{owner}/{repo}/issues/{issue_number}/comments"], listCommentsForRepo: ["GET /repos/{owner}/{repo}/issues/comments"], listEvents: ["GET /repos/{owner}/{repo}/issues/{issue_number}/events"], listEventsForRepo: ["GET /repos/{owner}/{repo}/issues/events"], listEventsForTimeline: ["GET /repos/{owner}/{repo}/issues/{issue_number}/timeline", { mediaType: { previews: ["mockingbird"] } }], listForAuthenticatedUser: ["GET /user/issues"], listForOrg: ["GET /orgs/{org}/issues"], listForRepo: ["GET /repos/{owner}/{repo}/issues"], listLabelsForMilestone: ["GET /repos/{owner}/{repo}/milestones/{milestone_number}/labels"], listLabelsForRepo: ["GET /repos/{owner}/{repo}/labels"], listLabelsOnIssue: ["GET /repos/{owner}/{repo}/issues/{issue_number}/labels"], listMilestones: ["GET /repos/{owner}/{repo}/milestones"], lock: ["PUT /repos/{owner}/{repo}/issues/{issue_number}/lock"], removeAllLabels: ["DELETE /repos/{owner}/{repo}/issues/{issue_number}/labels"], removeAssignees: ["DELETE /repos/{owner}/{repo}/issues/{issue_number}/assignees"], removeLabel: ["DELETE /repos/{owner}/{repo}/issues/{issue_number}/labels/{name}"], setLabels: ["PUT /repos/{owner}/{repo}/issues/{issue_number}/labels"], unlock: ["DELETE /repos/{owner}/{repo}/issues/{issue_number}/lock"], update: ["PATCH 
/repos/{owner}/{repo}/issues/{issue_number}"], updateComment: ["PATCH /repos/{owner}/{repo}/issues/comments/{comment_id}"], updateLabel: ["PATCH /repos/{owner}/{repo}/labels/{name}"], updateMilestone: ["PATCH /repos/{owner}/{repo}/milestones/{milestone_number}"] }, licenses: { get: ["GET /licenses/{license}"], getAllCommonlyUsed: ["GET /licenses"], getForRepo: ["GET /repos/{owner}/{repo}/license"] }, markdown: { render: ["POST /markdown"], renderRaw: ["POST /markdown/raw", { headers: { "content-type": "text/plain; charset=utf-8" } }] }, meta: { get: ["GET /meta"], getOctocat: ["GET /octocat"], getZen: ["GET /zen"], root: ["GET /"] }, migrations: { cancelImport: ["DELETE /repos/{owner}/{repo}/import"], deleteArchiveForAuthenticatedUser: ["DELETE /user/migrations/{migration_id}/archive", { mediaType: { previews: ["wyandotte"] } }], deleteArchiveForOrg: ["DELETE /orgs/{org}/migrations/{migration_id}/archive", { mediaType: { previews: ["wyandotte"] } }], downloadArchiveForOrg: ["GET /orgs/{org}/migrations/{migration_id}/archive", { mediaType: { previews: ["wyandotte"] } }], getArchiveForAuthenticatedUser: ["GET /user/migrations/{migration_id}/archive", { mediaType: { previews: ["wyandotte"] } }], getCommitAuthors: ["GET /repos/{owner}/{repo}/import/authors"], getImportStatus: ["GET /repos/{owner}/{repo}/import"], getLargeFiles: ["GET /repos/{owner}/{repo}/import/large_files"], getStatusForAuthenticatedUser: ["GET /user/migrations/{migration_id}", { mediaType: { previews: ["wyandotte"] } }], getStatusForOrg: ["GET /orgs/{org}/migrations/{migration_id}", { mediaType: { previews: ["wyandotte"] } }], listForAuthenticatedUser: ["GET /user/migrations", { mediaType: { previews: ["wyandotte"] } }], listForOrg: ["GET /orgs/{org}/migrations", { mediaType: { previews: ["wyandotte"] } }], listReposForOrg: ["GET /orgs/{org}/migrations/{migration_id}/repositories", { mediaType: { previews: ["wyandotte"] } }], listReposForUser: ["GET /user/migrations/{migration_id}/repositories", { 
mediaType: { previews: ["wyandotte"] } }], mapCommitAuthor: ["PATCH /repos/{owner}/{repo}/import/authors/{author_id}"], setLfsPreference: ["PATCH /repos/{owner}/{repo}/import/lfs"], startForAuthenticatedUser: ["POST /user/migrations"], startForOrg: ["POST /orgs/{org}/migrations"], startImport: ["PUT /repos/{owner}/{repo}/import"], unlockRepoForAuthenticatedUser: ["DELETE /user/migrations/{migration_id}/repos/{repo_name}/lock", { mediaType: { previews: ["wyandotte"] } }], unlockRepoForOrg: ["DELETE /orgs/{org}/migrations/{migration_id}/repos/{repo_name}/lock", { mediaType: { previews: ["wyandotte"] } }], updateImport: ["PATCH /repos/{owner}/{repo}/import"] }, orgs: { blockUser: ["PUT /orgs/{org}/blocks/{username}"], cancelInvitation: ["DELETE /orgs/{org}/invitations/{invitation_id}"], checkBlockedUser: ["GET /orgs/{org}/blocks/{username}"], checkMembershipForUser: ["GET /orgs/{org}/members/{username}"], checkPublicMembershipForUser: ["GET /orgs/{org}/public_members/{username}"], convertMemberToOutsideCollaborator: ["PUT /orgs/{org}/outside_collaborators/{username}"], createInvitation: ["POST /orgs/{org}/invitations"], createWebhook: ["POST /orgs/{org}/hooks"], deleteWebhook: ["DELETE /orgs/{org}/hooks/{hook_id}"], get: ["GET /orgs/{org}"], getMembershipForAuthenticatedUser: ["GET /user/memberships/orgs/{org}"], getMembershipForUser: ["GET /orgs/{org}/memberships/{username}"], getWebhook: ["GET /orgs/{org}/hooks/{hook_id}"], getWebhookConfigForOrg: ["GET /orgs/{org}/hooks/{hook_id}/config"], list: ["GET /organizations"], listAppInstallations: ["GET /orgs/{org}/installations"], listBlockedUsers: ["GET /orgs/{org}/blocks"], listFailedInvitations: ["GET /orgs/{org}/failed_invitations"], listForAuthenticatedUser: ["GET /user/orgs"], listForUser: ["GET /users/{username}/orgs"], listInvitationTeams: ["GET /orgs/{org}/invitations/{invitation_id}/teams"], listMembers: ["GET /orgs/{org}/members"], listMembershipsForAuthenticatedUser: ["GET /user/memberships/orgs"], 
listOutsideCollaborators: ["GET /orgs/{org}/outside_collaborators"], listPendingInvitations: ["GET /orgs/{org}/invitations"], listPublicMembers: ["GET /orgs/{org}/public_members"], listWebhooks: ["GET /orgs/{org}/hooks"], pingWebhook: ["POST /orgs/{org}/hooks/{hook_id}/pings"], removeMember: ["DELETE /orgs/{org}/members/{username}"], removeMembershipForUser: ["DELETE /orgs/{org}/memberships/{username}"], removeOutsideCollaborator: ["DELETE /orgs/{org}/outside_collaborators/{username}"], removePublicMembershipForAuthenticatedUser: ["DELETE /orgs/{org}/public_members/{username}"], setMembershipForUser: ["PUT /orgs/{org}/memberships/{username}"], setPublicMembershipForAuthenticatedUser: ["PUT /orgs/{org}/public_members/{username}"], unblockUser: ["DELETE /orgs/{org}/blocks/{username}"], update: ["PATCH /orgs/{org}"], updateMembershipForAuthenticatedUser: ["PATCH /user/memberships/orgs/{org}"], updateWebhook: ["PATCH /orgs/{org}/hooks/{hook_id}"], updateWebhookConfigForOrg: ["PATCH /orgs/{org}/hooks/{hook_id}/config"] }, packages: { deletePackageForAuthenticatedUser: ["DELETE /user/packages/{package_type}/{package_name}"], deletePackageForOrg: ["DELETE /orgs/{org}/packages/{package_type}/{package_name}"], deletePackageVersionForAuthenticatedUser: ["DELETE /user/packages/{package_type}/{package_name}/versions/{package_version_id}"], deletePackageVersionForOrg: ["DELETE /orgs/{org}/packages/{package_type}/{package_name}/versions/{package_version_id}"], getAllPackageVersionsForAPackageOwnedByAnOrg: ["GET /orgs/{org}/packages/{package_type}/{package_name}/versions", {}, { renamed: ["packages", "getAllPackageVersionsForPackageOwnedByOrg"] }], getAllPackageVersionsForAPackageOwnedByTheAuthenticatedUser: ["GET /user/packages/{package_type}/{package_name}/versions", {}, { renamed: ["packages", "getAllPackageVersionsForPackageOwnedByAuthenticatedUser"] }], getAllPackageVersionsForPackageOwnedByAuthenticatedUser: ["GET /user/packages/{package_type}/{package_name}/versions"], 
getAllPackageVersionsForPackageOwnedByOrg: ["GET /orgs/{org}/packages/{package_type}/{package_name}/versions"], getAllPackageVersionsForPackageOwnedByUser: ["GET /users/{username}/packages/{package_type}/{package_name}/versions"], getPackageForAuthenticatedUser: ["GET /user/packages/{package_type}/{package_name}"], getPackageForOrganization: ["GET /orgs/{org}/packages/{package_type}/{package_name}"], getPackageForUser: ["GET /users/{username}/packages/{package_type}/{package_name}"], getPackageVersionForAuthenticatedUser: ["GET /user/packages/{package_type}/{package_name}/versions/{package_version_id}"], getPackageVersionForOrganization: ["GET /orgs/{org}/packages/{package_type}/{package_name}/versions/{package_version_id}"], getPackageVersionForUser: ["GET /users/{username}/packages/{package_type}/{package_name}/versions/{package_version_id}"], restorePackageForAuthenticatedUser: ["POST /user/packages/{package_type}/{package_name}/restore{?token}"], restorePackageForOrg: ["POST /orgs/{org}/packages/{package_type}/{package_name}/restore{?token}"], restorePackageVersionForAuthenticatedUser: ["POST /user/packages/{package_type}/{package_name}/versions/{package_version_id}/restore"], restorePackageVersionForOrg: ["POST /orgs/{org}/packages/{package_type}/{package_name}/versions/{package_version_id}/restore"] }, projects: { addCollaborator: ["PUT /projects/{project_id}/collaborators/{username}", { mediaType: { previews: ["inertia"] } }], createCard: ["POST /projects/columns/{column_id}/cards", { mediaType: { previews: ["inertia"] } }], createColumn: ["POST /projects/{project_id}/columns", { mediaType: { previews: ["inertia"] } }], createForAuthenticatedUser: ["POST /user/projects", { mediaType: { previews: ["inertia"] } }], createForOrg: ["POST /orgs/{org}/projects", { mediaType: { previews: ["inertia"] } }], createForRepo: ["POST /repos/{owner}/{repo}/projects", { mediaType: { previews: ["inertia"] } }], delete: ["DELETE /projects/{project_id}", { mediaType: { 
previews: ["inertia"] } }], deleteCard: ["DELETE /projects/columns/cards/{card_id}", { mediaType: { previews: ["inertia"] } }], deleteColumn: ["DELETE /projects/columns/{column_id}", { mediaType: { previews: ["inertia"] } }], get: ["GET /projects/{project_id}", { mediaType: { previews: ["inertia"] } }], getCard: ["GET /projects/columns/cards/{card_id}", { mediaType: { previews: ["inertia"] } }], getColumn: ["GET /projects/columns/{column_id}", { mediaType: { previews: ["inertia"] } }], getPermissionForUser: ["GET /projects/{project_id}/collaborators/{username}/permission", { mediaType: { previews: ["inertia"] } }], listCards: ["GET /projects/columns/{column_id}/cards", { mediaType: { previews: ["inertia"] } }], listCollaborators: ["GET /projects/{project_id}/collaborators", { mediaType: { previews: ["inertia"] } }], listColumns: ["GET /projects/{project_id}/columns", { mediaType: { previews: ["inertia"] } }], listForOrg: ["GET /orgs/{org}/projects", { mediaType: { previews: ["inertia"] } }], listForRepo: ["GET /repos/{owner}/{repo}/projects", { mediaType: { previews: ["inertia"] } }], listForUser: ["GET /users/{username}/projects", { mediaType: { previews: ["inertia"] } }], moveCard: ["POST /projects/columns/cards/{card_id}/moves", { mediaType: { previews: ["inertia"] } }], moveColumn: ["POST /projects/columns/{column_id}/moves", { mediaType: { previews: ["inertia"] } }], removeCollaborator: ["DELETE /projects/{project_id}/collaborators/{username}", { mediaType: { previews: ["inertia"] } }], update: ["PATCH /projects/{project_id}", { mediaType: { previews: ["inertia"] } }], updateCard: ["PATCH /projects/columns/cards/{card_id}", { mediaType: { previews: ["inertia"] } }], updateColumn: ["PATCH /projects/columns/{column_id}", { mediaType: { previews: ["inertia"] } }] }, pulls: { checkIfMerged: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/merge"], create: ["POST /repos/{owner}/{repo}/pulls"], createReplyForReviewComment: ["POST 
/repos/{owner}/{repo}/pulls/{pull_number}/comments/{comment_id}/replies"], createReview: ["POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews"], createReviewComment: ["POST /repos/{owner}/{repo}/pulls/{pull_number}/comments"], deletePendingReview: ["DELETE /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}"], deleteReviewComment: ["DELETE /repos/{owner}/{repo}/pulls/comments/{comment_id}"], dismissReview: ["PUT /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}/dismissals"], get: ["GET /repos/{owner}/{repo}/pulls/{pull_number}"], getReview: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}"], getReviewComment: ["GET /repos/{owner}/{repo}/pulls/comments/{comment_id}"], list: ["GET /repos/{owner}/{repo}/pulls"], listCommentsForReview: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}/comments"], listCommits: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/commits"], listFiles: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/files"], listRequestedReviewers: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/requested_reviewers"], listReviewComments: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/comments"], listReviewCommentsForRepo: ["GET /repos/{owner}/{repo}/pulls/comments"], listReviews: ["GET /repos/{owner}/{repo}/pulls/{pull_number}/reviews"], merge: ["PUT /repos/{owner}/{repo}/pulls/{pull_number}/merge"], removeRequestedReviewers: ["DELETE /repos/{owner}/{repo}/pulls/{pull_number}/requested_reviewers"], requestReviewers: ["POST /repos/{owner}/{repo}/pulls/{pull_number}/requested_reviewers"], submitReview: ["POST /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}/events"], update: ["PATCH /repos/{owner}/{repo}/pulls/{pull_number}"], updateBranch: ["PUT /repos/{owner}/{repo}/pulls/{pull_number}/update-branch", { mediaType: { previews: ["lydian"] } }], updateReview: ["PUT /repos/{owner}/{repo}/pulls/{pull_number}/reviews/{review_id}"], updateReviewComment: ["PATCH 
/repos/{owner}/{repo}/pulls/comments/{comment_id}"] }, rateLimit: { get: ["GET /rate_limit"] }, reactions: { createForCommitComment: ["POST /repos/{owner}/{repo}/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], createForIssue: ["POST /repos/{owner}/{repo}/issues/{issue_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], createForIssueComment: ["POST /repos/{owner}/{repo}/issues/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], createForPullRequestReviewComment: ["POST /repos/{owner}/{repo}/pulls/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], createForTeamDiscussionCommentInOrg: ["POST /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], createForTeamDiscussionInOrg: ["POST /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], deleteForCommitComment: ["DELETE /repos/{owner}/{repo}/comments/{comment_id}/reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }], deleteForIssue: ["DELETE /repos/{owner}/{repo}/issues/{issue_number}/reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }], deleteForIssueComment: ["DELETE /repos/{owner}/{repo}/issues/comments/{comment_id}/reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }], deleteForPullRequestComment: ["DELETE /repos/{owner}/{repo}/pulls/comments/{comment_id}/reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }], deleteForTeamDiscussion: ["DELETE /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }], deleteForTeamDiscussionComment: ["DELETE /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}/reactions/{reaction_id}", { mediaType: { previews: 
["squirrel-girl"] } }], deleteLegacy: ["DELETE /reactions/{reaction_id}", { mediaType: { previews: ["squirrel-girl"] } }, { deprecated: "octokit.reactions.deleteLegacy() is deprecated, see https://docs.github.com/rest/reference/reactions/#delete-a-reaction-legacy" }], listForCommitComment: ["GET /repos/{owner}/{repo}/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], listForIssue: ["GET /repos/{owner}/{repo}/issues/{issue_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], listForIssueComment: ["GET /repos/{owner}/{repo}/issues/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], listForPullRequestReviewComment: ["GET /repos/{owner}/{repo}/pulls/comments/{comment_id}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], listForTeamDiscussionCommentInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }], listForTeamDiscussionInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/reactions", { mediaType: { previews: ["squirrel-girl"] } }] }, repos: { acceptInvitation: ["PATCH /user/repository_invitations/{invitation_id}"], addAppAccessRestrictions: ["POST /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/apps", {}, { mapToData: "apps" }], addCollaborator: ["PUT /repos/{owner}/{repo}/collaborators/{username}"], addStatusCheckContexts: ["POST /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks/contexts", {}, { mapToData: "contexts" }], addTeamAccessRestrictions: ["POST /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/teams", {}, { mapToData: "teams" }], addUserAccessRestrictions: ["POST /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/users", {}, { mapToData: "users" }], checkCollaborator: ["GET /repos/{owner}/{repo}/collaborators/{username}"], checkVulnerabilityAlerts: ["GET 
/repos/{owner}/{repo}/vulnerability-alerts", { mediaType: { previews: ["dorian"] } }], compareCommits: ["GET /repos/{owner}/{repo}/compare/{base}...{head}"], createCommitComment: ["POST /repos/{owner}/{repo}/commits/{commit_sha}/comments"], createCommitSignatureProtection: ["POST /repos/{owner}/{repo}/branches/{branch}/protection/required_signatures", { mediaType: { previews: ["zzzax"] } }], createCommitStatus: ["POST /repos/{owner}/{repo}/statuses/{sha}"], createDeployKey: ["POST /repos/{owner}/{repo}/keys"], createDeployment: ["POST /repos/{owner}/{repo}/deployments"], createDeploymentStatus: ["POST /repos/{owner}/{repo}/deployments/{deployment_id}/statuses"], createDispatchEvent: ["POST /repos/{owner}/{repo}/dispatches"], createForAuthenticatedUser: ["POST /user/repos"], createFork: ["POST /repos/{owner}/{repo}/forks{?org,organization}"], createInOrg: ["POST /orgs/{org}/repos"], createOrUpdateEnvironment: ["PUT /repos/{owner}/{repo}/environments/{environment_name}"], createOrUpdateFileContents: ["PUT /repos/{owner}/{repo}/contents/{path}"], createPagesSite: ["POST /repos/{owner}/{repo}/pages", { mediaType: { previews: ["switcheroo"] } }], createRelease: ["POST /repos/{owner}/{repo}/releases"], createUsingTemplate: ["POST /repos/{template_owner}/{template_repo}/generate", { mediaType: { previews: ["baptiste"] } }], createWebhook: ["POST /repos/{owner}/{repo}/hooks"], declineInvitation: ["DELETE /user/repository_invitations/{invitation_id}"], delete: ["DELETE /repos/{owner}/{repo}"], deleteAccessRestrictions: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/restrictions"], deleteAdminBranchProtection: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/enforce_admins"], deleteAnEnvironment: ["DELETE /repos/{owner}/{repo}/environments/{environment_name}"], deleteBranchProtection: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection"], deleteCommitComment: ["DELETE /repos/{owner}/{repo}/comments/{comment_id}"], 
deleteCommitSignatureProtection: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/required_signatures", { mediaType: { previews: ["zzzax"] } }], deleteDeployKey: ["DELETE /repos/{owner}/{repo}/keys/{key_id}"], deleteDeployment: ["DELETE /repos/{owner}/{repo}/deployments/{deployment_id}"], deleteFile: ["DELETE /repos/{owner}/{repo}/contents/{path}"], deleteInvitation: ["DELETE /repos/{owner}/{repo}/invitations/{invitation_id}"], deletePagesSite: ["DELETE /repos/{owner}/{repo}/pages", { mediaType: { previews: ["switcheroo"] } }], deletePullRequestReviewProtection: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/required_pull_request_reviews"], deleteRelease: ["DELETE /repos/{owner}/{repo}/releases/{release_id}"], deleteReleaseAsset: ["DELETE /repos/{owner}/{repo}/releases/assets/{asset_id}"], deleteWebhook: ["DELETE /repos/{owner}/{repo}/hooks/{hook_id}"], disableAutomatedSecurityFixes: ["DELETE /repos/{owner}/{repo}/automated-security-fixes", { mediaType: { previews: ["london"] } }], disableVulnerabilityAlerts: ["DELETE /repos/{owner}/{repo}/vulnerability-alerts", { mediaType: { previews: ["dorian"] } }], downloadArchive: ["GET /repos/{owner}/{repo}/zipball/{ref}", {}, { renamed: ["repos", "downloadZipballArchive"] }], downloadTarballArchive: ["GET /repos/{owner}/{repo}/tarball/{ref}"], downloadZipballArchive: ["GET /repos/{owner}/{repo}/zipball/{ref}"], enableAutomatedSecurityFixes: ["PUT /repos/{owner}/{repo}/automated-security-fixes", { mediaType: { previews: ["london"] } }], enableVulnerabilityAlerts: ["PUT /repos/{owner}/{repo}/vulnerability-alerts", { mediaType: { previews: ["dorian"] } }], get: ["GET /repos/{owner}/{repo}"], getAccessRestrictions: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/restrictions"], getAdminBranchProtection: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/enforce_admins"], getAllEnvironments: ["GET /repos/{owner}/{repo}/environments"], getAllStatusCheckContexts: ["GET 
/repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks/contexts"], getAllTopics: ["GET /repos/{owner}/{repo}/topics", { mediaType: { previews: ["mercy"] } }], getAppsWithAccessToProtectedBranch: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/apps"], getBranch: ["GET /repos/{owner}/{repo}/branches/{branch}"], getBranchProtection: ["GET /repos/{owner}/{repo}/branches/{branch}/protection"], getClones: ["GET /repos/{owner}/{repo}/traffic/clones"], getCodeFrequencyStats: ["GET /repos/{owner}/{repo}/stats/code_frequency"], getCollaboratorPermissionLevel: ["GET /repos/{owner}/{repo}/collaborators/{username}/permission"], getCombinedStatusForRef: ["GET /repos/{owner}/{repo}/commits/{ref}/status"], getCommit: ["GET /repos/{owner}/{repo}/commits/{ref}"], getCommitActivityStats: ["GET /repos/{owner}/{repo}/stats/commit_activity"], getCommitComment: ["GET /repos/{owner}/{repo}/comments/{comment_id}"], getCommitSignatureProtection: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/required_signatures", { mediaType: { previews: ["zzzax"] } }], getCommunityProfileMetrics: ["GET /repos/{owner}/{repo}/community/profile"], getContent: ["GET /repos/{owner}/{repo}/contents/{path}"], getContributorsStats: ["GET /repos/{owner}/{repo}/stats/contributors"], getDeployKey: ["GET /repos/{owner}/{repo}/keys/{key_id}"], getDeployment: ["GET /repos/{owner}/{repo}/deployments/{deployment_id}"], getDeploymentStatus: ["GET /repos/{owner}/{repo}/deployments/{deployment_id}/statuses/{status_id}"], getEnvironment: ["GET /repos/{owner}/{repo}/environments/{environment_name}"], getLatestPagesBuild: ["GET /repos/{owner}/{repo}/pages/builds/latest"], getLatestRelease: ["GET /repos/{owner}/{repo}/releases/latest"], getPages: ["GET /repos/{owner}/{repo}/pages"], getPagesBuild: ["GET /repos/{owner}/{repo}/pages/builds/{build_id}"], getParticipationStats: ["GET /repos/{owner}/{repo}/stats/participation"], getPullRequestReviewProtection: ["GET 
/repos/{owner}/{repo}/branches/{branch}/protection/required_pull_request_reviews"], getPunchCardStats: ["GET /repos/{owner}/{repo}/stats/punch_card"], getReadme: ["GET /repos/{owner}/{repo}/readme"], getReadmeInDirectory: ["GET /repos/{owner}/{repo}/readme/{dir}"], getRelease: ["GET /repos/{owner}/{repo}/releases/{release_id}"], getReleaseAsset: ["GET /repos/{owner}/{repo}/releases/assets/{asset_id}"], getReleaseByTag: ["GET /repos/{owner}/{repo}/releases/tags/{tag}"], getStatusChecksProtection: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks"], getTeamsWithAccessToProtectedBranch: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/teams"], getTopPaths: ["GET /repos/{owner}/{repo}/traffic/popular/paths"], getTopReferrers: ["GET /repos/{owner}/{repo}/traffic/popular/referrers"], getUsersWithAccessToProtectedBranch: ["GET /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/users"], getViews: ["GET /repos/{owner}/{repo}/traffic/views"], getWebhook: ["GET /repos/{owner}/{repo}/hooks/{hook_id}"], getWebhookConfigForRepo: ["GET /repos/{owner}/{repo}/hooks/{hook_id}/config"], listBranches: ["GET /repos/{owner}/{repo}/branches"], listBranchesForHeadCommit: ["GET /repos/{owner}/{repo}/commits/{commit_sha}/branches-where-head", { mediaType: { previews: ["groot"] } }], listCollaborators: ["GET /repos/{owner}/{repo}/collaborators"], listCommentsForCommit: ["GET /repos/{owner}/{repo}/commits/{commit_sha}/comments"], listCommitCommentsForRepo: ["GET /repos/{owner}/{repo}/comments"], listCommitStatusesForRef: ["GET /repos/{owner}/{repo}/commits/{ref}/statuses"], listCommits: ["GET /repos/{owner}/{repo}/commits"], listContributors: ["GET /repos/{owner}/{repo}/contributors"], listDeployKeys: ["GET /repos/{owner}/{repo}/keys"], listDeploymentStatuses: ["GET /repos/{owner}/{repo}/deployments/{deployment_id}/statuses"], listDeployments: ["GET /repos/{owner}/{repo}/deployments"], listForAuthenticatedUser: ["GET 
/user/repos"], listForOrg: ["GET /orgs/{org}/repos"], listForUser: ["GET /users/{username}/repos"], listForks: ["GET /repos/{owner}/{repo}/forks"], listInvitations: ["GET /repos/{owner}/{repo}/invitations"], listInvitationsForAuthenticatedUser: ["GET /user/repository_invitations"], listLanguages: ["GET /repos/{owner}/{repo}/languages"], listPagesBuilds: ["GET /repos/{owner}/{repo}/pages/builds"], listPublic: ["GET /repositories"], listPullRequestsAssociatedWithCommit: ["GET /repos/{owner}/{repo}/commits/{commit_sha}/pulls", { mediaType: { previews: ["groot"] } }], listReleaseAssets: ["GET /repos/{owner}/{repo}/releases/{release_id}/assets"], listReleases: ["GET /repos/{owner}/{repo}/releases"], listTags: ["GET /repos/{owner}/{repo}/tags"], listTeams: ["GET /repos/{owner}/{repo}/teams"], listWebhooks: ["GET /repos/{owner}/{repo}/hooks"], merge: ["POST /repos/{owner}/{repo}/merges"], pingWebhook: ["POST /repos/{owner}/{repo}/hooks/{hook_id}/pings"], removeAppAccessRestrictions: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/apps", {}, { mapToData: "apps" }], removeCollaborator: ["DELETE /repos/{owner}/{repo}/collaborators/{username}"], removeStatusCheckContexts: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks/contexts", {}, { mapToData: "contexts" }], removeStatusCheckProtection: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks"], removeTeamAccessRestrictions: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/teams", {}, { mapToData: "teams" }], removeUserAccessRestrictions: ["DELETE /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/users", {}, { mapToData: "users" }], renameBranch: ["POST /repos/{owner}/{repo}/branches/{branch}/rename"], replaceAllTopics: ["PUT /repos/{owner}/{repo}/topics", { mediaType: { previews: ["mercy"] } }], requestPagesBuild: ["POST /repos/{owner}/{repo}/pages/builds"], setAdminBranchProtection: ["POST 
/repos/{owner}/{repo}/branches/{branch}/protection/enforce_admins"], setAppAccessRestrictions: ["PUT /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/apps", {}, { mapToData: "apps" }], setStatusCheckContexts: ["PUT /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks/contexts", {}, { mapToData: "contexts" }], setTeamAccessRestrictions: ["PUT /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/teams", {}, { mapToData: "teams" }], setUserAccessRestrictions: ["PUT /repos/{owner}/{repo}/branches/{branch}/protection/restrictions/users", {}, { mapToData: "users" }], testPushWebhook: ["POST /repos/{owner}/{repo}/hooks/{hook_id}/tests"], transfer: ["POST /repos/{owner}/{repo}/transfer"], update: ["PATCH /repos/{owner}/{repo}"], updateBranchProtection: ["PUT /repos/{owner}/{repo}/branches/{branch}/protection"], updateCommitComment: ["PATCH /repos/{owner}/{repo}/comments/{comment_id}"], updateInformationAboutPagesSite: ["PUT /repos/{owner}/{repo}/pages"], updateInvitation: ["PATCH /repos/{owner}/{repo}/invitations/{invitation_id}"], updatePullRequestReviewProtection: ["PATCH /repos/{owner}/{repo}/branches/{branch}/protection/required_pull_request_reviews"], updateRelease: ["PATCH /repos/{owner}/{repo}/releases/{release_id}"], updateReleaseAsset: ["PATCH /repos/{owner}/{repo}/releases/assets/{asset_id}"], updateStatusCheckPotection: ["PATCH /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks", {}, { renamed: ["repos", "updateStatusCheckProtection"] }], updateStatusCheckProtection: ["PATCH /repos/{owner}/{repo}/branches/{branch}/protection/required_status_checks"], updateWebhook: ["PATCH /repos/{owner}/{repo}/hooks/{hook_id}"], updateWebhookConfigForRepo: ["PATCH /repos/{owner}/{repo}/hooks/{hook_id}/config"], uploadReleaseAsset: ["POST /repos/{owner}/{repo}/releases/{release_id}/assets{?name,label}", { baseUrl: "https://uploads.github.com" }] }, search: { code: ["GET /search/code"], commits: ["GET 
/search/commits", { mediaType: { previews: ["cloak"] } }], issuesAndPullRequests: ["GET /search/issues"], labels: ["GET /search/labels"], repos: ["GET /search/repositories"], topics: ["GET /search/topics", { mediaType: { previews: ["mercy"] } }], users: ["GET /search/users"] }, secretScanning: { getAlert: ["GET /repos/{owner}/{repo}/secret-scanning/alerts/{alert_number}"], listAlertsForRepo: ["GET /repos/{owner}/{repo}/secret-scanning/alerts"], updateAlert: ["PATCH /repos/{owner}/{repo}/secret-scanning/alerts/{alert_number}"] }, teams: { addOrUpdateMembershipForUserInOrg: ["PUT /orgs/{org}/teams/{team_slug}/memberships/{username}"], addOrUpdateProjectPermissionsInOrg: ["PUT /orgs/{org}/teams/{team_slug}/projects/{project_id}", { mediaType: { previews: ["inertia"] } }], addOrUpdateRepoPermissionsInOrg: ["PUT /orgs/{org}/teams/{team_slug}/repos/{owner}/{repo}"], checkPermissionsForProjectInOrg: ["GET /orgs/{org}/teams/{team_slug}/projects/{project_id}", { mediaType: { previews: ["inertia"] } }], checkPermissionsForRepoInOrg: ["GET /orgs/{org}/teams/{team_slug}/repos/{owner}/{repo}"], create: ["POST /orgs/{org}/teams"], createDiscussionCommentInOrg: ["POST /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments"], createDiscussionInOrg: ["POST /orgs/{org}/teams/{team_slug}/discussions"], deleteDiscussionCommentInOrg: ["DELETE /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}"], deleteDiscussionInOrg: ["DELETE /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}"], deleteInOrg: ["DELETE /orgs/{org}/teams/{team_slug}"], getByName: ["GET /orgs/{org}/teams/{team_slug}"], getDiscussionCommentInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}"], getDiscussionInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}"], getMembershipForUserInOrg: ["GET /orgs/{org}/teams/{team_slug}/memberships/{username}"], list: ["GET /orgs/{org}/teams"], 
listChildInOrg: ["GET /orgs/{org}/teams/{team_slug}/teams"], listDiscussionCommentsInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments"], listDiscussionsInOrg: ["GET /orgs/{org}/teams/{team_slug}/discussions"], listForAuthenticatedUser: ["GET /user/teams"], listMembersInOrg: ["GET /orgs/{org}/teams/{team_slug}/members"], listPendingInvitationsInOrg: ["GET /orgs/{org}/teams/{team_slug}/invitations"], listProjectsInOrg: ["GET /orgs/{org}/teams/{team_slug}/projects", { mediaType: { previews: ["inertia"] } }], listReposInOrg: ["GET /orgs/{org}/teams/{team_slug}/repos"], removeMembershipForUserInOrg: ["DELETE /orgs/{org}/teams/{team_slug}/memberships/{username}"], removeProjectInOrg: ["DELETE /orgs/{org}/teams/{team_slug}/projects/{project_id}"], removeRepoInOrg: ["DELETE /orgs/{org}/teams/{team_slug}/repos/{owner}/{repo}"], updateDiscussionCommentInOrg: ["PATCH /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments/{comment_number}"], updateDiscussionInOrg: ["PATCH /orgs/{org}/teams/{team_slug}/discussions/{discussion_number}"], updateInOrg: ["PATCH /orgs/{org}/teams/{team_slug}"] }, users: { addEmailForAuthenticated: ["POST /user/emails"], block: ["PUT /user/blocks/{username}"], checkBlocked: ["GET /user/blocks/{username}"], checkFollowingForUser: ["GET /users/{username}/following/{target_user}"], checkPersonIsFollowedByAuthenticated: ["GET /user/following/{username}"], createGpgKeyForAuthenticated: ["POST /user/gpg_keys"], createPublicSshKeyForAuthenticated: ["POST /user/keys"], deleteEmailForAuthenticated: ["DELETE /user/emails"], deleteGpgKeyForAuthenticated: ["DELETE /user/gpg_keys/{gpg_key_id}"], deletePublicSshKeyForAuthenticated: ["DELETE /user/keys/{key_id}"], follow: ["PUT /user/following/{username}"], getAuthenticated: ["GET /user"], getByUsername: ["GET /users/{username}"], getContextForUser: ["GET /users/{username}/hovercard"], getGpgKeyForAuthenticated: ["GET /user/gpg_keys/{gpg_key_id}"], 
getPublicSshKeyForAuthenticated: ["GET /user/keys/{key_id}"], list: ["GET /users"], listBlockedByAuthenticated: ["GET /user/blocks"], listEmailsForAuthenticated: ["GET /user/emails"], listFollowedByAuthenticated: ["GET /user/following"], listFollowersForAuthenticatedUser: ["GET /user/followers"], listFollowersForUser: ["GET /users/{username}/followers"], listFollowingForUser: ["GET /users/{username}/following"], listGpgKeysForAuthenticated: ["GET /user/gpg_keys"], listGpgKeysForUser: ["GET /users/{username}/gpg_keys"], listPublicEmailsForAuthenticated: ["GET /user/public_emails"], listPublicKeysForUser: ["GET /users/{username}/keys"], listPublicSshKeysForAuthenticated: ["GET /user/keys"], setPrimaryEmailVisibilityForAuthenticated: ["PATCH /user/email/visibility"], unblock: ["DELETE /user/blocks/{username}"], unfollow: ["DELETE /user/following/{username}"], updateAuthenticated: ["PATCH /user"] } }; const VERSION = "4.15.0"; function endpointsToMethods(octokit, endpointsMap) { const newMethods = {}; for (const [scope, endpoints] of Object.entries(endpointsMap)) { for (const [methodName, endpoint] of Object.entries(endpoints)) { const [route, defaults, decorations] = endpoint; const [method, url] = route.split(/ /); const endpointDefaults = Object.assign({ method, url }, defaults); if (!newMethods[scope]) { newMethods[scope] = {}; } const scopeMethods = newMethods[scope]; if (decorations) { scopeMethods[methodName] = decorate(octokit, scope, methodName, endpointDefaults, decorations); continue; } scopeMethods[methodName] = octokit.request.defaults(endpointDefaults); } } return newMethods; } function decorate(octokit, scope, methodName, defaults, decorations) { const requestWithDefaults = octokit.request.defaults(defaults); /* istanbul ignore next */ function withDecorations(...args) { // @ts-ignore https://github.com/microsoft/TypeScript/issues/25488 let options = requestWithDefaults.endpoint.merge(...args); // There are currently no other decorations than 
`.mapToData` if (decorations.mapToData) { options = Object.assign({}, options, { data: options[decorations.mapToData], [decorations.mapToData]: undefined }); return requestWithDefaults(options); } if (decorations.renamed) { const [newScope, newMethodName] = decorations.renamed; octokit.log.warn(`octokit.${scope}.${methodName}() has been renamed to octokit.${newScope}.${newMethodName}()`); } if (decorations.deprecated) { octokit.log.warn(decorations.deprecated); } if (decorations.renamedParameters) { // @ts-ignore https://github.com/microsoft/TypeScript/issues/25488 const options = requestWithDefaults.endpoint.merge(...args); for (const [name, alias] of Object.entries(decorations.renamedParameters)) { if (name in options) { octokit.log.warn(`"${name}" parameter is deprecated for "octokit.${scope}.${methodName}()". Use "${alias}" instead`); if (!(alias in options)) { options[alias] = options[name]; } delete options[name]; } } return requestWithDefaults(options); } // @ts-ignore https://github.com/microsoft/TypeScript/issues/25488 return requestWithDefaults(...args); } return Object.assign(withDecorations, requestWithDefaults); } function restEndpointMethods(octokit) { const api = endpointsToMethods(octokit, Endpoints); return _objectSpread2(_objectSpread2({}, api), {}, { rest: api }); } restEndpointMethods.VERSION = VERSION; exports.restEndpointMethods = restEndpointMethods; //# sourceMappingURL=index.js.map /***/ }), /***/ 9968: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; Object.defineProperty(exports, "__esModule", ({ value: true })); function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? 
ex['default'] : ex; } var BottleneckLight = _interopDefault(__nccwpck_require__(1174)); function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; } function _objectSpread2(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } const VERSION = "3.4.1"; const noop = () => Promise.resolve(); // @ts-ignore function wrapRequest(state, request, options) { return state.retryLimiter.schedule(doRequest, state, request, options); } // @ts-ignore async function doRequest(state, request, options) { const isWrite = options.method !== "GET" && options.method !== "HEAD"; const isSearch = options.method === "GET" && options.url.startsWith("/search/"); const isGraphQL = options.url.startsWith("/graphql"); const retryCount = ~~options.request.retryCount; const jobOptions = retryCount > 0 ? { priority: 0, weight: 0 } : {}; if (state.clustering) { // Remove a job from Redis if it has not completed or failed within 60s // Examples: Node process terminated, client disconnected, etc. 
// @ts-ignore jobOptions.expiration = 1000 * 60; } // Guarantee at least 1000ms between writes // GraphQL can also trigger writes if (isWrite || isGraphQL) { await state.write.key(state.id).schedule(jobOptions, noop); } // Guarantee at least 3000ms between requests that trigger notifications if (isWrite && state.triggersNotification(options.url)) { await state.notifications.key(state.id).schedule(jobOptions, noop); } // Guarantee at least 2000ms between search requests if (isSearch) { await state.search.key(state.id).schedule(jobOptions, noop); } const req = state.global.key(state.id).schedule(jobOptions, request, options); if (isGraphQL) { const res = await req; if (res.data.errors != null && // @ts-ignore res.data.errors.some(error => error.type === "RATE_LIMITED")) { const error = Object.assign(new Error("GraphQL Rate Limit Exceeded"), { headers: res.headers, data: res.data }); throw error; } } return req; } var triggersNotificationPaths = ["/orgs/{org}/invitations", "/orgs/{org}/invitations/{invitation_id}", "/orgs/{org}/teams/{team_slug}/discussions", "/orgs/{org}/teams/{team_slug}/discussions/{discussion_number}/comments", "/repos/{owner}/{repo}/collaborators/{username}", "/repos/{owner}/{repo}/commits/{commit_sha}/comments", "/repos/{owner}/{repo}/issues", "/repos/{owner}/{repo}/issues/{issue_number}/comments", "/repos/{owner}/{repo}/pulls", "/repos/{owner}/{repo}/pulls/{pull_number}/comments", "/repos/{owner}/{repo}/pulls/{pull_number}/comments/{comment_id}/replies", "/repos/{owner}/{repo}/pulls/{pull_number}/merge", "/repos/{owner}/{repo}/pulls/{pull_number}/requested_reviewers", "/repos/{owner}/{repo}/pulls/{pull_number}/reviews", "/repos/{owner}/{repo}/releases", "/teams/{team_id}/discussions", "/teams/{team_id}/discussions/{discussion_number}/comments"]; // @ts-ignore function routeMatcher(paths) { // EXAMPLE. 
For the following paths: /* [ "/orgs/{org}/invitations", "/repos/{owner}/{repo}/collaborators/{username}" ] */ // @ts-ignore const regexes = paths.map(path => path.split("/") // @ts-ignore .map(c => c.startsWith("{") ? "(?:.+?)" : c).join("/")); // 'regexes' would contain: /* [ '/orgs/(?:.+?)/invitations', '/repos/(?:.+?)/(?:.+?)/collaborators/(?:.+?)' ] */ // @ts-ignore const regex = `^(?:${regexes.map(r => `(?:${r})`).join("|")})[^/]*$`; // 'regex' would contain: /* ^(?:(?:\/orgs\/(?:.+?)\/invitations)|(?:\/repos\/(?:.+?)\/(?:.+?)\/collaborators\/(?:.+?)))[^\/]*$ It may look scary, but paste it into https://www.debuggex.com/ and it will make a lot more sense! */ return new RegExp(regex, "i"); } const regex = routeMatcher(triggersNotificationPaths); const triggersNotification = regex.test.bind(regex); const groups = {}; // @ts-ignore const createGroups = function (Bottleneck, common) { // @ts-ignore groups.global = new Bottleneck.Group(_objectSpread2({ id: "octokit-global", maxConcurrent: 10 }, common)); // @ts-ignore groups.search = new Bottleneck.Group(_objectSpread2({ id: "octokit-search", maxConcurrent: 1, minTime: 2000 }, common)); // @ts-ignore groups.write = new Bottleneck.Group(_objectSpread2({ id: "octokit-write", maxConcurrent: 1, minTime: 1000 }, common)); // @ts-ignore groups.notifications = new Bottleneck.Group(_objectSpread2({ id: "octokit-notifications", maxConcurrent: 1, minTime: 3000 }, common)); }; function throttling(octokit, octokitOptions = {}) { const { enabled = true, Bottleneck = BottleneckLight, id = "no-id", timeout = 1000 * 60 * 2, // Redis TTL: 2 minutes connection } = octokitOptions.throttle || {}; if (!enabled) { return; } const common = { connection, timeout }; // @ts-ignore if (groups.global == null) { createGroups(Bottleneck, common); } const state = Object.assign(_objectSpread2({ clustering: connection != null, triggersNotification, minimumAbuseRetryAfter: 5, retryAfterBaseValue: 1000, retryLimiter: new Bottleneck(), id }, 
groups), // @ts-ignore octokitOptions.throttle); if (typeof state.onAbuseLimit !== "function" || typeof state.onRateLimit !== "function") { throw new Error(`octokit/plugin-throttling error: You must pass the onAbuseLimit and onRateLimit error handlers. See https://github.com/octokit/rest.js#throttling const octokit = new Octokit({ throttle: { onAbuseLimit: (retryAfter, options) => {/* ... */}, onRateLimit: (retryAfter, options) => {/* ... */} } }) `); } const events = {}; const emitter = new Bottleneck.Events(events); // @ts-ignore events.on("abuse-limit", state.onAbuseLimit); // @ts-ignore events.on("rate-limit", state.onRateLimit); // @ts-ignore events.on("error", e => console.warn("Error in throttling-plugin limit handler", e)); // @ts-ignore state.retryLimiter.on("failed", async function (error, info) { const options = info.args[info.args.length - 1]; const shouldRetryGraphQL = options.url.startsWith("/graphql") && error.status !== 401; if (!(shouldRetryGraphQL || error.status === 403)) { return; } const retryCount = ~~options.request.retryCount; options.request.retryCount = retryCount; const { wantRetry, retryAfter } = await async function () { if (/\babuse\b/i.test(error.message)) { // The user has hit the abuse rate limit. (REST and GraphQL) // https://docs.github.com/en/rest/overview/resources-in-the-rest-api#abuse-rate-limits // The Retry-After header can sometimes be blank when hitting an abuse limit, // but is always present after 2-3s, so make sure to set `retryAfter` to at least 5s by default. 
const retryAfter = Math.max(~~error.headers["retry-after"], state.minimumAbuseRetryAfter); const wantRetry = await emitter.trigger("abuse-limit", retryAfter, options, octokit); return { wantRetry, retryAfter }; } if (error.headers != null && error.headers["x-ratelimit-remaining"] === "0") { // The user has used all their allowed calls for the current time period (REST and GraphQL) // https://docs.github.com/en/rest/reference/rate-limit (REST) // https://docs.github.com/en/graphql/overview/resource-limitations#rate-limit (GraphQL) const rateLimitReset = new Date(~~error.headers["x-ratelimit-reset"] * 1000).getTime(); const retryAfter = Math.max(Math.ceil((rateLimitReset - Date.now()) / 1000), 0); const wantRetry = await emitter.trigger("rate-limit", retryAfter, options, octokit); return { wantRetry, retryAfter }; } return {}; }(); if (wantRetry) { options.request.retryCount++; // @ts-ignore return retryAfter * state.retryAfterBaseValue; } }); octokit.hook.wrap("request", wrapRequest.bind(null, state)); } throttling.VERSION = VERSION; throttling.triggersNotification = triggersNotification; exports.throttling = throttling; //# sourceMappingURL=index.js.map /***/ }), /***/ 537: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; Object.defineProperty(exports, "__esModule", ({ value: true })); function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? 
ex['default'] : ex; } var deprecation = __nccwpck_require__(8932); var once = _interopDefault(__nccwpck_require__(1223)); const logOnce = once(deprecation => console.warn(deprecation)); /** * Error with extra properties to help with debugging */ class RequestError extends Error { constructor(message, statusCode, options) { super(message); // Maintains proper stack trace (only available on V8) /* istanbul ignore next */ if (Error.captureStackTrace) { Error.captureStackTrace(this, this.constructor); } this.name = "HttpError"; this.status = statusCode; Object.defineProperty(this, "code", { get() { logOnce(new deprecation.Deprecation("[@octokit/request-error] `error.code` is deprecated, use `error.status`.")); return statusCode; } }); this.headers = options.headers || {}; // redact request credentials without mutating original request options const requestCopy = Object.assign({}, options.request); if (options.request.headers.authorization) { requestCopy.headers = Object.assign({}, options.request.headers, { authorization: options.request.headers.authorization.replace(/ .*$/, " [REDACTED]") }); } requestCopy.url = requestCopy.url // client_id & client_secret can be passed as URL query parameters to increase rate limit // see https://developer.github.com/v3/#increasing-the-unauthenticated-rate-limit-for-oauth-applications .replace(/\bclient_secret=\w+/g, "client_secret=[REDACTED]") // OAuth tokens can be passed as URL query parameters, although it is not recommended // see https://developer.github.com/v3/#oauth2-token-sent-in-a-header .replace(/\baccess_token=\w+/g, "access_token=[REDACTED]"); this.request = requestCopy; } } exports.RequestError = RequestError; //# sourceMappingURL=index.js.map /***/ }), /***/ 6234: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; Object.defineProperty(exports, "__esModule", ({ value: true })); function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? 
ex['default'] : ex; } var endpoint = __nccwpck_require__(9440); var universalUserAgent = __nccwpck_require__(5030); var isPlainObject = __nccwpck_require__(3287); var nodeFetch = _interopDefault(__nccwpck_require__(467)); var requestError = __nccwpck_require__(537); const VERSION = "5.4.15"; function getBufferResponse(response) { return response.arrayBuffer(); } function fetchWrapper(requestOptions) { if (isPlainObject.isPlainObject(requestOptions.body) || Array.isArray(requestOptions.body)) { requestOptions.body = JSON.stringify(requestOptions.body); } let headers = {}; let status; let url; const fetch = requestOptions.request && requestOptions.request.fetch || nodeFetch; return fetch(requestOptions.url, Object.assign({ method: requestOptions.method, body: requestOptions.body, headers: requestOptions.headers, redirect: requestOptions.redirect }, // `requestOptions.request.agent` type is incompatible // see https://github.com/octokit/types.ts/pull/264 requestOptions.request)).then(response => { url = response.url; status = response.status; for (const keyAndValue of response.headers) { headers[keyAndValue[0]] = keyAndValue[1]; } if (status === 204 || status === 205) { return; } // GitHub API returns 200 for HEAD requests if (requestOptions.method === "HEAD") { if (status < 400) { return; } throw new requestError.RequestError(response.statusText, status, { headers, request: requestOptions }); } if (status === 304) { throw new requestError.RequestError("Not modified", status, { headers, request: requestOptions }); } if (status >= 400) { return response.text().then(message => { const error = new requestError.RequestError(message, status, { headers, request: requestOptions }); try { let responseBody = JSON.parse(error.message); Object.assign(error, responseBody); let errors = responseBody.errors; // Assumption `errors` would always be in Array format error.message = error.message + ": " + errors.map(JSON.stringify).join(", "); } catch (e) {// ignore, see 
octokit/rest.js#684 } throw error; }); } const contentType = response.headers.get("content-type"); if (/application\/json/.test(contentType)) { return response.json(); } if (!contentType || /^text\/|charset=utf-8$/.test(contentType)) { return response.text(); } return getBufferResponse(response); }).then(data => { return { status, url, headers, data }; }).catch(error => { if (error instanceof requestError.RequestError) { throw error; } throw new requestError.RequestError(error.message, 500, { headers, request: requestOptions }); }); } function withDefaults(oldEndpoint, newDefaults) { const endpoint = oldEndpoint.defaults(newDefaults); const newApi = function (route, parameters) { const endpointOptions = endpoint.merge(route, parameters); if (!endpointOptions.request || !endpointOptions.request.hook) { return fetchWrapper(endpoint.parse(endpointOptions)); } const request = (route, parameters) => { return fetchWrapper(endpoint.parse(endpoint.merge(route, parameters))); }; Object.assign(request, { endpoint, defaults: withDefaults.bind(null, endpoint) }); return endpointOptions.request.hook(request, endpointOptions); }; return Object.assign(newApi, { endpoint, defaults: withDefaults.bind(null, endpoint) }); } const request = withDefaults(endpoint.endpoint, { headers: { "user-agent": `octokit-request.js/${VERSION} ${universalUserAgent.getUserAgent()}` } }); exports.request = request; //# sourceMappingURL=index.js.map /***/ }), /***/ 6761: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { const Utils = __nccwpck_require__(5182); const pth = __nccwpck_require__(5622); const ZipEntry = __nccwpck_require__(4057); const ZipFile = __nccwpck_require__(7744); const fs = Utils.FileSystem.require(); fs.existsSync = fs.existsSync || pth.existsSync; const defaultOptions = { // read entries during load (initial loading may be slower) readEntries: false, // default method is none method: Utils.Constants.NONE } function canonical(p) { // trick normalize think path is 
absolute var safeSuffix = pth.posix.normalize("/" + p.split("\\").join("/")); return pth.join(".", safeSuffix); } module.exports = function (/**String*/input, /** object */options) { let inBuffer = null; // create object based default options, allowing them to be overwritten const opts = Object.assign(Object.create( null ), defaultOptions); // test input variable if (input && "object" === typeof input){ // if value is not buffer we accept it to be object with options if (!(input instanceof Uint8Array)){ Object.assign(opts, input); input = opts.input ? opts.input : undefined; if (opts.input) delete opts.input; } // if input is buffer if (input instanceof Uint8Array){ inBuffer = input; opts.method = Utils.Constants.BUFFER; input = undefined; } } // assign options Object.assign(opts, options); // if input is file name we retrieve its content if (input && "string" === typeof input) { // load zip file if (fs.existsSync(input)) { opts.method = Utils.Constants.FILE; opts.filename = input; inBuffer = fs.readFileSync(input); } else { throw new Error(Utils.Errors.INVALID_FILENAME); } } // create variable const _zip = new ZipFile(inBuffer, opts); function sanitize(prefix, name) { prefix = pth.resolve(pth.normalize(prefix)); var parts = name.split('/'); for (var i = 0, l = parts.length; i < l; i++) { var path = pth.normalize(pth.join(prefix, parts.slice(i, l).join(pth.sep))); if (path.indexOf(prefix) === 0) { return path; } } return pth.normalize(pth.join(prefix, pth.basename(name))); } function getEntry(/**Object*/entry) { if (entry && _zip) { var item; // If entry was given as a file name if (typeof entry === "string") item = _zip.getEntry(entry); // if entry was given as a ZipEntry object if (typeof entry === "object" && typeof entry.entryName !== "undefined" && typeof entry.header !== "undefined") item = _zip.getEntry(entry.entryName); if (item) { return item; } } return null; } function fixPath(zipPath){ const { join, normalize, sep } = pth.posix; // convert windows file 
separators and normalize return join(".", normalize(sep + zipPath.split("\\").join(sep) + sep)); } return { /** * Extracts the given entry from the archive and returns the content as a Buffer object * @param entry ZipEntry object or String with the full path of the entry * * @return Buffer or Null in case of error */ readFile: function (/**Object*/entry, /*String, Buffer*/pass) { var item = getEntry(entry); return item && item.getData(pass) || null; }, /** * Asynchronous readFile * @param entry ZipEntry object or String with the full path of the entry * @param callback * * @return Buffer or Null in case of error */ readFileAsync: function (/**Object*/entry, /**Function*/callback) { var item = getEntry(entry); if (item) { item.getDataAsync(callback); } else { callback(null, "getEntry failed for:" + entry) } }, /** * Extracts the given entry from the archive and returns the content as plain text in the given encoding * @param entry ZipEntry object or String with the full path of the entry * @param encoding Optional. If no encoding is specified utf8 is used * * @return String */ readAsText: function (/**Object*/entry, /**String=*/encoding) { var item = getEntry(entry); if (item) { var data = item.getData(); if (data && data.length) { return data.toString(encoding || "utf8"); } } return ""; }, /** * Asynchronous readAsText * @param entry ZipEntry object or String with the full path of the entry * @param callback * @param encoding Optional. 
If no encoding is specified utf8 is used * * @return String */ readAsTextAsync: function (/**Object*/entry, /**Function*/callback, /**String=*/encoding) { var item = getEntry(entry); if (item) { item.getDataAsync(function (data, err) { if (err) { callback(data, err); return; } if (data && data.length) { callback(data.toString(encoding || "utf8")); } else { callback(""); } }) } else { callback(""); } }, /** * Remove the entry from the file or the entry and all it's nested directories and files if the given entry is a directory * * @param entry */ deleteFile: function (/**Object*/entry) { // @TODO: test deleteFile var item = getEntry(entry); if (item) { _zip.deleteEntry(item.entryName); } }, /** * Adds a comment to the zip. The zip must be rewritten after adding the comment. * * @param comment */ addZipComment: function (/**String*/comment) { // @TODO: test addZipComment _zip.comment = comment; }, /** * Returns the zip comment * * @return String */ getZipComment: function () { return _zip.comment || ''; }, /** * Adds a comment to a specified zipEntry. The zip must be rewritten after adding the comment * The comment cannot exceed 65535 characters in length * * @param entry * @param comment */ addZipEntryComment: function (/**Object*/entry, /**String*/comment) { var item = getEntry(entry); if (item) { item.comment = comment; } }, /** * Returns the comment of the specified entry * * @param entry * @return String */ getZipEntryComment: function (/**Object*/entry) { var item = getEntry(entry); if (item) { return item.comment || ''; } return '' }, /** * Updates the content of an existing entry inside the archive. 
The zip must be rewritten after updating the content * * @param entry * @param content */ updateFile: function (/**Object*/entry, /**Buffer*/content) { var item = getEntry(entry); if (item) { item.setData(content); } }, /** * Adds a file from the disk to the archive * * @param localPath File to add to zip * @param zipPath Optional path inside the zip * @param zipName Optional name for the file */ addLocalFile: function (/**String*/localPath, /**String=*/zipPath, /**String=*/zipName, /**String*/comment) { if (fs.existsSync(localPath)) { // fix ZipPath zipPath = (zipPath) ? fixPath(zipPath) : ""; // p - local file name var p = localPath.split("\\").join("/").split("/").pop(); // add file name into zippath zipPath += (zipName) ? zipName : p; // read file attributes const _attr = fs.statSync(localPath); // add file into zip file this.addFile(zipPath, fs.readFileSync(localPath), comment, _attr) } else { throw new Error(Utils.Errors.FILE_NOT_FOUND.replace("%s", localPath)); } }, /** * Adds a local directory and all its nested files and directories to the archive * * @param localPath * @param zipPath optional path inside zip * @param filter optional RegExp or Function if files match will * be included. */ addLocalFolder: function (/**String*/localPath, /**String=*/zipPath, /**=RegExp|Function*/filter) { // Prepare filter if (filter instanceof RegExp) { // if filter is RegExp wrap it filter = (function (rx){ return function (filename) { return rx.test(filename); } })(filter); } else if ('function' !== typeof filter) { // if filter is not function we will replace it filter = function () { return true; }; } // fix ZipPath zipPath = (zipPath) ? 
fixPath(zipPath) : ""; // normalize the path first localPath = pth.normalize(localPath); if (fs.existsSync(localPath)) { var items = Utils.findFiles(localPath), self = this; if (items.length) { items.forEach(function (filepath) { var p = pth.relative(localPath, filepath).split("\\").join("/"); //windows fix if (filter(p)) { var stats = fs.statSync(filepath); if (stats.isFile()) { self.addFile(zipPath + p, fs.readFileSync(filepath), "", stats); } else { self.addFile(zipPath + p + '/', Buffer.alloc(0), "", stats); } } }); } } else { throw new Error(Utils.Errors.FILE_NOT_FOUND.replace("%s", localPath)); } }, /** * Asynchronous addLocalFile * @param localPath * @param callback * @param zipPath optional path inside zip * @param filter optional RegExp or Function if files match will * be included. */ addLocalFolderAsync: function (/*String*/localPath, /*Function*/callback, /*String*/zipPath, /*RegExp|Function*/filter) { if (filter instanceof RegExp) { filter = (function (rx) { return function (filename) { return rx.test(filename); }; })(filter); } else if ("function" !== typeof filter) { filter = function () { return true; }; } // fix ZipPath zipPath = zipPath ? 
fixPath(zipPath) : ""; // normalize the path first localPath = pth.normalize(localPath); var self = this; fs.open(localPath, 'r', function (err) { if (err && err.code === 'ENOENT') { callback(undefined, Utils.Errors.FILE_NOT_FOUND.replace("%s", localPath)); } else if (err) { callback(undefined, err); } else { var items = Utils.findFiles(localPath); var i = -1; var next = function () { i += 1; if (i < items.length) { var filepath = items[i]; var p = pth.relative(localPath, filepath).split("\\").join("/"); //windows fix p = p.normalize('NFD').replace(/[\u0300-\u036f]/g, '').replace(/[^\x20-\x7E]/g, '') // accent fix if (filter(p)) { fs.stat(filepath, function (er0, stats) { if (er0) callback(undefined, er0); if (stats.isFile()) { fs.readFile(filepath, function (er1, data) { if (er1) { callback(undefined, er1); } else { self.addFile(zipPath + p, data, "", stats); next(); } }); } else { self.addFile(zipPath + p + "/", Buffer.alloc(0), "", stats); next(); } }); } else { next(); } } else { callback(true, undefined); } } next(); } }); }, addLocalFolderPromise: function (/*String*/ localPath, /* object */ options) { return new Promise((resolve, reject) => { const { filter, zipPath } = Object.assign({}, options); this.addLocalFolderAsync(localPath, (done, err) => { if (err) reject(err); if (done) resolve(this); }, zipPath, filter ); }); }, /** * Allows you to create a entry (file or directory) in the zip file. * If you want to create a directory the entryName must end in / and a null buffer should be provided. 
* Comment and attributes are optional * * @param {string} entryName * @param {Buffer | string} content - file content as buffer or utf8 coded string * @param {string} comment - file comment * @param {number | object} attr - number as unix file permissions, object as filesystem Stats object */ addFile: function (/**String*/ entryName, /**Buffer*/ content, /**String*/ comment, /**Number*/ attr) { let entry = getEntry(entryName); const update = entry != null; // prepare new entry if (!update){ entry = new ZipEntry(); entry.entryName = entryName; } entry.comment = comment || ""; const isStat = ('object' === typeof attr) && (attr instanceof fs.Stats); // last modification time from file stats if (isStat){ entry.header.time = attr.mtime; } // Set file attribute var fileattr = (entry.isDirectory) ? 0x10 : 0; // (MS-DOS directory flag) // extended attributes field for Unix if('win32' !== process.platform){ // set file type either S_IFDIR / S_IFREG let unix = (entry.isDirectory) ? 0x4000 : 0x8000; if (isStat) { // File attributes from file stats unix |= (0xfff & attr.mode); }else if ('number' === typeof attr){ // attr from given attr values unix |= (0xfff & attr); }else{ // Default values: unix |= (entry.isDirectory) ? 0o755 : 0o644; // permissions (drwxr-xr-x) or (-r-wr--r--) } fileattr = (fileattr | (unix << 16)) >>> 0; // add attributes } entry.attr = fileattr; entry.setData(content); if (!update) _zip.setEntry(entry); }, /** * Returns an array of ZipEntry objects representing the files and folders inside the archive * * @return Array */ getEntries: function () { if (_zip) { return _zip.entries; } else { return []; } }, /** * Returns a ZipEntry object representing the file or folder specified by ``name``. 
* * @param name * @return ZipEntry */ getEntry: function (/**String*/name) { return getEntry(name); }, getEntryCount: function() { return _zip.getEntryCount(); }, forEach: function(callback) { return _zip.forEach(callback); }, /** * Extracts the given entry to the given targetPath * If the entry is a directory inside the archive, the entire directory and it's subdirectories will be extracted * * @param entry ZipEntry object or String with the full path of the entry * @param targetPath Target folder where to write the file * @param maintainEntryPath If maintainEntryPath is true and the entry is inside a folder, the entry folder * will be created in targetPath as well. Default is TRUE * @param overwrite If the file already exists at the target path, the file will be overwriten if this is true. * Default is FALSE * @param outFileName String If set will override the filename of the extracted file (Only works if the entry is a file) * * @return Boolean */ extractEntryTo: function (/**Object*/entry, /**String*/targetPath, /**Boolean*/maintainEntryPath, /**Boolean*/overwrite, /**String**/outFileName) { overwrite = overwrite || false; maintainEntryPath = typeof maintainEntryPath === "undefined" ? true : maintainEntryPath; var item = getEntry(entry); if (!item) { throw new Error(Utils.Errors.NO_ENTRY); } var entryName = canonical(item.entryName); var target = sanitize(targetPath,outFileName && !item.isDirectory ? outFileName : (maintainEntryPath ? entryName : pth.basename(entryName))); if (item.isDirectory) { target = pth.resolve(target, ".."); var children = _zip.getEntryChildren(item); children.forEach(function (child) { if (child.isDirectory) return; var content = child.getData(); if (!content) { throw new Error(Utils.Errors.CANT_EXTRACT_FILE); } var name = canonical(child.entryName) var childName = sanitize(targetPath, maintainEntryPath ? name : pth.basename(name)); // The reverse operation for attr depend on method addFile() var fileAttr = child.attr ? 
(((child.attr >>> 0) | 0) >> 16) & 0xfff : 0; Utils.writeFileTo(childName, content, overwrite, fileAttr); }); return true; } var content = item.getData(); if (!content) throw new Error(Utils.Errors.CANT_EXTRACT_FILE); if (fs.existsSync(target) && !overwrite) { throw new Error(Utils.Errors.CANT_OVERRIDE); } // The reverse operation for attr depend on method addFile() var fileAttr = item.attr ? (((item.attr >>> 0) | 0) >> 16) & 0xfff : 0; Utils.writeFileTo(target, content, overwrite, fileAttr); return true; }, /** * Test the archive * */ test: function (pass) { if (!_zip) { return false; } for (var entry in _zip.entries) { try { if (entry.isDirectory) { continue; } var content = _zip.entries[entry].getData(pass); if (!content) { return false; } } catch (err) { return false; } } return true; }, /** * Extracts the entire archive to the given location * * @param targetPath Target location * @param overwrite If the file already exists at the target path, the file will be overwriten if this is true. * Default is FALSE */ extractAllTo: function (/**String*/targetPath, /**Boolean*/overwrite, /*String, Buffer*/pass) { overwrite = overwrite || false; if (!_zip) { throw new Error(Utils.Errors.NO_ZIP); } _zip.entries.forEach(function (entry) { var entryName = sanitize(targetPath, canonical(entry.entryName.toString())); if (entry.isDirectory) { Utils.makeDir(entryName); return; } var content = entry.getData(pass); if (!content) { throw new Error(Utils.Errors.CANT_EXTRACT_FILE); } // The reverse operation for attr depend on method addFile() var fileAttr = entry.attr ? 
(((entry.attr >>> 0) | 0) >> 16) & 0xfff : 0; Utils.writeFileTo(entryName, content, overwrite, fileAttr); try { fs.utimesSync(entryName, entry.header.time, entry.header.time) } catch (err) { throw new Error(Utils.Errors.CANT_EXTRACT_FILE); } }) }, /** * Asynchronous extractAllTo * * @param targetPath Target location * @param overwrite If the file already exists at the target path, the file will be overwriten if this is true. * Default is FALSE * @param callback */ extractAllToAsync: function (/**String*/targetPath, /**Boolean*/overwrite, /**Function*/callback) { if (!callback) { callback = function() {} } overwrite = overwrite || false; if (!_zip) { callback(new Error(Utils.Errors.NO_ZIP)); return; } var entries = _zip.entries; var i = entries.length; entries.forEach(function (entry) { if (i <= 0) return; // Had an error already var entryName = pth.normalize(canonical(entry.entryName.toString())); if (entry.isDirectory) { Utils.makeDir(sanitize(targetPath, entryName)); if (--i === 0) callback(undefined); return; } entry.getDataAsync(function (content, err) { if (i <= 0) return; if (err) { callback(new Error(err)); return; } if (!content) { i = 0; callback(new Error(Utils.Errors.CANT_EXTRACT_FILE)); return; } // The reverse operation for attr depend on method addFile() var fileAttr = entry.attr ? 
(((entry.attr >>> 0) | 0) >> 16) & 0xfff : 0; Utils.writeFileToAsync(sanitize(targetPath, entryName), content, overwrite, fileAttr, function (succ) { try { fs.utimesSync(pth.resolve(targetPath, entryName), entry.header.time, entry.header.time); } catch (err) { callback(new Error('Unable to set utimes')); } if (i <= 0) return; if (!succ) { i = 0; callback(new Error('Unable to write')); return; } if (--i === 0) callback(undefined); }); }); }) }, /** * Writes the newly created zip file to disk at the specified location or if a zip was opened and no ``targetFileName`` is provided, it will overwrite the opened zip * * @param targetFileName * @param callback */ writeZip: function (/**String*/targetFileName, /**Function*/callback) { if (arguments.length === 1) { if (typeof targetFileName === "function") { callback = targetFileName; targetFileName = ""; } } if (!targetFileName && opts.filename) { targetFileName = opts.filename; } if (!targetFileName) return; var zipData = _zip.compressToBuffer(); if (zipData) { var ok = Utils.writeFileTo(targetFileName, zipData, true); if (typeof callback === 'function') callback(!ok ? new Error("failed") : null, ""); } }, writeZipPromise: function (/**String*/ targetFileName, /* object */ options) { const { overwrite, perm } = Object.assign({ overwrite: true }, options); return new Promise((resolve, reject) => { // find file name if (!targetFileName && opts.filename) targetFileName = opts.filename; if (!targetFileName) reject("ADM-ZIP: ZIP File Name Missing"); this.toBufferPromise().then((zipData) => { const ret = (done) => (done ? 
resolve(done) : reject("ADM-ZIP: Wasn't able to write zip file")); Utils.writeFileToAsync(targetFileName, zipData, overwrite, perm, ret); }, reject); }); }, toBufferPromise: function () { return new Promise((resolve, reject) => { _zip.toAsyncBuffer(resolve, reject); }); }, /** * Returns the content of the entire zip file as a Buffer object * * @return Buffer */ toBuffer: function (/**Function=*/onSuccess, /**Function=*/onFail, /**Function=*/onItemStart, /**Function=*/onItemEnd) { this.valueOf = 2; if (typeof onSuccess === "function") { _zip.toAsyncBuffer(onSuccess, onFail, onItemStart, onItemEnd); return null; } return _zip.compressToBuffer() } } }; /***/ }), /***/ 9032: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var Utils = __nccwpck_require__(5182), Constants = Utils.Constants; /* The central directory file header */ module.exports = function () { var _verMade = 0x14, _version = 0x0A, _flags = 0, _method = 0, _time = 0, _crc = 0, _compressedSize = 0, _size = 0, _fnameLen = 0, _extraLen = 0, _comLen = 0, _diskStart = 0, _inattr = 0, _attr = 0, _offset = 0; switch(process.platform){ case 'win32': _verMade |= 0x0A00; default: _verMade |= 0x0300; } var _dataHeader = {}; function setTime(val) { val = new Date(val); _time = (val.getFullYear() - 1980 & 0x7f) << 25 // b09-16 years from 1980 | (val.getMonth() + 1) << 21 // b05-08 month | val.getDate() << 16 // b00-04 hour // 2 bytes time | val.getHours() << 11 // b11-15 hour | val.getMinutes() << 5 // b05-10 minute | val.getSeconds() >> 1; // b00-04 seconds divided by 2 } setTime(+new Date()); return { get made () { return _verMade; }, set made (val) { _verMade = val; }, get version () { return _version; }, set version (val) { _version = val }, get flags () { return _flags }, set flags (val) { _flags = val; }, get method () { return _method; }, set method (val) { switch (val){ case Constants.STORED: this.version = 10; case Constants.DEFLATED: default: this.version = 20; } _method = val; }, get 
time () { return new Date( ((_time >> 25) & 0x7f) + 1980, ((_time >> 21) & 0x0f) - 1, (_time >> 16) & 0x1f, (_time >> 11) & 0x1f, (_time >> 5) & 0x3f, (_time & 0x1f) << 1 ); }, set time (val) { setTime(val); }, get crc () { return _crc; }, set crc (val) { _crc = val; }, get compressedSize () { return _compressedSize; }, set compressedSize (val) { _compressedSize = val; }, get size () { return _size; }, set size (val) { _size = val; }, get fileNameLength () { return _fnameLen; }, set fileNameLength (val) { _fnameLen = val; }, get extraLength () { return _extraLen }, set extraLength (val) { _extraLen = val; }, get commentLength () { return _comLen }, set commentLength (val) { _comLen = val }, get diskNumStart () { return _diskStart }, set diskNumStart (val) { _diskStart = val }, get inAttr () { return _inattr }, set inAttr (val) { _inattr = val }, get attr () { return _attr }, set attr (val) { _attr = val }, get offset () { return _offset }, set offset (val) { _offset = val }, get encripted () { return (_flags & 1) === 1 }, get entryHeaderSize () { return Constants.CENHDR + _fnameLen + _extraLen + _comLen; }, get realDataOffset () { return _offset + Constants.LOCHDR + _dataHeader.fnameLen + _dataHeader.extraLen; }, get dataHeader () { return _dataHeader; }, loadDataHeaderFromBinary : function(/*Buffer*/input) { var data = input.slice(_offset, _offset + Constants.LOCHDR); // 30 bytes and should start with "PK\003\004" if (data.readUInt32LE(0) !== Constants.LOCSIG) { throw new Error(Utils.Errors.INVALID_LOC); } _dataHeader = { // version needed to extract version : data.readUInt16LE(Constants.LOCVER), // general purpose bit flag flags : data.readUInt16LE(Constants.LOCFLG), // compression method method : data.readUInt16LE(Constants.LOCHOW), // modification time (2 bytes time, 2 bytes date) time : data.readUInt32LE(Constants.LOCTIM), // uncompressed file crc-32 value crc : data.readUInt32LE(Constants.LOCCRC), // compressed size compressedSize : 
data.readUInt32LE(Constants.LOCSIZ), // uncompressed size size : data.readUInt32LE(Constants.LOCLEN), // filename length fnameLen : data.readUInt16LE(Constants.LOCNAM), // extra field length extraLen : data.readUInt16LE(Constants.LOCEXT) } }, loadFromBinary : function(/*Buffer*/data) { // data should be 46 bytes and start with "PK 01 02" if (data.length !== Constants.CENHDR || data.readUInt32LE(0) !== Constants.CENSIG) { throw new Error(Utils.Errors.INVALID_CEN); } // version made by _verMade = data.readUInt16LE(Constants.CENVEM); // version needed to extract _version = data.readUInt16LE(Constants.CENVER); // encrypt, decrypt flags _flags = data.readUInt16LE(Constants.CENFLG); // compression method _method = data.readUInt16LE(Constants.CENHOW); // modification time (2 bytes time, 2 bytes date) _time = data.readUInt32LE(Constants.CENTIM); // uncompressed file crc-32 value _crc = data.readUInt32LE(Constants.CENCRC); // compressed size _compressedSize = data.readUInt32LE(Constants.CENSIZ); // uncompressed size _size = data.readUInt32LE(Constants.CENLEN); // filename length _fnameLen = data.readUInt16LE(Constants.CENNAM); // extra field length _extraLen = data.readUInt16LE(Constants.CENEXT); // file comment length _comLen = data.readUInt16LE(Constants.CENCOM); // volume number start _diskStart = data.readUInt16LE(Constants.CENDSK); // internal file attributes _inattr = data.readUInt16LE(Constants.CENATT); // external file attributes _attr = data.readUInt32LE(Constants.CENATX); // LOC header offset _offset = data.readUInt32LE(Constants.CENOFF); }, dataHeaderToBinary : function() { // LOC header size (30 bytes) var data = Buffer.alloc(Constants.LOCHDR); // "PK\003\004" data.writeUInt32LE(Constants.LOCSIG, 0); // version needed to extract data.writeUInt16LE(_version, Constants.LOCVER); // general purpose bit flag data.writeUInt16LE(_flags, Constants.LOCFLG); // compression method data.writeUInt16LE(_method, Constants.LOCHOW); // modification time (2 bytes time, 2 bytes 
date) data.writeUInt32LE(_time, Constants.LOCTIM); // uncompressed file crc-32 value data.writeUInt32LE(_crc, Constants.LOCCRC); // compressed size data.writeUInt32LE(_compressedSize, Constants.LOCSIZ); // uncompressed size data.writeUInt32LE(_size, Constants.LOCLEN); // filename length data.writeUInt16LE(_fnameLen, Constants.LOCNAM); // extra field length data.writeUInt16LE(_extraLen, Constants.LOCEXT); return data; }, entryHeaderToBinary : function() { // CEN header size (46 bytes) var data = Buffer.alloc(Constants.CENHDR + _fnameLen + _extraLen + _comLen); // "PK\001\002" data.writeUInt32LE(Constants.CENSIG, 0); // version made by data.writeUInt16LE(_verMade, Constants.CENVEM); // version needed to extract data.writeUInt16LE(_version, Constants.CENVER); // encrypt, decrypt flags data.writeUInt16LE(_flags, Constants.CENFLG); // compression method data.writeUInt16LE(_method, Constants.CENHOW); // modification time (2 bytes time, 2 bytes date) data.writeUInt32LE(_time, Constants.CENTIM); // uncompressed file crc-32 value data.writeUInt32LE(_crc, Constants.CENCRC); // compressed size data.writeUInt32LE(_compressedSize, Constants.CENSIZ); // uncompressed size data.writeUInt32LE(_size, Constants.CENLEN); // filename length data.writeUInt16LE(_fnameLen, Constants.CENNAM); // extra field length data.writeUInt16LE(_extraLen, Constants.CENEXT); // file comment length data.writeUInt16LE(_comLen, Constants.CENCOM); // volume number start data.writeUInt16LE(_diskStart, Constants.CENDSK); // internal file attributes data.writeUInt16LE(_inattr, Constants.CENATT); // external file attributes data.writeUInt32LE(_attr, Constants.CENATX); // LOC header offset data.writeUInt32LE(_offset, Constants.CENOFF); // fill all with data.fill(0x00, Constants.CENHDR); return data; }, toString : function() { return '{\n' + '\t"made" : ' + _verMade + ",\n" + '\t"version" : ' + _version + ",\n" + '\t"flags" : ' + _flags + ",\n" + '\t"method" : ' + Utils.methodToString(_method) + ",\n" + 
'\t"time" : ' + this.time + ",\n" + '\t"crc" : 0x' + _crc.toString(16).toUpperCase() + ",\n" + '\t"compressedSize" : ' + _compressedSize + " bytes,\n" + '\t"size" : ' + _size + " bytes,\n" + '\t"fileNameLength" : ' + _fnameLen + ",\n" + '\t"extraLength" : ' + _extraLen + " bytes,\n" + '\t"commentLength" : ' + _comLen + " bytes,\n" + '\t"diskNumStart" : ' + _diskStart + ",\n" + '\t"inAttr" : ' + _inattr + ",\n" + '\t"attr" : ' + _attr + ",\n" + '\t"offset" : ' + _offset + ",\n" + '\t"entryHeaderSize" : ' + (Constants.CENHDR + _fnameLen + _extraLen + _comLen) + " bytes\n" + '}'; } } }; /***/ }), /***/ 4958: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { exports.EntryHeader = __nccwpck_require__(9032); exports.MainHeader = __nccwpck_require__(4408); /***/ }), /***/ 4408: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var Utils = __nccwpck_require__(5182), Constants = Utils.Constants; /* The entries in the end of central directory */ module.exports = function () { var _volumeEntries = 0, _totalEntries = 0, _size = 0, _offset = 0, _commentLength = 0; return { get diskEntries () { return _volumeEntries }, set diskEntries (/*Number*/val) { _volumeEntries = _totalEntries = val; }, get totalEntries () { return _totalEntries }, set totalEntries (/*Number*/val) { _totalEntries = _volumeEntries = val; }, get size () { return _size }, set size (/*Number*/val) { _size = val; }, get offset () { return _offset }, set offset (/*Number*/val) { _offset = val; }, get commentLength () { return _commentLength }, set commentLength (/*Number*/val) { _commentLength = val; }, get mainHeaderSize () { return Constants.ENDHDR + _commentLength; }, loadFromBinary : function(/*Buffer*/data) { // data should be 22 bytes and start with "PK 05 06" // or be 56+ bytes and start with "PK 06 06" for Zip64 if ((data.length !== Constants.ENDHDR || data.readUInt32LE(0) !== Constants.ENDSIG) && (data.length < Constants.ZIP64HDR || data.readUInt32LE(0) !== 
Constants.ZIP64SIG)) { throw new Error(Utils.Errors.INVALID_END); } if (data.readUInt32LE(0) === Constants.ENDSIG) { // number of entries on this volume _volumeEntries = data.readUInt16LE(Constants.ENDSUB); // total number of entries _totalEntries = data.readUInt16LE(Constants.ENDTOT); // central directory size in bytes _size = data.readUInt32LE(Constants.ENDSIZ); // offset of first CEN header _offset = data.readUInt32LE(Constants.ENDOFF); // zip file comment length _commentLength = data.readUInt16LE(Constants.ENDCOM); } else { // number of entries on this volume _volumeEntries = Utils.readBigUInt64LE(data, Constants.ZIP64SUB); // total number of entries _totalEntries = Utils.readBigUInt64LE(data, Constants.ZIP64TOT); // central directory size in bytes _size = Utils.readBigUInt64LE(data, Constants.ZIP64SIZ); // offset of first CEN header _offset = Utils.readBigUInt64LE(data, Constants.ZIP64OFF); _commentLength = 0; } }, toBinary : function() { var b = Buffer.alloc(Constants.ENDHDR + _commentLength); // "PK 05 06" signature b.writeUInt32LE(Constants.ENDSIG, 0); b.writeUInt32LE(0, 4); // number of entries on this volume b.writeUInt16LE(_volumeEntries, Constants.ENDSUB); // total number of entries b.writeUInt16LE(_totalEntries, Constants.ENDTOT); // central directory size in bytes b.writeUInt32LE(_size, Constants.ENDSIZ); // offset of first CEN header b.writeUInt32LE(_offset, Constants.ENDOFF); // zip file comment length b.writeUInt16LE(_commentLength, Constants.ENDCOM); // fill comment memory with spaces so no garbage is left there b.fill(" ", Constants.ENDHDR); return b; }, toString : function() { return '{\n' + '\t"diskEntries" : ' + _volumeEntries + ",\n" + '\t"totalEntries" : ' + _totalEntries + ",\n" + '\t"size" : ' + _size + " bytes,\n" + '\t"offset" : 0x' + _offset.toString(16).toUpperCase() + ",\n" + '\t"commentLength" : 0x' + _commentLength + "\n" + '}'; } } }; /***/ }), /***/ 7686: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { 
module.exports = function (/*Buffer*/inbuf) { var zlib = __nccwpck_require__(8761); var opts = {chunkSize: (parseInt(inbuf.length / 1024) + 1) * 1024}; return { deflate: function () { return zlib.deflateRawSync(inbuf, opts); }, deflateAsync: function (/*Function*/callback) { var tmp = zlib.createDeflateRaw(opts), parts = [], total = 0; tmp.on('data', function (data) { parts.push(data); total += data.length; }); tmp.on('end', function () { var buf = Buffer.alloc(total), written = 0; buf.fill(0); for (var i = 0; i < parts.length; i++) { var part = parts[i]; part.copy(buf, written); written += part.length; } callback && callback(buf); }); tmp.end(inbuf); } } }; /***/ }), /***/ 3928: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { exports.Deflater = __nccwpck_require__(7686); exports.Inflater = __nccwpck_require__(2153); exports.ZipCrypto = __nccwpck_require__(3228); /***/ }), /***/ 2153: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { module.exports = function (/*Buffer*/inbuf) { var zlib = __nccwpck_require__(8761); return { inflate: function () { return zlib.inflateRawSync(inbuf); }, inflateAsync: function (/*Function*/callback) { var tmp = zlib.createInflateRaw(), parts = [], total = 0; tmp.on('data', function (data) { parts.push(data); total += data.length; }); tmp.on('end', function () { var buf = Buffer.alloc(total), written = 0; buf.fill(0); for (var i = 0; i < parts.length; i++) { var part = parts[i]; part.copy(buf, written); written += part.length; } callback && callback(buf); }); tmp.end(inbuf); } } }; /***/ }), /***/ 3228: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { // node crypt, we use it for generate salt const { randomFillSync } = __nccwpck_require__(6417); "use strict"; // generate CRC32 lookup table const crctable = new Uint32Array(256).map((t, crc) => { for (let j = 0; j < 8; j++) { if (0 !== (crc & 1)) { crc = (crc >>> 1) ^ 0xedb88320; } else { crc >>>= 1; } } return crc >>> 0; }); // 
C-style uInt32 Multiply (discards higher bits, when JS multiply discards lower bits) const uMul = (a, b) => Math.imul(a, b) >>> 0; // crc32 byte single update (actually same function is part of utils.crc32 function :) ) const crc32update = (pCrc32, bval) => { return crctable[(pCrc32 ^ bval) & 0xff] ^ (pCrc32 >>> 8); }; // function for generating salt for encrytion header const genSalt = () => { if ("function" === typeof randomFillSync) { return randomFillSync(Buffer.alloc(12)); } else { // fallback if function is not defined return genSalt.node(); } }; // salt generation with node random function (mainly as fallback) genSalt.node = () => { const salt = Buffer.alloc(12); const len = salt.length; for (let i = 0; i < len; i++) salt[i] = (Math.random() * 256) & 0xff; return salt; }; // general config const config = { genSalt }; // Class Initkeys handles same basic ops with keys function Initkeys(pw) { const pass = Buffer.isBuffer(pw) ? pw : Buffer.from(pw); this.keys = new Uint32Array([0x12345678, 0x23456789, 0x34567890]); for (let i = 0; i < pass.length; i++) { this.updateKeys(pass[i]); } } Initkeys.prototype.updateKeys = function (byteValue) { const keys = this.keys; keys[0] = crc32update(keys[0], byteValue); keys[1] += keys[0] & 0xff; keys[1] = uMul(keys[1], 134775813) + 1; keys[2] = crc32update(keys[2], keys[1] >>> 24); return byteValue; }; Initkeys.prototype.next = function () { const k = (this.keys[2] | 2) >>> 0; // key return (uMul(k, k ^ 1) >> 8) & 0xff; // decode }; function make_decrypter(/*Buffer*/ pwd) { // 1. 
Stage initialize key const keys = new Initkeys(pwd); // return decrypter function return function (/*Buffer*/ data) { // result - we create new Buffer for results const result = Buffer.alloc(data.length); let pos = 0; // process input data for (let c of data) { //c ^= keys.next(); //result[pos++] = c; // decode & Save Value result[pos++] = keys.updateKeys(c ^ keys.next()); // update keys with decoded byte } return result; }; } function make_encrypter(/*Buffer*/ pwd) { // 1. Stage initialize key const keys = new Initkeys(pwd); // return encrypting function, result and pos is here so we dont have to merge buffers later return function (/*Buffer*/ data, /*Buffer*/ result, /* Number */ pos = 0) { // result - we create new Buffer for results if (!result) result = Buffer.alloc(data.length); // process input data for (let c of data) { const k = keys.next(); // save key byte result[pos++] = c ^ k; // save val keys.updateKeys(c); // update keys with decoded byte } return result; }; } function decrypt(/*Buffer*/ data, /*Object*/ header, /*String, Buffer*/ pwd) { if (!data || !Buffer.isBuffer(data) || data.length < 12) { return Buffer.alloc(0); } // 1. We Initialize and generate decrypting function const decrypter = make_decrypter(pwd); // 2. decrypt salt what is always 12 bytes and is a part of file content const salt = decrypter(data.slice(0, 12)); // 3. does password meet expectations if (salt[11] !== header.crc >>> 24) { throw "ADM-ZIP: Wrong Password"; } // 4. 
decode content return decrypter(data.slice(12)); } // lets add way to populate salt, NOT RECOMMENDED for production but maybe useful for testing general functionality function _salter(data) { if (Buffer.isBuffer(data) && data.length >= 12) { // be aware - currently salting buffer data is modified config.genSalt = function () { return data.slice(0, 12); }; } else if (data === "node") { // test salt generation with node random function config.genSalt = genSalt.node; } else { // if value is not acceptable config gets reset. config.genSalt = genSalt; } } function encrypt(/*Buffer*/ data, /*Object*/ header, /*String, Buffer*/ pwd, /*Boolean*/ oldlike = false) { // 1. test data if data is not Buffer we make buffer from it if (data == null) data = Buffer.alloc(0); // if data is not buffer be make buffer from it if (!Buffer.isBuffer(data)) data = Buffer.from(data.toString()); // 2. We Initialize and generate encrypting function const encrypter = make_encrypter(pwd); // 3. generate salt (12-bytes of random data) const salt = config.genSalt(); salt[11] = (header.crc >>> 24) & 0xff; // old implementations (before PKZip 2.04g) used two byte check if (oldlike) salt[10] = (header.crc >>> 16) & 0xff; // 4. 
create output const result = Buffer.alloc(data.length + 12); encrypter(salt, result); // finally encode content return encrypter(data, result, 12); } module.exports = { decrypt, encrypt, _salter }; /***/ }), /***/ 4522: /***/ ((module) => { module.exports = { /* The local file header */ LOCHDR : 30, // LOC header size LOCSIG : 0x04034b50, // "PK\003\004" LOCVER : 4, // version needed to extract LOCFLG : 6, // general purpose bit flag LOCHOW : 8, // compression method LOCTIM : 10, // modification time (2 bytes time, 2 bytes date) LOCCRC : 14, // uncompressed file crc-32 value LOCSIZ : 18, // compressed size LOCLEN : 22, // uncompressed size LOCNAM : 26, // filename length LOCEXT : 28, // extra field length /* The Data descriptor */ EXTSIG : 0x08074b50, // "PK\007\008" EXTHDR : 16, // EXT header size EXTCRC : 4, // uncompressed file crc-32 value EXTSIZ : 8, // compressed size EXTLEN : 12, // uncompressed size /* The central directory file header */ CENHDR : 46, // CEN header size CENSIG : 0x02014b50, // "PK\001\002" CENVEM : 4, // version made by CENVER : 6, // version needed to extract CENFLG : 8, // encrypt, decrypt flags CENHOW : 10, // compression method CENTIM : 12, // modification time (2 bytes time, 2 bytes date) CENCRC : 16, // uncompressed file crc-32 value CENSIZ : 20, // compressed size CENLEN : 24, // uncompressed size CENNAM : 28, // filename length CENEXT : 30, // extra field length CENCOM : 32, // file comment length CENDSK : 34, // volume number start CENATT : 36, // internal file attributes CENATX : 38, // external file attributes (host system dependent) CENOFF : 42, // LOC header offset /* The entries in the end of central directory */ ENDHDR : 22, // END header size ENDSIG : 0x06054b50, // "PK\005\006" ENDSUB : 8, // number of entries on this disk ENDTOT : 10, // total number of entries ENDSIZ : 12, // central directory size in bytes ENDOFF : 16, // offset of first CEN header ENDCOM : 20, // zip file comment length END64HDR : 20, // zip64 END 
header size END64SIG : 0x07064b50, // zip64 Locator signature, "PK\006\007" END64START : 4, // number of the disk with the start of the zip64 END64OFF : 8, // relative offset of the zip64 end of central directory END64NUMDISKS : 16, // total number of disks ZIP64SIG : 0x06064b50, // zip64 signature, "PK\006\006" ZIP64HDR : 56, // zip64 record minimum size ZIP64LEAD : 12, // leading bytes at the start of the record, not counted by the value stored in ZIP64SIZE ZIP64SIZE : 4, // zip64 size of the central directory record ZIP64VEM : 12, // zip64 version made by ZIP64VER : 14, // zip64 version needed to extract ZIP64DSK : 16, // zip64 number of this disk ZIP64DSKDIR : 20, // number of the disk with the start of the record directory ZIP64SUB : 24, // number of entries on this disk ZIP64TOT : 32, // total number of entries ZIP64SIZB : 40, // zip64 central directory size in bytes ZIP64OFF : 48, // offset of start of central directory with respect to the starting disk number ZIP64EXTRA : 56, // extensible data sector /* Compression methods */ STORED : 0, // no compression SHRUNK : 1, // shrunk REDUCED1 : 2, // reduced with compression factor 1 REDUCED2 : 3, // reduced with compression factor 2 REDUCED3 : 4, // reduced with compression factor 3 REDUCED4 : 5, // reduced with compression factor 4 IMPLODED : 6, // imploded // 7 reserved DEFLATED : 8, // deflated ENHANCED_DEFLATED: 9, // enhanced deflated PKWARE : 10,// PKWare DCL imploded // 11 reserved BZIP2 : 12, // compressed using BZIP2 // 13 reserved LZMA : 14, // LZMA // 15-17 reserved IBM_TERSE : 18, // compressed using IBM TERSE IBM_LZ77 : 19, //IBM LZ77 z /* General purpose bit flag */ FLG_ENC : 0, // encripted file FLG_COMP1 : 1, // compression option FLG_COMP2 : 2, // compression option FLG_DESC : 4, // data descriptor FLG_ENH : 8, // enhanced deflation FLG_STR : 16, // strong encryption FLG_LNG : 1024, // language encoding FLG_MSK : 4096, // mask header values /* Load type */ FILE : 2, BUFFER : 1, NONE : 0, /* 4.5 
Extensible data fields */ EF_ID : 0, EF_SIZE : 2, /* Header IDs */ ID_ZIP64 : 0x0001, ID_AVINFO : 0x0007, ID_PFS : 0x0008, ID_OS2 : 0x0009, ID_NTFS : 0x000a, ID_OPENVMS : 0x000c, ID_UNIX : 0x000d, ID_FORK : 0x000e, ID_PATCH : 0x000f, ID_X509_PKCS7 : 0x0014, ID_X509_CERTID_F : 0x0015, ID_X509_CERTID_C : 0x0016, ID_STRONGENC : 0x0017, ID_RECORD_MGT : 0x0018, ID_X509_PKCS7_RL : 0x0019, ID_IBM1 : 0x0065, ID_IBM2 : 0x0066, ID_POSZIP : 0x4690, EF_ZIP64_OR_32 : 0xffffffff, EF_ZIP64_OR_16 : 0xffff, EF_ZIP64_SUNCOMP : 0, EF_ZIP64_SCOMP : 8, EF_ZIP64_RHO : 16, EF_ZIP64_DSN : 24 }; /***/ }), /***/ 1255: /***/ ((module) => { module.exports = { /* Header error messages */ "INVALID_LOC" : "Invalid LOC header (bad signature)", "INVALID_CEN" : "Invalid CEN header (bad signature)", "INVALID_END" : "Invalid END header (bad signature)", /* ZipEntry error messages*/ "NO_DATA" : "Nothing to decompress", "BAD_CRC" : "CRC32 checksum failed", "FILE_IN_THE_WAY" : "There is a file in the way: %s", "UNKNOWN_METHOD" : "Invalid/unsupported compression method", /* Inflater error messages */ "AVAIL_DATA" : "inflate::Available inflate data did not terminate", "INVALID_DISTANCE" : "inflate::Invalid literal/length or distance code in fixed or dynamic block", "TO_MANY_CODES" : "inflate::Dynamic block code description: too many length or distance codes", "INVALID_REPEAT_LEN" : "inflate::Dynamic block code description: repeat more than specified lengths", "INVALID_REPEAT_FIRST" : "inflate::Dynamic block code description: repeat lengths with no first length", "INCOMPLETE_CODES" : "inflate::Dynamic block code description: code lengths codes incomplete", "INVALID_DYN_DISTANCE": "inflate::Dynamic block code description: invalid distance code lengths", "INVALID_CODES_LEN": "inflate::Dynamic block code description: invalid literal/length code lengths", "INVALID_STORE_BLOCK" : "inflate::Stored block length did not match one's complement", "INVALID_BLOCK_TYPE" : "inflate::Invalid block type (type == 3)", /* 
ADM-ZIP error messages */ "CANT_EXTRACT_FILE" : "Could not extract the file", "CANT_OVERRIDE" : "Target file already exists", "NO_ZIP" : "No zip file was loaded", "NO_ENTRY" : "Entry doesn't exist", "DIRECTORY_CONTENT_ERROR" : "A directory cannot have content", "FILE_NOT_FOUND" : "File not found: %s", "NOT_IMPLEMENTED" : "Not implemented", "INVALID_FILENAME" : "Invalid filename", "INVALID_FORMAT" : "Invalid or unsupported zip format. No END header found" }; /***/ }), /***/ 8321: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var fs = __nccwpck_require__(2895).require(), pth = __nccwpck_require__(5622); fs.existsSync = fs.existsSync || pth.existsSync; module.exports = function(/*String*/path) { var _path = path || "", _permissions = 0, _obj = newAttr(), _stat = null; function newAttr() { return { directory : false, readonly : false, hidden : false, executable : false, mtime : 0, atime : 0 } } if (_path && fs.existsSync(_path)) { _stat = fs.statSync(_path); _obj.directory = _stat.isDirectory(); _obj.mtime = _stat.mtime; _obj.atime = _stat.atime; _obj.executable = (0o111 & _stat.mode) != 0; // file is executable who ever har right not just owner _obj.readonly = (0o200 & _stat.mode) == 0; // readonly if owner has no write right _obj.hidden = pth.basename(_path)[0] === "."; } else { console.warn("Invalid path: " + _path) } return { get directory () { return _obj.directory; }, get readOnly () { return _obj.readonly; }, get hidden () { return _obj.hidden; }, get mtime () { return _obj.mtime; }, get atime () { return _obj.atime; }, get executable () { return _obj.executable; }, decodeAttributes : function(val) { }, encodeAttributes : function (val) { }, toString : function() { return '{\n' + '\t"path" : "' + _path + ",\n" + '\t"isDirectory" : ' + _obj.directory + ",\n" + '\t"isReadOnly" : ' + _obj.readonly + ",\n" + '\t"isHidden" : ' + _obj.hidden + ",\n" + '\t"isExecutable" : ' + _obj.executable + ",\n" + '\t"mTime" : ' + _obj.mtime + "\n" + 
'\t"aTime" : ' + _obj.atime + "\n" + '}'; } } }; /***/ }), /***/ 2895: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { exports.require = function() { var fs = __nccwpck_require__(5747); if (process && process.versions && process.versions['electron']) { try { originalFs = __nccwpck_require__(2941); if (Object.keys(originalFs).length > 0) { fs = originalFs; } } catch (e) {} } return fs }; /***/ }), /***/ 5182: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { module.exports = __nccwpck_require__(1291); module.exports.FileSystem = __nccwpck_require__(2895); module.exports.Constants = __nccwpck_require__(4522); module.exports.Errors = __nccwpck_require__(1255); module.exports.FileAttr = __nccwpck_require__(8321); /***/ }), /***/ 1291: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var fs = __nccwpck_require__(2895).require(), pth = __nccwpck_require__(5622); fs.existsSync = fs.existsSync || pth.existsSync; module.exports = (function() { var crcTable = [], Constants = __nccwpck_require__(4522), Errors = __nccwpck_require__(1255), PATH_SEPARATOR = pth.sep; function mkdirSync(/*String*/path) { var resolvedPath = path.split(PATH_SEPARATOR)[0]; path.split(PATH_SEPARATOR).forEach(function(name) { if (!name || name.substr(-1,1) === ":") return; resolvedPath += PATH_SEPARATOR + name; var stat; try { stat = fs.statSync(resolvedPath); } catch (e) { fs.mkdirSync(resolvedPath); } if (stat && stat.isFile()) throw Errors.FILE_IN_THE_WAY.replace("%s", resolvedPath); }); } function findSync(/*String*/dir, /*RegExp*/pattern, /*Boolean*/recoursive) { if (typeof pattern === 'boolean') { recoursive = pattern; pattern = undefined; } var files = []; fs.readdirSync(dir).forEach(function(file) { var path = pth.join(dir, file); if (fs.statSync(path).isDirectory() && recoursive) files = files.concat(findSync(path, pattern, recoursive)); if (!pattern || pattern.test(path)) { files.push(pth.normalize(path) + 
(fs.statSync(path).isDirectory() ? PATH_SEPARATOR : "")); } }); return files; } function readBigUInt64LE(/*Buffer*/buffer, /*int*/index) { var slice = Buffer.from(buffer.slice(index, index + 8)); slice.swap64(); return parseInt(`0x${ slice.toString('hex') }`); } return { makeDir : function(/*String*/path) { mkdirSync(path); }, crc32 : function(buf) { if (typeof buf === 'string') { buf = Buffer.from(buf); } var b = Buffer.alloc(4); if (!crcTable.length) { for (var n = 0; n < 256; n++) { var c = n; for (var k = 8; --k >= 0;) // if ((c & 1) !== 0) { c = 0xedb88320 ^ (c >>> 1); } else { c = c >>> 1; } if (c < 0) { b.writeInt32LE(c, 0); c = b.readUInt32LE(0); } crcTable[n] = c; } } var crc = 0, off = 0, len = buf.length, c1 = ~crc; while(--len >= 0) c1 = crcTable[(c1 ^ buf[off++]) & 0xff] ^ (c1 >>> 8); crc = ~c1; b.writeInt32LE(crc & 0xffffffff, 0); return b.readUInt32LE(0); }, methodToString : function(/*Number*/method) { switch (method) { case Constants.STORED: return 'STORED (' + method + ')'; case Constants.DEFLATED: return 'DEFLATED (' + method + ')'; default: return 'UNSUPPORTED (' + method + ')'; } }, writeFileTo : function(/*String*/path, /*Buffer*/content, /*Boolean*/overwrite, /*Number*/attr) { if (fs.existsSync(path)) { if (!overwrite) return false; // cannot overwrite var stat = fs.statSync(path); if (stat.isDirectory()) { return false; } } var folder = pth.dirname(path); if (!fs.existsSync(folder)) { mkdirSync(folder); } var fd; try { fd = fs.openSync(path, 'w', 438); // 0666 } catch(e) { fs.chmodSync(path, 438); fd = fs.openSync(path, 'w', 438); } if (fd) { try { fs.writeSync(fd, content, 0, content.length, 0); } catch (e){ throw e; } finally { fs.closeSync(fd); } } fs.chmodSync(path, attr || 438); return true; }, writeFileToAsync : function(/*String*/path, /*Buffer*/content, /*Boolean*/overwrite, /*Number*/attr, /*Function*/callback) { if(typeof attr === 'function') { callback = attr; attr = undefined; } fs.exists(path, function(exists) { if(exists && 
/**
 * Checks `data` against the CRC-32 recorded in the entry's local data header.
 *
 * @param {Buffer} data uncompressed entry content
 * @return {boolean} false only when a CRC is expected and does not match
 */
function crc32OK(data) {
    // if bit 3 (0x08) of the general-purpose flags field is set, then the CRC-32 and file sizes are not known when the header is written
    var usesDataDescriptor = (_entryHeader.flags & 0x8) === 0x8;

    if (usesDataDescriptor) {
        // @TODO: load and check data descriptor header
        // The fields in the local header are filled with zero, and the CRC-32 and size are appended in a 12-byte structure
        // (optionally preceded by a 4-byte signature) immediately after the compressed data:
        return true;
    }

    return Utils.crc32(data) === _entryHeader.dataHeader.crc;
}
/**
 * Reads a little-endian unsigned 64-bit integer as a JavaScript Number.
 *
 * The value is assembled from two 32-bit halves. The high half has to be
 * scaled by 2^32; the previous `<< 4` only multiplied it by 16 (and, being a
 * 32-bit operation, could also overflow), so any value >= 2^32 was wrong.
 * Multiplication is used instead of a shift because JavaScript shift counts
 * are taken modulo 32 and the result is truncated to 32 bits.
 *
 * Results above Number.MAX_SAFE_INTEGER (2^53 - 1) lose precision.
 *
 * @param {Buffer} buffer source buffer
 * @param {number} offset offset of the low 32-bit word
 * @return {number}
 */
function readUInt64LE(buffer, offset) {
    var low = buffer.readUInt32LE(offset);
    var high = buffer.readUInt32LE(offset + 4);
    return high * 0x100000000 + low;
}
n.substr(n.length - 1).split("/").pop() : n.split("/").pop(); }, get isDirectory () { return _isDirectory }, getCompressedData : function() { return compress(false, null) }, getCompressedDataAsync : function(/*Function*/callback) { compress(true, callback) }, setData : function(value) { uncompressedData = Utils.toBuffer(value); if (!_isDirectory && uncompressedData.length) { _entryHeader.size = uncompressedData.length; _entryHeader.method = Utils.Constants.DEFLATED; _entryHeader.crc = Utils.crc32(value); _entryHeader.changed = true; } else { // folders and blank files should be stored _entryHeader.method = Utils.Constants.STORED; } }, getData : function(pass) { if (_entryHeader.changed) { return uncompressedData; } else { return decompress(false, null, pass); } }, getDataAsync : function(/*Function*/callback, pass) { if (_entryHeader.changed) { callback(uncompressedData); } else { decompress(true, callback, pass); } }, set attr(attr) { _entryHeader.attr = attr; }, get attr() { return _entryHeader.attr; }, set header(/*Buffer*/data) { _entryHeader.loadFromBinary(data); }, get header() { return _entryHeader; }, packHeader : function() { // 1. create header (buffer) var header = _entryHeader.entryHeaderToBinary(); var addpos = Utils.Constants.CENHDR; // 2. add file name _entryName.copy(header, addpos); addpos += _entryName.length; // 3. add extra data if (_entryHeader.extraLength) { _extra.copy(header, addpos); addpos += _entryHeader.extraLength; } // 4. add file comment if (_entryHeader.commentLength) { _comment.copy(header, addpos); } return header; }, toString : function() { return '{\n' + '\t"entryName" : "' + _entryName.toString() + "\",\n" + '\t"name" : "' + (_isDirectory ? 
_entryName.toString().replace(/\/$/, '').split("/").pop() : _entryName.toString().split("/").pop()) + "\",\n" + '\t"comment" : "' + _comment.toString() + "\",\n" + '\t"isDirectory" : ' + _isDirectory + ",\n" + '\t"header" : ' + _entryHeader.toString().replace(/\t/mg, "\t\t").replace(/}/mg, "\t}") + ",\n" + '\t"compressedData" : <' + (input && input.length + " bytes buffer" || "null") + ">\n" + '\t"data" : <' + (uncompressedData && uncompressedData.length + " bytes buffer" || "null") + ">\n" + '}'; } } }; /***/ }), /***/ 7744: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { const ZipEntry = __nccwpck_require__(4057); const Headers = __nccwpck_require__(4958); const Utils = __nccwpck_require__(5182); module.exports = function (/*Buffer|null*/inBuffer, /** object */options) { var entryList = [], entryTable = {}, _comment = Buffer.alloc(0), mainHeader = new Headers.MainHeader(), loadedEntries = false; // assign options const opts = Object.assign(Object.create(null), options); if (inBuffer){ // is a memory buffer readMainHeader(opts.readEntries); } else { // none. 
/**
 * Locates the end-of-central-directory record in `inBuffer` and loads it
 * into `mainHeader`.
 *
 * The buffer is scanned backwards, starting ENDHDR bytes before its end and
 * going back at most 0xFFFF further bytes. Three signatures are recognised:
 * ENDSIG (regular END record), END64SIG and ZIP64SIG (zip64 records).
 *
 * Side effects: fills `mainHeader`, sets `_comment` when the header reports a
 * comment length, and reads all entries when `readNow` is truthy.
 *
 * @param {Boolean} readNow when truthy, readEntries() is called after the header is loaded
 * @throws {Error} Utils.Errors.INVALID_FORMAT when no END / zip64 record was found
 */
function readMainHeader(/*Boolean*/ readNow) {
    var i = inBuffer.length - Utils.Constants.ENDHDR, // END header size
        max = Math.max(0, i - 0xFFFF), // 0xFFFF is the max zip file comment length
        n = max, // lowest offset the scan may reach; adjusted while scanning
        endStart = inBuffer.length,
        endOffset = -1, // Start offset of the END header
        commentEnd = 0;

    for (i; i >= n; i--) {
        if (inBuffer[i] !== 0x50) continue; // quick check that the byte is 'P'
        if (inBuffer.readUInt32LE(i) === Utils.Constants.ENDSIG) { // "PK\005\006"
            endOffset = i;
            commentEnd = i;
            endStart = i + Utils.Constants.ENDHDR;
            // We already found a regular signature, let's look just a bit further to check if there's any zip64 signature
            n = i - Utils.Constants.END64HDR;
            continue;
        }

        if (inBuffer.readUInt32LE(i) === Utils.Constants.END64SIG) {
            // Found a zip64 signature, let's continue reading the whole zip64 record
            // (the scan window is widened back to its original lower bound)
            n = max;
            continue;
        }

        if (inBuffer.readUInt32LE(i) == Utils.Constants.ZIP64SIG) {
            // Found the zip64 record, let's determine it's size
            // The record end is: start + size field read at ZIP64SIZE + ZIP64LEAD.
            endOffset = i;
            endStart = i + Utils.readBigUInt64LE(inBuffer, i + Utils.Constants.ZIP64SIZE) + Utils.Constants.ZIP64LEAD;
            break;
        }
    }

    // endOffset is still -1 (so ~endOffset === 0) when nothing was found
    if (!~endOffset)
        throw new Error(Utils.Errors.INVALID_FORMAT);

    mainHeader.loadFromBinary(inBuffer.slice(endOffset, endStart));
    if (mainHeader.commentLength) {
        // everything after the regular END record is taken as the archive comment
        _comment = inBuffer.slice(commentEnd + Utils.Constants.ENDHDR);
    }
    if (readNow) readEntries();
}
* * If the entry is a directory, then all nested files and directories will be removed * @param entryName */ deleteEntry: function (/*String*/entryName) { if (!loadedEntries) { readEntries(); } var entry = entryTable[entryName]; if (entry && entry.isDirectory) { var _self = this; this.getEntryChildren(entry).forEach(function (child) { if (child.entryName !== entryName) { _self.deleteEntry(child.entryName) } }) } entryList.splice(entryList.indexOf(entry), 1); delete(entryTable[entryName]); mainHeader.totalEntries = entryList.length; }, /** * Iterates and returns all nested files and directories of the given entry * * @param entry * @return Array */ getEntryChildren: function (/*ZipEntry*/entry) { if (!loadedEntries) { readEntries(); } if (entry.isDirectory) { var list = [], name = entry.entryName, len = name.length; entryList.forEach(function (zipEntry) { if (zipEntry.entryName.substr(0, len) === name) { list.push(zipEntry); } }); return list; } return [] }, /** * Returns the zip file * * @return Buffer */ compressToBuffer: function () { if (!loadedEntries) { readEntries(); } if (entryList.length > 1) { entryList.sort(function (a, b) { var nameA = a.entryName.toLowerCase(); var nameB = b.entryName.toLowerCase(); if (nameA < nameB) { return -1 } if (nameA > nameB) { return 1 } return 0; }); } var totalSize = 0, dataBlock = [], entryHeaders = [], dindex = 0; mainHeader.size = 0; mainHeader.offset = 0; entryList.forEach(function (entry) { // compress data and set local and entry header accordingly. 
Reason why is called first var compressedData = entry.getCompressedData(); // data header entry.header.offset = dindex; var dataHeader = entry.header.dataHeaderToBinary(); var entryNameLen = entry.rawEntryName.length; var extra = entry.extra.toString(); var postHeader = Buffer.alloc(entryNameLen + extra.length); entry.rawEntryName.copy(postHeader, 0); postHeader.fill(extra, entryNameLen); var dataLength = dataHeader.length + postHeader.length + compressedData.length; dindex += dataLength; dataBlock.push(dataHeader); dataBlock.push(postHeader); dataBlock.push(compressedData); var entryHeader = entry.packHeader(); entryHeaders.push(entryHeader); mainHeader.size += entryHeader.length; totalSize += (dataLength + entryHeader.length); }); totalSize += mainHeader.mainHeaderSize; // also includes zip file comment length // point to end of data and beginning of central directory first record mainHeader.offset = dindex; dindex = 0; var outBuffer = Buffer.alloc(totalSize); dataBlock.forEach(function (content) { content.copy(outBuffer, dindex); // write data blocks dindex += content.length; }); entryHeaders.forEach(function (content) { content.copy(outBuffer, dindex); // write central directory entries dindex += content.length; }); var mh = mainHeader.toBinary(); if (_comment) { Buffer.from(_comment).copy(mh, Utils.Constants.ENDHDR); // add zip file comment } mh.copy(outBuffer, dindex); // write main header return outBuffer; }, toAsyncBuffer: function (/*Function*/onSuccess, /*Function*/onFail, /*Function*/onItemStart, /*Function*/onItemEnd) { if (!loadedEntries) { readEntries(); } if (entryList.length > 1) { entryList.sort(function (a, b) { var nameA = a.entryName.toLowerCase(); var nameB = b.entryName.toLowerCase(); if (nameA > nameB) { return -1 } if (nameA < nameB) { return 1 } return 0; }); } var totalSize = 0, dataBlock = [], entryHeaders = [], dindex = 0; mainHeader.size = 0; mainHeader.offset = 0; var compress = function (entryList) { var self = arguments.callee; if 
(entryList.length) { var entry = entryList.pop(); var name = entry.entryName + entry.extra.toString(); if (onItemStart) onItemStart(name); entry.getCompressedDataAsync(function (compressedData) { if (onItemEnd) onItemEnd(name); entry.header.offset = dindex; // data header var dataHeader = entry.header.dataHeaderToBinary(); var postHeader; try { postHeader = Buffer.alloc(name.length, name); // using alloc will work on node 5.x+ } catch(e){ postHeader = new Buffer(name); // use deprecated method if alloc fails... } var dataLength = dataHeader.length + postHeader.length + compressedData.length; dindex += dataLength; dataBlock.push(dataHeader); dataBlock.push(postHeader); dataBlock.push(compressedData); var entryHeader = entry.packHeader(); entryHeaders.push(entryHeader); mainHeader.size += entryHeader.length; totalSize += (dataLength + entryHeader.length); if (entryList.length) { self(entryList); } else { totalSize += mainHeader.mainHeaderSize; // also includes zip file comment length // point to end of data and beginning of central directory first record mainHeader.offset = dindex; dindex = 0; var outBuffer = Buffer.alloc(totalSize); dataBlock.forEach(function (content) { content.copy(outBuffer, dindex); // write data blocks dindex += content.length; }); entryHeaders.forEach(function (content) { content.copy(outBuffer, dindex); // write central directory entries dindex += content.length; }); var mh = mainHeader.toBinary(); if (_comment) { _comment.copy(mh, Utils.Constants.ENDHDR); // add zip file comment } mh.copy(outBuffer, dindex); // write main header onSuccess(outBuffer); } }); } }; compress(entryList); } } }; /***/ }), /***/ 3682: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var register = __nccwpck_require__(4670) var addHook = __nccwpck_require__(5549) var removeHook = __nccwpck_require__(6819) // bind with array of arguments: https://stackoverflow.com/a/21792913 var bind = Function.bind var bindable = bind.bind(bind) function bindApi 
var collectionHookDeprecationMessageDisplayed = false

/**
 * Legacy entry point: behaves like HookCollection(), but prints a one-time
 * console warning pointing callers at "Hook.Collection()".
 *
 * @return the value returned by HookCollection()
 */
function Hook () {
  if (collectionHookDeprecationMessageDisplayed) {
    return HookCollection()
  }

  console.warn('[before-after-hook]: "Hook()" repurposing warning, use "Hook.Collection()". Read more: https://git.io/upgrade-before-after-hook-to-1.4')
  collectionHookDeprecationMessageDisplayed = true

  return HookCollection()
}
/**
 * Runs `method` wrapped in the hooks registered in `state.registry`.
 *
 * @param {Object} state             holder of `registry`, a map from hook name to registered hooks
 * @param {string|string[]} name     hook name, or a list of names; with a list the
 *                                   first name becomes the outermost wrapper
 * @param {Function} method          function to run; receives `options`
 * @param {Object} [options]         passed through to hooks and `method`; defaults to `{}`
 * @return {Promise} resolves/rejects with the outcome of the wrapped call
 * @throws {Error} synchronously, when `method` is not a function
 */
function register(state, name, method, options) {
  if (typeof method !== "function") {
    throw new Error("method for before hook must be a function");
  }

  if (!options) {
    options = {};
  }

  if (Array.isArray(name)) {
    // reduceRight visits the names last-to-first, which is what the previous
    // `name.reverse().reduce(...)` did, but without reversing the caller's
    // array in place (a second call with the same array used to run the
    // hooks in the opposite order).
    return name.reduceRight(function (callback, name) {
      return register.bind(null, state, name, callback, options);
    }, method)();
  }

  return Promise.resolve().then(function () {
    if (!state.registry[name]) {
      return method(options);
    }

    // Each registered hook wraps what has been built so far; the last one
    // registered ends up outermost.
    return state.registry[name].reduce(function (method, registered) {
      return registered.hook.bind(null, method, options);
    }, method)();
  });
}
var DLList;

/**
 * Minimal doubly linked FIFO list.
 *
 * `incr` / `decr` are optional callbacks invoked on every push / successful
 * shift; `length` tracks the number of stored values.
 */
DLList = class DLList {
  constructor(incr, decr) {
    this.incr = incr;
    this.decr = decr;
    this._first = null;
    this._last = null;
    this.length = 0;
  }

  /** Appends `value` at the tail. Always returns undefined. */
  push(value) {
    this.length += 1;
    if (typeof this.incr === "function") {
      this.incr();
    }
    const tail = this._last;
    const node = { value, prev: tail, next: null };
    if (tail != null) {
      tail.next = node;
    } else {
      this._first = node;
    }
    this._last = node;
    return void 0;
  }

  /** Removes and returns the head value; returns undefined when the list is empty. */
  shift() {
    if (this._first == null) {
      return;
    }
    this.length -= 1;
    if (typeof this.decr === "function") {
      this.decr();
    }
    // read the head only after decr() has run, as the callback is arbitrary code
    const { value, next } = this._first;
    this._first = next;
    if (next != null) {
      next.prev = null;
    } else {
      this._last = null;
    }
    return value;
  }

  /** Returns the head value without removing it (undefined when empty). */
  first() {
    return this._first != null ? this._first.value : void 0;
  }

  /** Returns all values, head first, as a new array. */
  getArray() {
    const values = [];
    for (let node = this._first; node != null; node = node.next) {
      values.push(node.value);
    }
    return values;
  }

  /** Shifts values off the list, passing each to `cb`, until shift() yields null/undefined. */
  forEachShift(cb) {
    for (let item = this.shift(); item != null; item = this.shift()) {
      cb(item);
    }
    return void 0;
  }

  /** Returns one `{value, prev, next}` record per node, with neighbours given by value. */
  debug() {
    const snapshot = [];
    for (let node = this._first; node != null; node = node.next) {
      const { prev, next } = node;
      snapshot.push({
        value: node.value,
        prev: prev != null ? prev.value : void 0,
        next: next != null ? next.value : void 0
      });
    }
    return snapshot;
  }
};

var DLList_1 = DLList;
var DLList$1, Events$1, Queues;

DLList$1 = DLList_1;

Events$1 = Events_1;

/**
 * A set of job lists, one per priority level, with a shared element counter.
 *
 * Emits "leftzero" when the total count goes from 0 to 1 and "zero" when it
 * drops back to 0.
 */
Queues = class Queues {
  constructor(num_priorities) {
    this.Events = new Events$1(this);
    this._length = 0;

    // Inclusive range 1..num_priorities; counts downwards when the bound is
    // below 1 (behaviour of the compiled range loop this replaces).
    const lists = [];
    const bound = num_priorities;
    const ascending = 1 <= bound;
    for (let k = 1; ascending ? k <= bound : k >= bound; ascending ? ++k : --k) {
      lists.push(new DLList$1(() => this.incr(), () => this.decr()));
    }
    this._lists = lists;
  }

  /** Bumps the total count; triggers "leftzero" if it was 0 before. */
  incr() {
    const wasEmpty = this._length === 0;
    this._length++;
    if (wasEmpty) {
      return this.Events.trigger("leftzero");
    }
  }

  /** Lowers the total count; triggers "zero" when it reaches 0. */
  decr() {
    this._length -= 1;
    if (this._length === 0) {
      return this.Events.trigger("zero");
    }
  }

  /** Appends `job` to the list selected by `job.options.priority`. */
  push(job) {
    const target = this._lists[job.options.priority];
    return target.push(job);
  }

  /** Length of one priority list, or the total count when no priority is given. */
  queued(priority) {
    if (priority == null) {
      return this._length;
    }
    return this._lists[priority].length;
  }

  /** Drains every list through `fn`. */
  shiftAll(fn) {
    return this._lists.forEach((list) => list.forEachShift(fn));
  }

  /** First non-empty list in `arr`, or an empty array when there is none. */
  getFirst(arr = this._lists) {
    for (const list of Array.from({ length: arr.length }, (_, idx) => arr[idx])) {
      if (list.length > 0) {
        return list;
      }
    }
    return [];
  }

  /** Shifts from the first non-empty list, searching from the last list back to index `priority`. */
  shiftLastFrom(priority) {
    const candidates = this._lists.slice(priority);
    candidates.reverse();
    return this.getFirst(candidates).shift();
  }
};

var Queues_1 = Queues;
this.rejectOnDrop = rejectOnDrop; this.Events = Events; this._states = _states; this.Promise = Promise; this.options = parser$1.load(options, jobDefaults); this.options.priority = this._sanitizePriority(this.options.priority); if (this.options.id === jobDefaults.id) { this.options.id = `${this.options.id}-${this._randomIndex()}`; } this.promise = new this.Promise((_resolve, _reject) => { this._resolve = _resolve; this._reject = _reject; }); this.retryCount = 0; } _sanitizePriority(priority) { var sProperty; sProperty = ~~priority !== priority ? DEFAULT_PRIORITY : priority; if (sProperty < 0) { return 0; } else if (sProperty > NUM_PRIORITIES - 1) { return NUM_PRIORITIES - 1; } else { return sProperty; } } _randomIndex() { return Math.random().toString(36).slice(2); } doDrop({error, message = "This job has been dropped by Bottleneck"} = {}) { if (this._states.remove(this.options.id)) { if (this.rejectOnDrop) { this._reject(error != null ? error : new BottleneckError$1(message)); } this.Events.trigger("dropped", {args: this.args, options: this.options, task: this.task, promise: this.promise}); return true; } else { return false; } } _assertStatus(expected) { var status; status = this._states.jobStatus(this.options.id); if (!(status === expected || (expected === "DONE" && status === null))) { throw new BottleneckError$1(`Invalid job status ${status}, expected ${expected}. 
Please open an issue at https://github.com/SGrondin/bottleneck/issues`); } } doReceive() { this._states.start(this.options.id); return this.Events.trigger("received", {args: this.args, options: this.options}); } doQueue(reachedHWM, blocked) { this._assertStatus("RECEIVED"); this._states.next(this.options.id); return this.Events.trigger("queued", {args: this.args, options: this.options, reachedHWM, blocked}); } doRun() { if (this.retryCount === 0) { this._assertStatus("QUEUED"); this._states.next(this.options.id); } else { this._assertStatus("EXECUTING"); } return this.Events.trigger("scheduled", {args: this.args, options: this.options}); } async doExecute(chained, clearGlobalState, run, free) { var error, eventInfo, passed; if (this.retryCount === 0) { this._assertStatus("RUNNING"); this._states.next(this.options.id); } else { this._assertStatus("EXECUTING"); } eventInfo = {args: this.args, options: this.options, retryCount: this.retryCount}; this.Events.trigger("executing", eventInfo); try { passed = (await (chained != null ? 
chained.schedule(this.options, this.task, ...this.args) : this.task(...this.args))); if (clearGlobalState()) { this.doDone(eventInfo); await free(this.options, eventInfo); this._assertStatus("DONE"); return this._resolve(passed); } } catch (error1) { error = error1; return this._onFailure(error, eventInfo, clearGlobalState, run, free); } } doExpire(clearGlobalState, run, free) { var error, eventInfo; if (this._states.jobStatus(this.options.id === "RUNNING")) { this._states.next(this.options.id); } this._assertStatus("EXECUTING"); eventInfo = {args: this.args, options: this.options, retryCount: this.retryCount}; error = new BottleneckError$1(`This job timed out after ${this.options.expiration} ms.`); return this._onFailure(error, eventInfo, clearGlobalState, run, free); } async _onFailure(error, eventInfo, clearGlobalState, run, free) { var retry, retryAfter; if (clearGlobalState()) { retry = (await this.Events.trigger("failed", error, eventInfo)); if (retry != null) { retryAfter = ~~retry; this.Events.trigger("retry", `Retrying ${this.options.id} after ${retryAfter} ms`, eventInfo); this.retryCount++; return run(retryAfter); } else { this.doDone(eventInfo); await free(this.options, eventInfo); this._assertStatus("DONE"); return this._reject(error); } } } doDone(eventInfo) { this._assertStatus("EXECUTING"); this._states.next(this.options.id); return this.Events.trigger("done", eventInfo); } }; var Job_1 = Job; var BottleneckError$2, LocalDatastore, parser$2; parser$2 = parser; BottleneckError$2 = BottleneckError_1; LocalDatastore = class LocalDatastore { constructor(instance, storeOptions, storeInstanceOptions) { this.instance = instance; this.storeOptions = storeOptions; this.clientId = this.instance._randomIndex(); parser$2.load(storeInstanceOptions, storeInstanceOptions, this); this._nextRequest = this._lastReservoirRefresh = this._lastReservoirIncrease = Date.now(); this._running = 0; this._done = 0; this._unblockTime = 0; this.ready = this.Promise.resolve(); 
this.clients = {}; this._startHeartbeat(); } _startHeartbeat() { var base; if ((this.heartbeat == null) && (((this.storeOptions.reservoirRefreshInterval != null) && (this.storeOptions.reservoirRefreshAmount != null)) || ((this.storeOptions.reservoirIncreaseInterval != null) && (this.storeOptions.reservoirIncreaseAmount != null)))) { return typeof (base = (this.heartbeat = setInterval(() => { var amount, incr, maximum, now, reservoir; now = Date.now(); if ((this.storeOptions.reservoirRefreshInterval != null) && now >= this._lastReservoirRefresh + this.storeOptions.reservoirRefreshInterval) { this._lastReservoirRefresh = now; this.storeOptions.reservoir = this.storeOptions.reservoirRefreshAmount; this.instance._drainAll(this.computeCapacity()); } if ((this.storeOptions.reservoirIncreaseInterval != null) && now >= this._lastReservoirIncrease + this.storeOptions.reservoirIncreaseInterval) { ({ reservoirIncreaseAmount: amount, reservoirIncreaseMaximum: maximum, reservoir } = this.storeOptions); this._lastReservoirIncrease = now; incr = maximum != null ? Math.min(amount, maximum - reservoir) : amount; if (incr > 0) { this.storeOptions.reservoir += incr; return this.instance._drainAll(this.computeCapacity()); } } }, this.heartbeatInterval))).unref === "function" ? base.unref() : void 0; } else { return clearInterval(this.heartbeat); } } async __publish__(message) { await this.yieldLoop(); return this.instance.Events.trigger("message", message.toString()); } async __disconnect__(flush) { await this.yieldLoop(); clearInterval(this.heartbeat); return this.Promise.resolve(); } yieldLoop(t = 0) { return new this.Promise(function(resolve, reject) { return setTimeout(resolve, t); }); } computePenalty() { var ref; return (ref = this.storeOptions.penalty) != null ? 
ref : (15 * this.storeOptions.minTime) || 5000;
  }

  // Copies every entry of `options` onto storeOptions, re-evaluates the
  // heartbeat and drains the queue with the new capacity. Resolves to true.
  async __updateSettings__(options) {
    await this.yieldLoop();
    parser$2.overwrite(options, options, this.storeOptions);
    this._startHeartbeat();
    this.instance._drainAll(this.computeCapacity());
    return true;
  }

  // Resolves to the summed weight of jobs currently registered as running.
  async __running__() {
    await this.yieldLoop();
    return this._running;
  }

  // Resolves to the owning limiter's queued() value.
  async __queued__() {
    await this.yieldLoop();
    return this.instance.queued();
  }

  // Resolves to the summed weight of jobs that have been freed.
  async __done__() {
    await this.yieldLoop();
    return this._done;
  }

  // Resolves to true when _nextRequest + timeout lies before `time`.
  async __groupCheck__(time) {
    await this.yieldLoop();
    return (this._nextRequest + this.timeout) < time;
  }

  // Remaining capacity: min(maxConcurrent - running, reservoir) when both are
  // set, whichever one is set otherwise, or null when neither is set.
  computeCapacity() {
    var maxConcurrent, reservoir;
    ({maxConcurrent, reservoir} = this.storeOptions);
    if ((maxConcurrent != null) && (reservoir != null)) {
      return Math.min(maxConcurrent - this._running, reservoir);
    } else if (maxConcurrent != null) {
      return maxConcurrent - this._running;
    } else if (reservoir != null) {
      return reservoir;
    } else {
      return null;
    }
  }

  // True when capacity is unlimited (null) or `weight` fits in it.
  conditionsCheck(weight) {
    var capacity;
    capacity = this.computeCapacity();
    return (capacity == null) || weight <= capacity;
  }

  // Adds `incr` to the reservoir, drains the queue, resolves to the new value.
  async __incrementReservoir__(incr) {
    var reservoir;
    await this.yieldLoop();
    reservoir = this.storeOptions.reservoir += incr;
    this.instance._drainAll(this.computeCapacity());
    return reservoir;
  }

  // Resolves to the current reservoir value.
  async __currentReservoir__() {
    await this.yieldLoop();
    return this.storeOptions.reservoir;
  }

  // True while `now` has not yet passed _unblockTime.
  isBlocked(now) {
    return this._unblockTime >= now;
  }

  // True when `weight` fits the capacity and _nextRequest is not in the future.
  check(weight, now) {
    return this.conditionsCheck(weight) && (this._nextRequest - now) <= 0;
  }

  // Async wrapper around check() using the current time.
  async __check__(weight) {
    var now;
    await this.yieldLoop();
    now = Date.now();
    return this.check(weight, now);
  }

  // Tries to reserve `weight`. On success: adds it to _running, subtracts it
  // from the reservoir (when one is set), computes how long the job must wait
  // and pushes _nextRequest to now + wait + minTime.
  // `index` and `expiration` are not used by this datastore.
  async __register__(index, weight, expiration) {
    var now, wait;
    await this.yieldLoop();
    now = Date.now();
    if (this.conditionsCheck(weight)) {
      this._running += weight;
      if (this.storeOptions.reservoir != null) {
        this.storeOptions.reservoir -= weight;
      }
      wait = Math.max(this._nextRequest - now, 0);
      this._nextRequest = now + wait + this.storeOptions.minTime;
      return {
        success: true,
        wait,
reservoir: this.storeOptions.reservoir
      };
    } else {
      return {
        success: false
      };
    }
  }

  // True when the configured strategy is 3 (the value of strategy.BLOCK).
  strategyIsBlock() {
    return this.storeOptions.strategy === 3;
  }

  // Decides what happens to a newly submitted job.
  // Throws when `weight` alone exceeds maxConcurrent.
  // reachedHWM: queue length equals highWater and the job cannot run now.
  // blocked: BLOCK strategy and (reachedHWM or still inside a penalty window);
  // in that case the penalty window is (re)started and all queued jobs dropped.
  async __submit__(queueLength, weight) {
    var blocked, now, reachedHWM;
    await this.yieldLoop();
    if ((this.storeOptions.maxConcurrent != null) && weight > this.storeOptions.maxConcurrent) {
      throw new BottleneckError$2(`Impossible to add a job having a weight of ${weight} to a limiter having a maxConcurrent setting of ${this.storeOptions.maxConcurrent}`);
    }
    now = Date.now();
    reachedHWM = (this.storeOptions.highWater != null) && queueLength === this.storeOptions.highWater && !this.check(weight, now);
    blocked = this.strategyIsBlock() && (reachedHWM || this.isBlocked(now));
    if (blocked) {
      this._unblockTime = now + this.computePenalty();
      this._nextRequest = this._unblockTime + this.storeOptions.minTime;
      this.instance._dropAllQueued();
    }
    return {
      reachedHWM,
      blocked,
      strategy: this.storeOptions.strategy
    };
  }

  // Releases `weight`: moves it from _running to _done and drains the queue.
  // `index` is not used by this datastore.
  async __free__(index, weight) {
    await this.yieldLoop();
    this._running -= weight;
    this._done += weight;
    this.instance._drainAll(this.computeCapacity());
    return {
      running: this._running
    };
  }

};

var LocalDatastore_1 = LocalDatastore;

var BottleneckError$3, States;

BottleneckError$3 = BottleneckError_1;

// Tracks, per job id, the index of its current status in the ordered `status`
// list, plus a per-status counter array.
States = class States {
  constructor(status1) {
    this.status = status1;
    this._jobs = {};
    this.counts = this.status.map(function() {
      return 0;
    });
  }

  // Advances job `id` to the next status. A job already at the last status is
  // removed from tracking. Unknown ids are ignored.
  next(id) {
    var current, next;
    current = this._jobs[id];
    next = current + 1;
    if ((current != null) && next < this.status.length) {
      this.counts[current]--;
      this.counts[next]++;
      return this._jobs[id]++;
    } else if (current != null) {
      this.counts[current]--;
      return delete this._jobs[id];
    }
  }

  // Starts tracking job `id` at status index 0.
  start(id) {
    var initial;
    initial = 0;
    this._jobs[id] = initial;
    return this.counts[initial]++;
  }

  // Stops tracking job `id`; returns whether it was being tracked.
  remove(id) {
    var current;
    current = this._jobs[id];
    if (current != null) {
      this.counts[current]--;
      delete this._jobs[id];
    }
    return current != null;
  }

  // Status name of job `id`, or null when the job is not tracked.
  jobStatus(id) {
    var ref;
    return (ref =
this.status[this._jobs[id]]) != null ? ref : null;
  }

  // With a status name: ids of all jobs currently in that status (throws for
  // an unknown name). Without one: ids of all tracked jobs.
  statusJobs(status) {
    var k, pos, ref, results, v;
    if (status != null) {
      pos = this.status.indexOf(status);
      if (pos < 0) {
        throw new BottleneckError$3(`status must be one of ${this.status.join(', ')}`);
      }
      ref = this._jobs;
      results = [];
      for (k in ref) {
        v = ref[k];
        if (v === pos) {
          results.push(k);
        }
      }
      return results;
    } else {
      return Object.keys(this._jobs);
    }
  }

  // Object mapping each status name to its current job count.
  statusCounts() {
    return this.counts.reduce(((acc, v, i) => {
      acc[this.status[i]] = v;
      return acc;
    }), {});
  }

};

var States_1 = States;

var DLList$2, Sync;

DLList$2 = DLList_1;

// Serializes async tasks: at most one queued task runs at a time, in FIFO
// order (tasks are pushed to and shifted from a DLList).
Sync = class Sync {
  constructor(name, Promise) {
    this.schedule = this.schedule.bind(this);
    this.name = name;
    this.Promise = Promise;
    this._running = 0;
    this._queue = new DLList$2();
  }

  isEmpty() {
    return this._queue.length === 0;
  }

  // Runs the next queued task when nothing is running. The task's outcome is
  // captured in a callback so that the next task is started *before* the
  // current task's promise is settled.
  async _tryToRun() {
    var args, cb, error, reject, resolve, returned, task;
    if ((this._running < 1) && this._queue.length > 0) {
      this._running++;
      ({task, args, resolve, reject} = this._queue.shift());
      cb = (await (async function() {
        try {
          returned = (await task(...args));
          return function() {
            return resolve(returned);
          };
        } catch (error1) {
          error = error1;
          return function() {
            return reject(error);
          };
        }
      })());
      this._running--;
      this._tryToRun();
      return cb();
    }
  }

  // Queues `task(...args)` and returns a promise settled with its outcome.
  schedule(task, ...args) {
    var promise, reject, resolve;
    resolve = reject = null;
    promise = new this.Promise(function(_resolve, _reject) {
      resolve = _resolve;
      return reject = _reject;
    });
    this._queue.push({task, args, resolve, reject});
    this._tryToRun();
    return promise;
  }

};

var Sync_1 = Sync;

var version = "2.19.5";
var version$1 = {
  version: version
};

var version$2 = /*#__PURE__*/Object.freeze({
  version: version,
  default: version$1
});

// Stubs standing in for modules that are not part of this build: each one
// only logs a notice when called.
var require$$2 = () => console.log('You must import the full version of Bottleneck in order to use this feature.');

var require$$3 = () => console.log('You must import the full version of Bottleneck in order to use this feature.');

var require$$4 = () => console.log('You must import the full version of Bottleneck in order to use this feature.');

var Events$2, Group, IORedisConnection$1, RedisConnection$1, Scripts$1, parser$3;

parser$3 = parser;

Events$2 = Events_1;

RedisConnection$1 = require$$2;

IORedisConnection$1 = require$$3;

Scripts$1 = require$$4;

// A keyed collection of limiters that share the same options; limiters are
// created lazily by key() and cleaned up periodically.
Group = (function() {
  class Group {
    constructor(limiterOptions = {}) {
      this.deleteKey = this.deleteKey.bind(this);
      this.limiterOptions = limiterOptions;
      parser$3.load(this.limiterOptions, this.defaults, this);
      this.Events = new Events$2(this);
      this.instances = {};
      this.Bottleneck = Bottleneck_1;
      this._startAutoCleanup();
      // A connection supplied through the options is considered shared and is
      // not closed by disconnect().
      this.sharedConnection = this.connection != null;
      if (this.connection == null) {
        if (this.limiterOptions.datastore === "redis") {
          this.connection = new RedisConnection$1(Object.assign({}, this.limiterOptions, {Events: this.Events}));
        } else if (this.limiterOptions.datastore === "ioredis") {
          this.connection = new IORedisConnection$1(Object.assign({}, this.limiterOptions, {Events: this.Events}));
        }
      }
    }

    // Returns the limiter stored under `key`, creating it on first use.
    key(key = "") {
      var ref;
      return (ref = this.instances[key]) != null ?
ref : (() => { var limiter; limiter = this.instances[key] = new this.Bottleneck(Object.assign(this.limiterOptions, { id: `${this.id}-${key}`, timeout: this.timeout, connection: this.connection })); this.Events.trigger("created", limiter, key); return limiter; })(); } async deleteKey(key = "") { var deleted, instance; instance = this.instances[key]; if (this.connection) { deleted = (await this.connection.__runCommand__(['del', ...Scripts$1.allKeys(`${this.id}-${key}`)])); } if (instance != null) { delete this.instances[key]; await instance.disconnect(); } return (instance != null) || deleted > 0; } limiters() { var k, ref, results, v; ref = this.instances; results = []; for (k in ref) { v = ref[k]; results.push({ key: k, limiter: v }); } return results; } keys() { return Object.keys(this.instances); } async clusterKeys() { var cursor, end, found, i, k, keys, len, next, start; if (this.connection == null) { return this.Promise.resolve(this.keys()); } keys = []; cursor = null; start = `b_${this.id}-`.length; end = "_settings".length; while (cursor !== 0) { [next, found] = (await this.connection.__runCommand__(["scan", cursor != null ? cursor : 0, "match", `b_${this.id}-*_settings`, "count", 10000])); cursor = ~~next; for (i = 0, len = found.length; i < len; i++) { k = found[i]; keys.push(k.slice(start, -end)); } } return keys; } _startAutoCleanup() { var base; clearInterval(this.interval); return typeof (base = (this.interval = setInterval(async() => { var e, k, ref, results, time, v; time = Date.now(); ref = this.instances; results = []; for (k in ref) { v = ref[k]; try { if ((await v._store.__groupCheck__(time))) { results.push(this.deleteKey(k)); } else { results.push(void 0); } } catch (error) { e = error; results.push(v.Events.trigger("error", e)); } } return results; }, this.timeout / 2))).unref === "function" ? 
base.unref() : void 0;
    }

    // Applies `options` to the group itself (known default keys only) and to
    // the options used for future limiters; restarts the cleanup interval when
    // the timeout was changed.
    updateSettings(options = {}) {
      parser$3.overwrite(options, this.defaults, this);
      parser$3.overwrite(options, options, this.limiterOptions);
      if (options.timeout != null) {
        return this._startAutoCleanup();
      }
    }

    // Disconnects the group's connection unless it was supplied by the caller.
    disconnect(flush = true) {
      var ref;
      if (!this.sharedConnection) {
        return (ref = this.connection) != null ? ref.disconnect(flush) : void 0;
      }
    }

  }
  Group.prototype.defaults = {
    timeout: 1000 * 60 * 5,
    connection: null,
    Promise: Promise,
    id: "group-key"
  };

  return Group;

}).call(commonjsGlobal);

var Group_1 = Group;

var Batcher, Events$3, parser$4;

parser$4 = parser;

Events$3 = Events_1;

// Collects items and emits them as a "batch" event, either when maxSize items
// have accumulated or maxTime ms after the first item of a batch was added.
Batcher = (function() {
  class Batcher {
    constructor(options = {}) {
      this.options = options;
      parser$4.load(this.options, this.defaults, this);
      this.Events = new Events$3(this);
      this._arr = [];
      this._resetPromise();
      this._lastFlush = Date.now();
    }

    // Creates the promise shared by all add() calls of the current batch.
    _resetPromise() {
      return this._promise = new this.Promise((res, rej) => {
        return this._resolve = res;
      });
    }

    // Resolves the current batch promise, emits "batch" with the collected
    // items and starts a new, empty batch.
    _flush() {
      clearTimeout(this._timeout);
      this._lastFlush = Date.now();
      this._resolve();
      this.Events.trigger("batch", this._arr);
      this._arr = [];
      return this._resetPromise();
    }

    // Adds `data` to the current batch and returns that batch's promise.
    add(data) {
      var ret;
      this._arr.push(data);
      // Capture the promise first: _flush() below replaces this._promise.
      ret = this._promise;
      if (this._arr.length === this.maxSize) {
        this._flush();
      } else if ((this.maxTime != null) && this._arr.length === 1) {
        this._timeout = setTimeout(() => {
          return this._flush();
        }, this.maxTime);
      }
      return ret;
    }

  }
  Batcher.prototype.defaults = {
    maxTime: null,
    maxSize: null,
    Promise: Promise
  };

  return Batcher;

}).call(commonjsGlobal);

var Batcher_1 = Batcher;

// Stub standing in for a module that is not part of this build.
var require$$4$1 = () => console.log('You must import the full version of Bottleneck in order to use this feature.');

var require$$8 = getCjsExportFromNamespace(version$2);

var Bottleneck, DEFAULT_PRIORITY$1, Events$4, Job$1, LocalDatastore$1, NUM_PRIORITIES$1, Queues$1, RedisDatastore$1, States$1, Sync$1, parser$5,
  splice = [].splice;

NUM_PRIORITIES$1 = 10;

DEFAULT_PRIORITY$1 = 5;

parser$5 = parser;

Queues$1 = Queues_1;
Job$1 = Job_1;

LocalDatastore$1 = LocalDatastore_1;

RedisDatastore$1 = require$$4$1;

Events$4 = Events_1;

States$1 = States_1;

Sync$1 = Sync_1;

Bottleneck = (function() {
  class Bottleneck {
    // Builds a limiter from a single options object. Any extra positional
    // argument is rejected by _validateOptions.
    constructor(options = {}, ...invalid) {
      var storeInstanceOptions, storeOptions;
      this._addToQueue = this._addToQueue.bind(this);
      this._validateOptions(options, invalid);
      parser$5.load(options, this.instanceDefaults, this);
      this._queues = new Queues$1(NUM_PRIORITIES$1);
      this._scheduled = {};
      // "DONE" is only tracked as a status when trackDoneStatus is enabled.
      this._states = new States$1(["RECEIVED", "QUEUED", "RUNNING", "EXECUTING"].concat(this.trackDoneStatus ? ["DONE"] : []));
      this._limiter = null;
      this.Events = new Events$4(this);
      this._submitLock = new Sync$1("submit", this.Promise);
      this._registerLock = new Sync$1("register", this.Promise);
      storeOptions = parser$5.load(options, this.storeDefaults, {});
      // Pick the datastore implementation from `datastore` / `connection`.
      this._store = (function() {
        if (this.datastore === "redis" || this.datastore === "ioredis" || (this.connection != null)) {
          storeInstanceOptions = parser$5.load(options, this.redisStoreDefaults, {});
          return new RedisDatastore$1(this, storeOptions, storeInstanceOptions);
        } else if (this.datastore === "local") {
          storeInstanceOptions = parser$5.load(options, this.localStoreDefaults, {});
          return new LocalDatastore$1(this, storeOptions, storeInstanceOptions);
        } else {
          throw new Bottleneck.prototype.BottleneckError(`Invalid datastore type: ${this.datastore}`);
        }
      }).call(this);
      // ref() the store heartbeat timer on "leftzero" and unref() it on "zero"
      // (both events come from the queues object).
      this._queues.on("leftzero", () => {
        var ref;
        return (ref = this._store.heartbeat) != null ? typeof ref.ref === "function" ? ref.ref() : void 0 : void 0;
      });
      this._queues.on("zero", () => {
        var ref;
        return (ref = this._store.heartbeat) != null ? typeof ref.unref === "function" ? ref.unref() : void 0 : void 0;
      });
    }

    // Throws unless exactly one non-null object argument was passed.
    _validateOptions(options, invalid) {
      if (!((options != null) && typeof options === "object" && invalid.length === 0)) {
        throw new Bottleneck.prototype.BottleneckError("Bottleneck v2 takes a single object argument. Refer to https://github.com/SGrondin/bottleneck#upgrading-to-v2 if you're upgrading from Bottleneck v1.");
      }
    }

    // --- Thin accessors delegating to the store, queues or state tracker ---

    ready() {
      return this._store.ready;
    }

    clients() {
      return this._store.clients;
    }

    channel() {
      return `b_${this.id}`;
    }

    channel_client() {
      return `b_${this.id}_${this._store.clientId}`;
    }

    publish(message) {
      return this._store.__publish__(message);
    }

    disconnect(flush = true) {
      return this._store.__disconnect__(flush);
    }

    // Stores `_limiter`; it is later passed to job.doExecute() as the chained
    // limiter. Returns this for chaining.
    chain(_limiter) {
      this._limiter = _limiter;
      return this;
    }

    queued(priority) {
      return this._queues.queued(priority);
    }

    clusterQueued() {
      return this._store.__queued__();
    }

    // True when nothing is queued and the submit lock has no pending work.
    empty() {
      return this.queued() === 0 && this._submitLock.isEmpty();
    }

    running() {
      return this._store.__running__();
    }

    done() {
      return this._store.__done__();
    }

    jobStatus(id) {
      return this._states.jobStatus(id);
    }

    jobs(status) {
      return this._states.statusJobs(status);
    }

    counts() {
      return this._states.statusCounts();
    }

    // Random base-36 string derived from Math.random().
    _randomIndex() {
      return Math.random().toString(36).slice(2);
    }

    check(weight = 1) {
      return this._store.__check__(weight);
    }

    // Removes the scheduled entry for `index`, cancelling its expiration
    // timer. Returns false when the entry was already removed, so only the
    // first caller proceeds.
    _clearGlobalState(index) {
      if (this._scheduled[index] != null) {
        clearTimeout(this._scheduled[index].expiration);
        delete this._scheduled[index];
        return true;
      } else {
        return false;
      }
    }

    // Releases the job's weight in the store and emits "idle" when nothing is
    // running or queued any more. Store errors are emitted as "error".
    async _free(index, job, options, eventInfo) {
      var e, running;
      try {
        ({running} = (await this._store.__free__(index, options.weight)));
        this.Events.trigger("debug", `Freed ${options.id}`, eventInfo);
        if (running === 0 && this.empty()) {
          return this.Events.trigger("idle");
        }
      } catch (error1) {
        e = error1;
        return this.Events.trigger("error", e);
      }
    }

    // Schedules the job: an execution timer after `wait` ms and, when the job
    // has an expiration, an expiration timer after wait + expiration ms.
    _run(index, job, wait) {
      var clearGlobalState, free, run;
      job.doRun();
      clearGlobalState = this._clearGlobalState.bind(this, index);
      run = this._run.bind(this, index, job);
      free = this._free.bind(this, index, job);
      return this._scheduled[index] = {
        timeout: setTimeout(() => {
          return job.doExecute(this._limiter, clearGlobalState, run, free);
        }, wait),
        expiration: job.options.expiration != null ?
setTimeout(function() {
          return job.doExpire(clearGlobalState, run, free);
        }, wait + job.options.expiration) : void 0,
        job: job
      };
    }

    // Tries to start the first queued job, serialized through the register
    // lock. Resolves to the job's weight when it was started, or to null when
    // the queue is empty, the job does not fit `capacity`, or the store
    // refused the registration.
    _drainOne(capacity) {
      return this._registerLock.schedule(() => {
        var args, index, next, options, queue;
        if (this.queued() === 0) {
          return this.Promise.resolve(null);
        }
        queue = this._queues.getFirst();
        ({options, args} = next = queue.first());
        if ((capacity != null) && options.weight > capacity) {
          return this.Promise.resolve(null);
        }
        this.Events.trigger("debug", `Draining ${options.id}`, {args, options});
        index = this._randomIndex();
        return this._store.__register__(index, options.weight, options.expiration).then(({success, wait, reservoir}) => {
          var empty;
          this.Events.trigger("debug", `Drained ${options.id}`, {success, args, options});
          if (success) {
            // Only dequeue once the store has accepted the job.
            queue.shift();
            empty = this.empty();
            if (empty) {
              this.Events.trigger("empty");
            }
            if (reservoir === 0) {
              this.Events.trigger("depleted", empty);
            }
            this._run(index, next, wait);
            return this.Promise.resolve(options.weight);
          } else {
            return this.Promise.resolve(null);
          }
        });
      });
    }

    // Repeatedly drains jobs until _drainOne reports null, subtracting each
    // drained weight from `capacity` (when it is not null). Resolves to the
    // total drained weight; errors are emitted as "error".
    _drainAll(capacity, total = 0) {
      return this._drainOne(capacity).then((drained) => {
        var newCapacity;
        if (drained != null) {
          newCapacity = capacity != null ?
capacity - drained : capacity;
          return this._drainAll(newCapacity, total + drained);
        } else {
          return this.Promise.resolve(total);
        }
      }).catch((e) => {
        return this.Events.trigger("error", e);
      });
    }

    // Drops every queued job, passing `message` to each job's doDrop().
    _dropAllQueued(message) {
      return this._queues.shiftAll(function(job) {
        return job.doDrop({message});
      });
    }

    // Stops the limiter. New jobs are rejected with enqueueErrorMessage and a
    // second stop() call rejects.
    // dropWaitingJobs = true: jobs that have not started executing are
    // dropped, and the returned promise resolves once no job remains in the
    // four tracked states.
    // dropWaitingJobs = false: a zero-weight job is scheduled at the lowest
    // priority; it resolves once it is the only job left.
    stop(options = {}) {
      var done, waitForExecuting;
      options = parser$5.load(options, this.stopDefaults);
      waitForExecuting = (at) => {
        var finished;
        finished = () => {
          var counts;
          counts = this._states.counts;
          // Sum of RECEIVED + QUEUED + RUNNING + EXECUTING.
          return (counts[0] + counts[1] + counts[2] + counts[3]) === at;
        };
        return new this.Promise((resolve, reject) => {
          if (finished()) {
            return resolve();
          } else {
            return this.on("done", () => {
              if (finished()) {
                // NOTE(review): this removes every "done" listener, not only
                // the one registered here.
                this.removeAllListeners("done");
                return resolve();
              }
            });
          }
        });
      };
      // Comma expression: replace _run and _drainOne so nothing new can
      // start, then drop RUNNING and queued jobs under both locks.
      done = options.dropWaitingJobs ? (this._run = function(index, next) {
        return next.doDrop({
          message: options.dropErrorMessage
        });
      }, this._drainOne = () => {
        return this.Promise.resolve(null);
      }, this._registerLock.schedule(() => {
        return this._submitLock.schedule(() => {
          var k, ref, v;
          ref = this._scheduled;
          for (k in ref) {
            v = ref[k];
            if (this.jobStatus(v.job.options.id) === "RUNNING") {
              clearTimeout(v.timeout);
              clearTimeout(v.expiration);
              v.job.doDrop({
                message: options.dropErrorMessage
              });
            }
          }
          this._dropAllQueued(options.dropErrorMessage);
          return waitForExecuting(0);
        });
      })) : this.schedule({
        priority: NUM_PRIORITIES$1 - 1,
        weight: 0
      }, () => {
        return waitForExecuting(1);
      });
      this._receive = function(job) {
        return job._reject(new Bottleneck.prototype.BottleneckError(options.enqueueErrorMessage));
      };
      this.stop = () => {
        return this.Promise.reject(new Bottleneck.prototype.BottleneckError("stop() has already been called"));
      };
      return done;
    }

    // Submits the job to the store and applies the high-water strategy before
    // queueing it. Runs under the submit lock (see _receive).
    async _addToQueue(job) {
      var args, blocked, error, options, reachedHWM, shifted, strategy;
      ({args, options} = job);
      try {
        ({reachedHWM, blocked, strategy} = (await this._store.__submit__(this.queued(), options.weight)));
      } catch (error1) {
        error = error1;
this.Events.trigger("debug", `Could not queue ${options.id}`, {args, options, error});
        job.doDrop({error});
        return false;
      }
      if (blocked) {
        job.doDrop();
        return true;
      } else if (reachedHWM) {
        // Pick the job to drop according to the strategy:
        //   LEAK              -> last job from options.priority
        //   OVERFLOW_PRIORITY -> last job from options.priority + 1
        //   OVERFLOW          -> the incoming job itself
        shifted = strategy === Bottleneck.prototype.strategy.LEAK ? this._queues.shiftLastFrom(options.priority) : strategy === Bottleneck.prototype.strategy.OVERFLOW_PRIORITY ? this._queues.shiftLastFrom(options.priority + 1) : strategy === Bottleneck.prototype.strategy.OVERFLOW ? job : void 0;
        if (shifted != null) {
          shifted.doDrop();
        }
        // Nothing could be evicted, or the incoming job was the one dropped:
        // do not queue it.
        if ((shifted == null) || strategy === Bottleneck.prototype.strategy.OVERFLOW) {
          if (shifted == null) {
            job.doDrop();
          }
          return reachedHWM;
        }
      }
      job.doQueue(reachedHWM, blocked);
      this._queues.push(job);
      await this._drainAll();
      return reachedHWM;
    }

    // Entry point for new jobs: rejects a job whose id is already tracked,
    // otherwise marks it received and queues it through the submit lock.
    _receive(job) {
      if (this._states.jobStatus(job.options.id) != null) {
        job._reject(new Bottleneck.prototype.BottleneckError(`A job with the same id already exists (id=${job.options.id})`));
        return false;
      } else {
        job.doReceive();
        return this._submitLock.schedule(this._addToQueue, job);
      }
    }

    // Callback-style API: submit([options,] fn, ...args, cb).
    // `fn` is called with the args plus a node-style callback; a non-null
    // first callback argument is treated as failure. `cb` then receives the
    // full argument list either way.
    submit(...args) {
      var cb, fn, job, options, ref, ref1, task;
      if (typeof args[0] === "function") {
        // splice(-1) removes the trailing callback from `args`.
        ref = args, [fn, ...args] = ref, [cb] = splice.call(args, -1);
        options = parser$5.load({}, this.jobDefaults);
      } else {
        ref1 = args, [options, fn, ...args] = ref1, [cb] = splice.call(args, -1);
        options = parser$5.load(options, this.jobDefaults);
      }
      task = (...args) => {
        return new this.Promise(function(resolve, reject) {
          return fn(...args, function(...args) {
            return (args[0] != null ? reject : resolve)(args);
          });
        });
      };
      job = new Job$1(task, args, options, this.jobDefaults, this.rejectOnDrop, this.Events, this._states, this.Promise);
      job.promise.then(function(args) {
        return typeof cb === "function" ? cb(...args) : void 0;
      }).catch(function(args) {
        // An array is the callback argument list rejected by `task`;
        // anything else is passed to cb as a single argument.
        if (Array.isArray(args)) {
          return typeof cb === "function" ? cb(...args) : void 0;
        } else {
          return typeof cb === "function" ?
cb(args) : void 0;
        }
      });
      return this._receive(job);
    }

    // Promise-style API: schedule([options,] task, ...args). Returns the job's
    // promise.
    schedule(...args) {
      var job, options, task;
      if (typeof args[0] === "function") {
        [task, ...args] = args;
        options = {};
      } else {
        [options, task, ...args] = args;
      }
      job = new Job$1(task, args, options, this.jobDefaults, this.rejectOnDrop, this.Events, this._states, this.Promise);
      this._receive(job);
      return job.promise;
    }

    // Returns a function that schedules `fn` on this limiter. The wrapper
    // binds `fn` to its own `this`; `withOptions` passes `fn` unbound.
    wrap(fn) {
      var schedule, wrapped;
      schedule = this.schedule.bind(this);
      wrapped = function(...args) {
        return schedule(fn.bind(this), ...args);
      };
      wrapped.withOptions = function(options, ...args) {
        return schedule(options, fn, ...args);
      };
      return wrapped;
    }

    // Pushes store-level settings to the datastore, then applies
    // instance-level settings to this limiter. Resolves to this.
    async updateSettings(options = {}) {
      await this._store.__updateSettings__(parser$5.overwrite(options, this.storeDefaults));
      parser$5.overwrite(options, this.instanceDefaults, this);
      return this;
    }

    currentReservoir() {
      return this._store.__currentReservoir__();
    }

    incrementReservoir(incr = 0) {
      return this._store.__incrementReservoir__(incr);
    }

  }
  // Statics are mirrored on the prototype so they are reachable from
  // instances as well as from the constructor.
  Bottleneck.default = Bottleneck;

  Bottleneck.Events = Events$4;

  Bottleneck.version = Bottleneck.prototype.version = require$$8.version;

  Bottleneck.strategy = Bottleneck.prototype.strategy = {
    LEAK: 1,
    OVERFLOW: 2,
    OVERFLOW_PRIORITY: 4,
    BLOCK: 3
  };

  Bottleneck.BottleneckError = Bottleneck.prototype.BottleneckError = BottleneckError_1;

  Bottleneck.Group = Bottleneck.prototype.Group = Group_1;

  Bottleneck.RedisConnection = Bottleneck.prototype.RedisConnection = require$$2;

  Bottleneck.IORedisConnection = Bottleneck.prototype.IORedisConnection = require$$3;

  Bottleneck.Batcher = Bottleneck.prototype.Batcher = Batcher_1;

  // Defaults applied to each job's options.
  Bottleneck.prototype.jobDefaults = {
    priority: DEFAULT_PRIORITY$1,
    weight: 1,
    expiration: null,
    id: ""
  };

  // Defaults for settings that are handed to the datastore.
  Bottleneck.prototype.storeDefaults = {
    maxConcurrent: null,
    minTime: 0,
    highWater: null,
    strategy: Bottleneck.prototype.strategy.LEAK,
    penalty: null,
    reservoir: null,
    reservoirRefreshInterval: null,
    reservoirRefreshAmount: null,
    reservoirIncreaseInterval: null,
    reservoirIncreaseAmount: null,
reservoirIncreaseMaximum: null
  };

  // Instance options read by the local datastore.
  Bottleneck.prototype.localStoreDefaults = {
    Promise: Promise,
    timeout: null,
    heartbeatInterval: 250
  };

  // Instance options read by the redis datastore.
  Bottleneck.prototype.redisStoreDefaults = {
    Promise: Promise,
    timeout: null,
    heartbeatInterval: 5000,
    clientTimeout: 10000,
    Redis: null,
    clientOptions: {},
    clusterNodes: null,
    clearDatastore: false,
    connection: null
  };

  // Options stored on the limiter instance itself.
  Bottleneck.prototype.instanceDefaults = {
    datastore: "local",
    connection: null,
    id: "",
    rejectOnDrop: true,
    trackDoneStatus: false,
    Promise: Promise
  };

  // Defaults for stop().
  Bottleneck.prototype.stopDefaults = {
    enqueueErrorMessage: "This limiter has been stopped and cannot accept new jobs.",
    dropWaitingJobs: true,
    dropErrorMessage: "This limiter has been stopped."
  };

  return Bottleneck;

}).call(commonjsGlobal);

var Bottleneck_1 = Bottleneck;

var lib = Bottleneck_1;

return lib;

})));


/***/ }),

/***/ 8932:
/***/ ((__unused_webpack_module, exports) => {

"use strict";


Object.defineProperty(exports, "__esModule", ({ value: true }));

// Error subclass whose `name` is 'Deprecation'.
class Deprecation extends Error {
  constructor(message) {
    super(message); // Maintains proper stack trace (only available on V8)

    /* istanbul ignore next */

    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor);
    }

    this.name = 'Deprecation';
  }

}

exports.Deprecation = Deprecation;


/***/ }),

/***/ 5060:
/***/ ((module) => {

"use strict";


/**
 * filesize
 *
 * @copyright 2020 Jason Mulligan
 * @license BSD-3-Clause
 * @version 6.1.0
 */
(function (global) {
  // `b` matches a bare bit/byte symbol; `symbol` and `fullform` hold the unit
  // names per standard, indexed by exponent (0..8).
  var b = /^(b|B)$/,
      symbol = {
    iec: {
      bits: ["b", "Kib", "Mib", "Gib", "Tib", "Pib", "Eib", "Zib", "Yib"],
      bytes: ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]
    },
    jedec: {
      bits: ["b", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb", "Yb"],
      bytes: ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    }
  },
      fullform = {
    iec: ["", "kibi", "mebi", "gibi", "tebi", "pebi", "exbi", "zebi", "yobi"],
    jedec: ["", "kilo", "mega", "giga", "tera", "peta", "exa", "zetta", "yotta"]
  };
  /**
   * filesize
   *
   * @method filesize
   * @param  {Mixed}   arg        String, Int or Float to transform
   * @param  {Object}  descriptor [Optional] Flags
   * @return {String}             Readable file size String
   */

  function filesize(arg) {
    // The optional descriptor is read from arguments[1].
    var descriptor = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
    var result = [],
        val = 0,
        e = void 0,
        base = void 0,
        bits = void 0,
        ceil = void 0,
        full = void 0,
        fullforms = void 0,
        locale = void 0,
        localeOptions = void 0,
        neg = void 0,
        num = void 0,
        output = void 0,
        round = void 0,
        unix = void 0,
        separator = void 0,
        spacer = void 0,
        standard = void 0,
        symbols = void 0;

    if (isNaN(arg)) {
      throw new TypeError("Invalid number");
    }

    // Resolve every descriptor flag to its effective value.
    bits = descriptor.bits === true;
    unix = descriptor.unix === true;
    base = descriptor.base || 2;
    round = descriptor.round !== void 0 ? descriptor.round : unix ? 1 : 2;
    locale = descriptor.locale !== void 0 ? descriptor.locale : "";
    localeOptions = descriptor.localeOptions || {};
    separator = descriptor.separator !== void 0 ? descriptor.separator : "";
    spacer = descriptor.spacer !== void 0 ? descriptor.spacer : unix ? "" : " ";
    symbols = descriptor.symbols || {};
    // Any base other than 2 always uses the "jedec" symbol table.
    standard = base === 2 ? descriptor.standard || "jedec" : "jedec";
    output = descriptor.output || "string";
    full = descriptor.fullform === true;
    fullforms = descriptor.fullforms instanceof Array ? descriptor.fullforms : [];
    e = descriptor.exponent !== void 0 ? descriptor.exponent : -1;
    num = Number(arg);
    neg = num < 0;
    ceil = base > 2 ? 1000 : 1024; // Flipping a negative number to determine the size

    if (neg) {
      num = -num;
    } // Determining the exponent


    if (e === -1 || isNaN(e)) {
      e = Math.floor(Math.log(num) / Math.log(ceil));

      if (e < 0) {
        e = 0;
      }
    } // Exceeding supported length, time to reduce & multiply


    if (e > 8) {
      e = 8;
    }

    if (output === "exponent") {
      return e;
    } // Zero is now a special case because bytes divide by 1


    if (num === 0) {
      result[0] = 0;
      result[1] = unix ? "" : symbol[standard][bits ? "bits" : "bytes"][e];
    } else {
      val = num / (base === 2 ?
Math.pow(2, e * 10) : Math.pow(1000, e));

      if (bits) {
        val = val * 8;

        // Converting to bits may push the value into the next unit.
        if (val >= ceil && e < 8) {
          val = val / ceil;
          e++;
        }
      }

      result[0] = Number(val.toFixed(e > 0 ? round : 0));

      // Rounding reached the unit ceiling: bump to the next unit, unless the
      // caller pinned the exponent.
      if (result[0] === ceil && e < 8 && descriptor.exponent === void 0) {
        result[0] = 1;
        e++;
      }

      result[1] = base === 10 && e === 1 ? bits ? "kb" : "kB" : symbol[standard][bits ? "bits" : "bytes"][e];

      if (unix) {
        result[1] = standard === "jedec" ? result[1].charAt(0) : e > 0 ? result[1].replace(/B$/, "") : result[1];

        if (b.test(result[1])) {
          result[0] = Math.floor(result[0]);
          result[1] = "";
        }
      }
    } // Decorating a 'diff'


    if (neg) {
      result[0] = -result[0];
    } // Applying custom symbol


    result[1] = symbols[result[1]] || result[1];

    if (locale === true) {
      result[0] = result[0].toLocaleString();
    } else if (locale.length > 0) {
      result[0] = result[0].toLocaleString(locale, localeOptions);
    } else if (separator.length > 0) {
      result[0] = result[0].toString().replace(".", separator);
    } // Returning Array, Object, or String (default)


    if (output === "array") {
      return result;
    }

    if (full) {
      result[1] = fullforms[e] ? fullforms[e] : fullform[standard][e] + (bits ? "bit" : "byte") + (result[0] === 1 ? "" : "s");
    }

    if (output === "object") {
      return {
        value: result[0],
        symbol: result[1],
        exponent: e
      };
    }

    return result.join(spacer);
  } // Partial application for functional programming


  filesize.partial = function (opt) {
    return function (arg) {
      return filesize(arg, opt);
    };
  }; // CommonJS, AMD, script tag


  if (true) {
    module.exports = filesize;
  } else {}
})(typeof window !== "undefined" ? window : global);


/***/ }),

/***/ 3287:
/***/ ((__unused_webpack_module, exports) => {

"use strict";


Object.defineProperty(exports, "__esModule", ({ value: true }));

/*!
 * is-plain-object
 *
 * Copyright (c) 2014-2017, Jon Schlinkert.
 * Released under the MIT License.
 */

// True when the value's Object.prototype.toString tag is '[object Object]'.
function isObject(o) {
  return Object.prototype.toString.call(o) === '[object Object]';
}

// True for objects whose toString tag is '[object Object]' and that either
// have no constructor or have a constructor whose prototype is itself a
// tagged object owning `isPrototypeOf`.
function isPlainObject(o) {
  var ctor,prot;

  if (isObject(o) === false) return false;

  // If has modified constructor
  ctor = o.constructor;
  if (ctor === undefined) return true;

  // If has modified prototype
  prot = ctor.prototype;
  if (isObject(prot) === false) return false;

  // If constructor does not have an Object-specific method
  if (prot.hasOwnProperty('isPrototypeOf') === false) {
    return false;
  }

  // Most likely a plain Object
  return true;
}

exports.isPlainObject = isPlainObject;


/***/ }),

/***/ 467:
/***/ ((module, exports, __nccwpck_require__) => {

"use strict";


Object.defineProperty(exports, "__esModule", ({ value: true }));

// Returns ex['default'] when `ex` is an object with a `default` property,
// otherwise `ex` itself.
function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }

var Stream = _interopDefault(__nccwpck_require__(2413));
var http = _interopDefault(__nccwpck_require__(8605));
var Url = _interopDefault(__nccwpck_require__(8835));
var https = _interopDefault(__nccwpck_require__(7211));
var zlib = _interopDefault(__nccwpck_require__(8761));

// Based on https://github.com/tmpvar/jsdom/blob/aa85b2abf07766ff7bf5c1f6daafb3726f2f2db5/lib/jsdom/living/blob.js

// fix for "Readable" isn't a named export issue
const Readable = Stream.Readable;

// Symbol keys under which a Blob stores its bytes and its type string.
const BUFFER = Symbol('buffer');
const TYPE = Symbol('type');

class Blob {
	// new Blob(blobParts, options): concatenates every part into one Buffer.
	constructor() {
		this[TYPE] = '';

		const blobParts = arguments[0];
		const options = arguments[1];

		const buffers = [];
		let size = 0;

		if (blobParts) {
			const a = blobParts;
			const length = Number(a.length);
			for (let i = 0; i < length; i++) {
				const element = a[i];
				let buffer;
				// Normalize each part to a Buffer; anything unrecognized is
				// stringified first.
				if (element instanceof Buffer) {
					buffer = element;
				} else if (ArrayBuffer.isView(element)) {
					buffer = Buffer.from(element.buffer, element.byteOffset, element.byteLength);
				} else if (element instanceof ArrayBuffer) {
					buffer = Buffer.from(element);
				} else if (element instanceof Blob) {
					buffer = element[BUFFER];
				} else {
					buffer =
Buffer.from(typeof element === 'string' ? element : String(element));
				}
				size += buffer.length;
				buffers.push(buffer);
			}
		}

		this[BUFFER] = Buffer.concat(buffers);

		// The type is lower-cased and only kept when it contains nothing
		// outside U+0020..U+007E.
		let type = options && options.type !== undefined && String(options.type).toLowerCase();
		if (type && !/[^\u0020-\u007E]/.test(type)) {
			this[TYPE] = type;
		}
	}
	get size() {
		return this[BUFFER].length;
	}
	get type() {
		return this[TYPE];
	}
	// Resolves to the content decoded with Buffer#toString().
	text() {
		return Promise.resolve(this[BUFFER].toString());
	}
	// Resolves to an ArrayBuffer sliced out of the backing buffer for exactly
	// this blob's byte range.
	arrayBuffer() {
		const buf = this[BUFFER];
		const ab = buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
		return Promise.resolve(ab);
	}
	// Returns a Readable that emits the whole buffer and then ends.
	stream() {
		const readable = new Readable();
		readable._read = function () {};
		readable.push(this[BUFFER]);
		readable.push(null);
		return readable;
	}
	toString() {
		return '[object Blob]';
	}
	// slice(start, end, type): negative offsets count from the end and both
	// bounds are clamped to [0, size].
	slice() {
		const size = this.size;

		const start = arguments[0];
		const end = arguments[1];
		let relativeStart, relativeEnd;
		if (start === undefined) {
			relativeStart = 0;
		} else if (start < 0) {
			relativeStart = Math.max(size + start, 0);
		} else {
			relativeStart = Math.min(start, size);
		}
		if (end === undefined) {
			relativeEnd = size;
		} else if (end < 0) {
			relativeEnd = Math.max(size + end, 0);
		} else {
			relativeEnd = Math.min(end, size);
		}
		const span = Math.max(relativeEnd - relativeStart, 0);

		const buffer = this[BUFFER];
		const slicedBuffer = buffer.slice(relativeStart, relativeStart + span);
		// Create an empty Blob with the requested type, then attach the slice.
		const blob = new Blob([], { type: arguments[2] });
		blob[BUFFER] = slicedBuffer;
		return blob;
	}
}

Object.defineProperties(Blob.prototype, {
	size: { enumerable: true },
	type: { enumerable: true },
	slice: { enumerable: true }
});

Object.defineProperty(Blob.prototype, Symbol.toStringTag, {
	value: 'Blob',
	writable: false,
	enumerable: false,
	configurable: true
});

/**
 * fetch-error.js
 *
 * FetchError interface for operational errors
 */

/**
 * Create FetchError instance
 *
 * @param   String      message      Error message for human
 * @param   String      type         Error type for machine
 * @param   String      systemError  For Node.js system error
 * @return  FetchError
 */
function FetchError(message, type, systemError) {
  Error.call(this, message);

  this.message = message;
  this.type = type;

  // when err.type is `system`, err.code contains system error code
  if (systemError) {
    this.code = this.errno = systemError.code;
  }

  // hide custom error implementation details from end-users
  Error.captureStackTrace(this, this.constructor);
}

FetchError.prototype = Object.create(Error.prototype);
FetchError.prototype.constructor = FetchError;
FetchError.prototype.name = 'FetchError';

// Optional dependency: `convert` stays undefined when module 2877 cannot be
// loaded.
let convert;
try {
	convert = __nccwpck_require__(2877).convert;
} catch (e) {}

const INTERNALS = Symbol('Body internals');

// fix an issue where "PassThrough" isn't a named export for node <10
const PassThrough = Stream.PassThrough;

/**
 * Body mixin
 *
 * Ref: https://fetch.spec.whatwg.org/#body
 *
 * @param   Stream  body  Readable stream
 * @param   Object  opts  Response options
 * @return  Void
 */
function Body(body) {
	var _this = this;

	// Options are read from arguments[1]; `size` and `timeout` default to 0.
	var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {},
	    _ref$size = _ref.size;

	let size = _ref$size === undefined ? 0 : _ref$size;
	var _ref$timeout = _ref.timeout;
	let timeout = _ref$timeout === undefined ?
0 : _ref$timeout; if (body == null) { // body is undefined or null body = null; } else if (isURLSearchParams(body)) { // body is a URLSearchParams body = Buffer.from(body.toString()); } else if (isBlob(body)) ; else if (Buffer.isBuffer(body)) ; else if (Object.prototype.toString.call(body) === '[object ArrayBuffer]') { // body is ArrayBuffer body = Buffer.from(body); } else if (ArrayBuffer.isView(body)) { // body is ArrayBufferView body = Buffer.from(body.buffer, body.byteOffset, body.byteLength); } else if (body instanceof Stream) ; else { // none of the above // coerce to string then buffer body = Buffer.from(String(body)); } this[INTERNALS] = { body, disturbed: false, error: null }; this.size = size; this.timeout = timeout; if (body instanceof Stream) { body.on('error', function (err) { const error = err.name === 'AbortError' ? err : new FetchError(`Invalid response body while trying to fetch ${_this.url}: ${err.message}`, 'system', err); _this[INTERNALS].error = error; }); } } Body.prototype = { get body() { return this[INTERNALS].body; }, get bodyUsed() { return this[INTERNALS].disturbed; }, /** * Decode response as ArrayBuffer * * @return Promise */ arrayBuffer() { return consumeBody.call(this).then(function (buf) { return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength); }); }, /** * Return raw response as Blob * * @return Promise */ blob() { let ct = this.headers && this.headers.get('content-type') || ''; return consumeBody.call(this).then(function (buf) { return Object.assign( // Prevent copying new Blob([], { type: ct.toLowerCase() }), { [BUFFER]: buf }); }); }, /** * Decode response as json * * @return Promise */ json() { var _this2 = this; return consumeBody.call(this).then(function (buffer) { try { return JSON.parse(buffer.toString()); } catch (err) { return Body.Promise.reject(new FetchError(`invalid json response body at ${_this2.url} reason: ${err.message}`, 'invalid-json')); } }); }, /** * Decode response as text * * @return 
Promise */ text() { return consumeBody.call(this).then(function (buffer) { return buffer.toString(); }); }, /** * Decode response as buffer (non-spec api) * * @return Promise */ buffer() { return consumeBody.call(this); }, /** * Decode response as text, while automatically detecting the encoding and * trying to decode to UTF-8 (non-spec api) * * @return Promise */ textConverted() { var _this3 = this; return consumeBody.call(this).then(function (buffer) { return convertBody(buffer, _this3.headers); }); } }; // In browsers, all properties are enumerable. Object.defineProperties(Body.prototype, { body: { enumerable: true }, bodyUsed: { enumerable: true }, arrayBuffer: { enumerable: true }, blob: { enumerable: true }, json: { enumerable: true }, text: { enumerable: true } }); Body.mixIn = function (proto) { for (const name of Object.getOwnPropertyNames(Body.prototype)) { // istanbul ignore else: future proof if (!(name in proto)) { const desc = Object.getOwnPropertyDescriptor(Body.prototype, name); Object.defineProperty(proto, name, desc); } } }; /** * Consume and convert an entire Body to a Buffer. 
* * Ref: https://fetch.spec.whatwg.org/#concept-body-consume-body * * @return Promise */ function consumeBody() { var _this4 = this; if (this[INTERNALS].disturbed) { return Body.Promise.reject(new TypeError(`body used already for: ${this.url}`)); } this[INTERNALS].disturbed = true; if (this[INTERNALS].error) { return Body.Promise.reject(this[INTERNALS].error); } let body = this.body; // body is null if (body === null) { return Body.Promise.resolve(Buffer.alloc(0)); } // body is blob if (isBlob(body)) { body = body.stream(); } // body is buffer if (Buffer.isBuffer(body)) { return Body.Promise.resolve(body); } // istanbul ignore if: should never happen if (!(body instanceof Stream)) { return Body.Promise.resolve(Buffer.alloc(0)); } // body is stream // get ready to actually consume the body let accum = []; let accumBytes = 0; let abort = false; return new Body.Promise(function (resolve, reject) { let resTimeout; // allow timeout on slow response body if (_this4.timeout) { resTimeout = setTimeout(function () { abort = true; reject(new FetchError(`Response timeout while trying to fetch ${_this4.url} (over ${_this4.timeout}ms)`, 'body-timeout')); }, _this4.timeout); } // handle stream errors body.on('error', function (err) { if (err.name === 'AbortError') { // if the request was aborted, reject with this Error abort = true; reject(err); } else { // other errors, such as incorrect content-encoding reject(new FetchError(`Invalid response body while trying to fetch ${_this4.url}: ${err.message}`, 'system', err)); } }); body.on('data', function (chunk) { if (abort || chunk === null) { return; } if (_this4.size && accumBytes + chunk.length > _this4.size) { abort = true; reject(new FetchError(`content size at ${_this4.url} over limit: ${_this4.size}`, 'max-size')); return; } accumBytes += chunk.length; accum.push(chunk); }); body.on('end', function () { if (abort) { return; } clearTimeout(resTimeout); try { resolve(Buffer.concat(accum, accumBytes)); } catch (err) { // handle 
streams that have accumulated too much data (issue #414) reject(new FetchError(`Could not create Buffer from response body for ${_this4.url}: ${err.message}`, 'system', err)); } }); }); } /** * Detect buffer encoding and convert to target encoding * ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding * * @param Buffer buffer Incoming buffer * @param String encoding Target encoding * @return String */ function convertBody(buffer, headers) { if (typeof convert !== 'function') { throw new Error('The package `encoding` must be installed to use the textConverted() function'); } const ct = headers.get('content-type'); let charset = 'utf-8'; let res, str; // header if (ct) { res = /charset=([^;]*)/i.exec(ct); } // no charset in content type, peek at response body for at most 1024 bytes str = buffer.slice(0, 1024).toString(); // html5 if (!res && str) { res = / 0 && arguments[0] !== undefined ? arguments[0] : undefined; this[MAP] = Object.create(null); if (init instanceof Headers) { const rawHeaders = init.raw(); const headerNames = Object.keys(rawHeaders); for (const headerName of headerNames) { for (const value of rawHeaders[headerName]) { this.append(headerName, value); } } return; } // We don't worry about converting prop to ByteString here as append() // will handle it. 
if (init == null) ; else if (typeof init === 'object') { const method = init[Symbol.iterator]; if (method != null) { if (typeof method !== 'function') { throw new TypeError('Header pairs must be iterable'); } // sequence> // Note: per spec we have to first exhaust the lists then process them const pairs = []; for (const pair of init) { if (typeof pair !== 'object' || typeof pair[Symbol.iterator] !== 'function') { throw new TypeError('Each header pair must be iterable'); } pairs.push(Array.from(pair)); } for (const pair of pairs) { if (pair.length !== 2) { throw new TypeError('Each header pair must be a name/value tuple'); } this.append(pair[0], pair[1]); } } else { // record for (const key of Object.keys(init)) { const value = init[key]; this.append(key, value); } } } else { throw new TypeError('Provided initializer must be an object'); } } /** * Return combined header value given name * * @param String name Header name * @return Mixed */ get(name) { name = `${name}`; validateName(name); const key = find(this[MAP], name); if (key === undefined) { return null; } return this[MAP][key].join(', '); } /** * Iterate over all headers * * @param Function callback Executed for each item with parameters (value, name, thisArg) * @param Boolean thisArg `this` context for callback function * @return Void */ forEach(callback) { let thisArg = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : undefined; let pairs = getHeaders(this); let i = 0; while (i < pairs.length) { var _pairs$i = pairs[i]; const name = _pairs$i[0], value = _pairs$i[1]; callback.call(thisArg, value, name, this); pairs = getHeaders(this); i++; } } /** * Overwrite header values given name * * @param String name Header name * @param String value Header value * @return Void */ set(name, value) { name = `${name}`; value = `${value}`; validateName(name); validateValue(value); const key = find(this[MAP], name); this[MAP][key !== undefined ? 
key : name] = [value]; } /** * Append a value onto existing header * * @param String name Header name * @param String value Header value * @return Void */ append(name, value) { name = `${name}`; value = `${value}`; validateName(name); validateValue(value); const key = find(this[MAP], name); if (key !== undefined) { this[MAP][key].push(value); } else { this[MAP][name] = [value]; } } /** * Check for header name existence * * @param String name Header name * @return Boolean */ has(name) { name = `${name}`; validateName(name); return find(this[MAP], name) !== undefined; } /** * Delete all header values given name * * @param String name Header name * @return Void */ delete(name) { name = `${name}`; validateName(name); const key = find(this[MAP], name); if (key !== undefined) { delete this[MAP][key]; } } /** * Return raw headers (non-spec api) * * @return Object */ raw() { return this[MAP]; } /** * Get an iterator on keys. * * @return Iterator */ keys() { return createHeadersIterator(this, 'key'); } /** * Get an iterator on values. * * @return Iterator */ values() { return createHeadersIterator(this, 'value'); } /** * Get an iterator on entries. * * This is the default iterator of the Headers object. * * @return Iterator */ [Symbol.iterator]() { return createHeadersIterator(this, 'key+value'); } } Headers.prototype.entries = Headers.prototype[Symbol.iterator]; Object.defineProperty(Headers.prototype, Symbol.toStringTag, { value: 'Headers', writable: false, enumerable: false, configurable: true }); Object.defineProperties(Headers.prototype, { get: { enumerable: true }, forEach: { enumerable: true }, set: { enumerable: true }, append: { enumerable: true }, has: { enumerable: true }, delete: { enumerable: true }, keys: { enumerable: true }, values: { enumerable: true }, entries: { enumerable: true } }); function getHeaders(headers) { let kind = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : 'key+value'; const keys = Object.keys(headers[MAP]).sort(); return keys.map(kind === 'key' ? function (k) { return k.toLowerCase(); } : kind === 'value' ? function (k) { return headers[MAP][k].join(', '); } : function (k) { return [k.toLowerCase(), headers[MAP][k].join(', ')]; }); } const INTERNAL = Symbol('internal'); function createHeadersIterator(target, kind) { const iterator = Object.create(HeadersIteratorPrototype); iterator[INTERNAL] = { target, kind, index: 0 }; return iterator; } const HeadersIteratorPrototype = Object.setPrototypeOf({ next() { // istanbul ignore if if (!this || Object.getPrototypeOf(this) !== HeadersIteratorPrototype) { throw new TypeError('Value of `this` is not a HeadersIterator'); } var _INTERNAL = this[INTERNAL]; const target = _INTERNAL.target, kind = _INTERNAL.kind, index = _INTERNAL.index; const values = getHeaders(target, kind); const len = values.length; if (index >= len) { return { value: undefined, done: true }; } this[INTERNAL].index = index + 1; return { value: values[index], done: false }; } }, Object.getPrototypeOf(Object.getPrototypeOf([][Symbol.iterator]()))); Object.defineProperty(HeadersIteratorPrototype, Symbol.toStringTag, { value: 'HeadersIterator', writable: false, enumerable: false, configurable: true }); /** * Export the Headers object in a form that Node.js can consume. * * @param Headers headers * @return Object */ function exportNodeCompatibleHeaders(headers) { const obj = Object.assign({ __proto__: null }, headers[MAP]); // http.request() only supports string as Host header. This hack makes // specifying custom Host header possible. const hostHeaderKey = find(headers[MAP], 'Host'); if (hostHeaderKey !== undefined) { obj[hostHeaderKey] = obj[hostHeaderKey][0]; } return obj; } /** * Create a Headers object from an object of headers, ignoring those that do * not conform to HTTP grammar productions. 
* * @param Object obj Object of headers * @return Headers */ function createHeadersLenient(obj) { const headers = new Headers(); for (const name of Object.keys(obj)) { if (invalidTokenRegex.test(name)) { continue; } if (Array.isArray(obj[name])) { for (const val of obj[name]) { if (invalidHeaderCharRegex.test(val)) { continue; } if (headers[MAP][name] === undefined) { headers[MAP][name] = [val]; } else { headers[MAP][name].push(val); } } } else if (!invalidHeaderCharRegex.test(obj[name])) { headers[MAP][name] = [obj[name]]; } } return headers; } const INTERNALS$1 = Symbol('Response internals'); // fix an issue where "STATUS_CODES" aren't a named export for node <10 const STATUS_CODES = http.STATUS_CODES; /** * Response class * * @param Stream body Readable stream * @param Object opts Response options * @return Void */ class Response { constructor() { let body = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; let opts = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : {}; Body.call(this, body, opts); const status = opts.status || 200; const headers = new Headers(opts.headers); if (body != null && !headers.has('Content-Type')) { const contentType = extractContentType(body); if (contentType) { headers.append('Content-Type', contentType); } } this[INTERNALS$1] = { url: opts.url, status, statusText: opts.statusText || STATUS_CODES[status], headers, counter: opts.counter }; } get url() { return this[INTERNALS$1].url || ''; } get status() { return this[INTERNALS$1].status; } /** * Convenience property representing if the request ended normally */ get ok() { return this[INTERNALS$1].status >= 200 && this[INTERNALS$1].status < 300; } get redirected() { return this[INTERNALS$1].counter > 0; } get statusText() { return this[INTERNALS$1].statusText; } get headers() { return this[INTERNALS$1].headers; } /** * Clone this response * * @return Response */ clone() { return new Response(clone(this), { url: this.url, status: this.status, statusText: this.statusText, headers: this.headers, ok: this.ok, redirected: this.redirected }); } } Body.mixIn(Response.prototype); Object.defineProperties(Response.prototype, { url: { enumerable: true }, status: { enumerable: true }, ok: { enumerable: true }, redirected: { enumerable: true }, statusText: { enumerable: true }, headers: { enumerable: true }, clone: { enumerable: true } }); Object.defineProperty(Response.prototype, Symbol.toStringTag, { value: 'Response', writable: false, enumerable: false, configurable: true }); const INTERNALS$2 = Symbol('Request internals'); // fix an issue where "format", "parse" aren't a named export for node <10 const parse_url = Url.parse; const format_url = Url.format; const streamDestructionSupported = 'destroy' in Stream.Readable.prototype; /** * Check if a value is an instance of Request. 
* * @param Mixed input * @return Boolean */ function isRequest(input) { return typeof input === 'object' && typeof input[INTERNALS$2] === 'object'; } function isAbortSignal(signal) { const proto = signal && typeof signal === 'object' && Object.getPrototypeOf(signal); return !!(proto && proto.constructor.name === 'AbortSignal'); } /** * Request class * * @param Mixed input Url or Request instance * @param Object init Custom options * @return Void */ class Request { constructor(input) { let init = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; let parsedURL; // normalize input if (!isRequest(input)) { if (input && input.href) { // in order to support Node.js' Url objects; though WHATWG's URL objects // will fall into this branch also (since their `toString()` will return // `href` property anyway) parsedURL = parse_url(input.href); } else { // coerce input to a string before attempting to parse parsedURL = parse_url(`${input}`); } input = {}; } else { parsedURL = parse_url(input.url); } let method = init.method || input.method || 'GET'; method = method.toUpperCase(); if ((init.body != null || isRequest(input) && input.body !== null) && (method === 'GET' || method === 'HEAD')) { throw new TypeError('Request with GET/HEAD method cannot have body'); } let inputBody = init.body != null ? init.body : isRequest(input) && input.body !== null ? clone(input) : null; Body.call(this, inputBody, { timeout: init.timeout || input.timeout || 0, size: init.size || input.size || 0 }); const headers = new Headers(init.headers || input.headers || {}); if (inputBody != null && !headers.has('Content-Type')) { const contentType = extractContentType(inputBody); if (contentType) { headers.append('Content-Type', contentType); } } let signal = isRequest(input) ? 
input.signal : null; if ('signal' in init) signal = init.signal; if (signal != null && !isAbortSignal(signal)) { throw new TypeError('Expected signal to be an instanceof AbortSignal'); } this[INTERNALS$2] = { method, redirect: init.redirect || input.redirect || 'follow', headers, parsedURL, signal }; // node-fetch-only options this.follow = init.follow !== undefined ? init.follow : input.follow !== undefined ? input.follow : 20; this.compress = init.compress !== undefined ? init.compress : input.compress !== undefined ? input.compress : true; this.counter = init.counter || input.counter || 0; this.agent = init.agent || input.agent; } get method() { return this[INTERNALS$2].method; } get url() { return format_url(this[INTERNALS$2].parsedURL); } get headers() { return this[INTERNALS$2].headers; } get redirect() { return this[INTERNALS$2].redirect; } get signal() { return this[INTERNALS$2].signal; } /** * Clone this request * * @return Request */ clone() { return new Request(this); } } Body.mixIn(Request.prototype); Object.defineProperty(Request.prototype, Symbol.toStringTag, { value: 'Request', writable: false, enumerable: false, configurable: true }); Object.defineProperties(Request.prototype, { method: { enumerable: true }, url: { enumerable: true }, headers: { enumerable: true }, redirect: { enumerable: true }, clone: { enumerable: true }, signal: { enumerable: true } }); /** * Convert a Request to Node.js http request options. 
* * @param Request A Request instance * @return Object The options object to be passed to http.request */ function getNodeRequestOptions(request) { const parsedURL = request[INTERNALS$2].parsedURL; const headers = new Headers(request[INTERNALS$2].headers); // fetch step 1.3 if (!headers.has('Accept')) { headers.set('Accept', '*/*'); } // Basic fetch if (!parsedURL.protocol || !parsedURL.hostname) { throw new TypeError('Only absolute URLs are supported'); } if (!/^https?:$/.test(parsedURL.protocol)) { throw new TypeError('Only HTTP(S) protocols are supported'); } if (request.signal && request.body instanceof Stream.Readable && !streamDestructionSupported) { throw new Error('Cancellation of streamed requests with AbortSignal is not supported in node < 8'); } // HTTP-network-or-cache fetch steps 2.4-2.7 let contentLengthValue = null; if (request.body == null && /^(POST|PUT)$/i.test(request.method)) { contentLengthValue = '0'; } if (request.body != null) { const totalBytes = getTotalBytes(request); if (typeof totalBytes === 'number') { contentLengthValue = String(totalBytes); } } if (contentLengthValue) { headers.set('Content-Length', contentLengthValue); } // HTTP-network-or-cache fetch step 2.11 if (!headers.has('User-Agent')) { headers.set('User-Agent', 'node-fetch/1.0 (+https://github.com/bitinn/node-fetch)'); } // HTTP-network-or-cache fetch step 2.15 if (request.compress && !headers.has('Accept-Encoding')) { headers.set('Accept-Encoding', 'gzip,deflate'); } let agent = request.agent; if (typeof agent === 'function') { agent = agent(parsedURL); } if (!headers.has('Connection') && !agent) { headers.set('Connection', 'close'); } // HTTP-network fetch step 4.2 // chunked encoding is handled by Node.js return Object.assign({}, parsedURL, { method: request.method, headers: exportNodeCompatibleHeaders(headers), agent }); } /** * abort-error.js * * AbortError interface for cancelled requests */ /** * Create AbortError instance * * @param String message Error message for 
human * @return AbortError */ function AbortError(message) { Error.call(this, message); this.type = 'aborted'; this.message = message; // hide custom error implementation details from end-users Error.captureStackTrace(this, this.constructor); } AbortError.prototype = Object.create(Error.prototype); AbortError.prototype.constructor = AbortError; AbortError.prototype.name = 'AbortError'; // fix an issue where "PassThrough", "resolve" aren't a named export for node <10 const PassThrough$1 = Stream.PassThrough; const resolve_url = Url.resolve; /** * Fetch function * * @param Mixed url Absolute url or Request instance * @param Object opts Fetch options * @return Promise */ function fetch(url, opts) { // allow custom promise if (!fetch.Promise) { throw new Error('native promise missing, set fetch.Promise to your favorite alternative'); } Body.Promise = fetch.Promise; // wrap http.request into fetch return new fetch.Promise(function (resolve, reject) { // build request object const request = new Request(url, opts); const options = getNodeRequestOptions(request); const send = (options.protocol === 'https:' ? 
https : http).request; const signal = request.signal; let response = null; const abort = function abort() { let error = new AbortError('The user aborted a request.'); reject(error); if (request.body && request.body instanceof Stream.Readable) { request.body.destroy(error); } if (!response || !response.body) return; response.body.emit('error', error); }; if (signal && signal.aborted) { abort(); return; } const abortAndFinalize = function abortAndFinalize() { abort(); finalize(); }; // send request const req = send(options); let reqTimeout; if (signal) { signal.addEventListener('abort', abortAndFinalize); } function finalize() { req.abort(); if (signal) signal.removeEventListener('abort', abortAndFinalize); clearTimeout(reqTimeout); } if (request.timeout) { req.once('socket', function (socket) { reqTimeout = setTimeout(function () { reject(new FetchError(`network timeout at: ${request.url}`, 'request-timeout')); finalize(); }, request.timeout); }); } req.on('error', function (err) { reject(new FetchError(`request to ${request.url} failed, reason: ${err.message}`, 'system', err)); finalize(); }); req.on('response', function (res) { clearTimeout(reqTimeout); const headers = createHeadersLenient(res.headers); // HTTP fetch step 5 if (fetch.isRedirect(res.statusCode)) { // HTTP fetch step 5.2 const location = headers.get('Location'); // HTTP fetch step 5.3 const locationURL = location === null ? null : resolve_url(request.url, location); // HTTP fetch step 5.5 switch (request.redirect) { case 'error': reject(new FetchError(`uri requested responds with a redirect, redirect mode is set to error: ${request.url}`, 'no-redirect')); finalize(); return; case 'manual': // node-fetch-specific step: make manual redirect a bit easier to use by setting the Location header value to the resolved URL. 
if (locationURL !== null) { // handle corrupted header try { headers.set('Location', locationURL); } catch (err) { // istanbul ignore next: nodejs server prevent invalid response headers, we can't test this through normal request reject(err); } } break; case 'follow': // HTTP-redirect fetch step 2 if (locationURL === null) { break; } // HTTP-redirect fetch step 5 if (request.counter >= request.follow) { reject(new FetchError(`maximum redirect reached at: ${request.url}`, 'max-redirect')); finalize(); return; } // HTTP-redirect fetch step 6 (counter increment) // Create a new Request object. const requestOpts = { headers: new Headers(request.headers), follow: request.follow, counter: request.counter + 1, agent: request.agent, compress: request.compress, method: request.method, body: request.body, signal: request.signal, timeout: request.timeout, size: request.size }; // HTTP-redirect fetch step 9 if (res.statusCode !== 303 && request.body && getTotalBytes(request) === null) { reject(new FetchError('Cannot follow redirect with body being a readable stream', 'unsupported-redirect')); finalize(); return; } // HTTP-redirect fetch step 11 if (res.statusCode === 303 || (res.statusCode === 301 || res.statusCode === 302) && request.method === 'POST') { requestOpts.method = 'GET'; requestOpts.body = undefined; requestOpts.headers.delete('content-length'); } // HTTP-redirect fetch step 15 resolve(fetch(new Request(locationURL, requestOpts))); finalize(); return; } } // prepare response res.once('end', function () { if (signal) signal.removeEventListener('abort', abortAndFinalize); }); let body = res.pipe(new PassThrough$1()); const response_options = { url: request.url, status: res.statusCode, statusText: res.statusMessage, headers: headers, size: request.size, timeout: request.timeout, counter: request.counter }; // HTTP-network fetch step 12.1.1.3 const codings = headers.get('Content-Encoding'); // HTTP-network fetch step 12.1.1.4: handle content codings // in following 
scenarios we ignore compression support // 1. compression support is disabled // 2. HEAD request // 3. no Content-Encoding header // 4. no content response (204) // 5. content not modified response (304) if (!request.compress || request.method === 'HEAD' || codings === null || res.statusCode === 204 || res.statusCode === 304) { response = new Response(body, response_options); resolve(response); return; } // For Node v6+ // Be less strict when decoding compressed responses, since sometimes // servers send slightly invalid responses that are still accepted // by common browsers. // Always using Z_SYNC_FLUSH is what cURL does. const zlibOptions = { flush: zlib.Z_SYNC_FLUSH, finishFlush: zlib.Z_SYNC_FLUSH }; // for gzip if (codings == 'gzip' || codings == 'x-gzip') { body = body.pipe(zlib.createGunzip(zlibOptions)); response = new Response(body, response_options); resolve(response); return; } // for deflate if (codings == 'deflate' || codings == 'x-deflate') { // handle the infamous raw deflate response from old servers // a hack for old IIS and Apache servers const raw = res.pipe(new PassThrough$1()); raw.once('data', function (chunk) { // see http://stackoverflow.com/questions/37519828 if ((chunk[0] & 0x0F) === 0x08) { body = body.pipe(zlib.createInflate()); } else { body = body.pipe(zlib.createInflateRaw()); } response = new Response(body, response_options); resolve(response); }); return; } // for br if (codings == 'br' && typeof zlib.createBrotliDecompress === 'function') { body = body.pipe(zlib.createBrotliDecompress()); response = new Response(body, response_options); resolve(response); return; } // otherwise, use response as-is response = new Response(body, response_options); resolve(response); }); writeToStream(req, request); }); } /** * Redirect code matching * * @param Number code Status code * @return Boolean */ fetch.isRedirect = function (code) { return code === 301 || code === 302 || code === 303 || code === 307 || code === 308; }; // expose Promise 
fetch.Promise = global.Promise; module.exports = exports = fetch; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.default = exports; exports.Headers = Headers; exports.Request = Request; exports.Response = Response; exports.FetchError = FetchError; /***/ }), /***/ 1223: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { var wrappy = __nccwpck_require__(2940) module.exports = wrappy(once) module.exports.strict = wrappy(onceStrict) once.proto = once(function () { Object.defineProperty(Function.prototype, 'once', { value: function () { return once(this) }, configurable: true }) Object.defineProperty(Function.prototype, 'onceStrict', { value: function () { return onceStrict(this) }, configurable: true }) }) function once (fn) { var f = function () { if (f.called) return f.value f.called = true return f.value = fn.apply(this, arguments) } f.called = false return f } function onceStrict (fn) { var f = function () { if (f.called) throw new Error(f.onceError) f.called = true return f.value = fn.apply(this, arguments) } var name = fn.name || 'Function wrapped with `once`' f.onceError = name + " shouldn't be called more than once" f.called = false return f } /***/ }), /***/ 4294: /***/ ((module, __unused_webpack_exports, __nccwpck_require__) => { module.exports = __nccwpck_require__(4219); /***/ }), /***/ 4219: /***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { "use strict"; var net = __nccwpck_require__(1631); var tls = __nccwpck_require__(4016); var http = __nccwpck_require__(8605); var https = __nccwpck_require__(7211); var events = __nccwpck_require__(8614); var assert = __nccwpck_require__(2357); var util = __nccwpck_require__(1669); exports.httpOverHttp = httpOverHttp; exports.httpsOverHttp = httpsOverHttp; exports.httpOverHttps = httpOverHttps; exports.httpsOverHttps = httpsOverHttps; function httpOverHttp(options) { var agent = new TunnelingAgent(options); agent.request = http.request; return agent; } 
/**
 * Agent for HTTPS requests tunnelled through an HTTP proxy: the proxy is
 * contacted with `http.request`, sockets are produced by
 * `createSecureSocket`, and the default port is 443.
 *
 * @param {Object} [options] forwarded to `TunnelingAgent`
 * @return {TunnelingAgent}
 */
function httpsOverHttp(options) {
  return Object.assign(new TunnelingAgent(options), {
    request: http.request,
    createSocket: createSecureSocket,
    defaultPort: 443
  });
}

/**
 * Agent for HTTP requests tunnelled through an HTTPS proxy: the proxy is
 * contacted with `https.request`.
 *
 * @param {Object} [options] forwarded to `TunnelingAgent`
 * @return {TunnelingAgent}
 */
function httpOverHttps(options) {
  return Object.assign(new TunnelingAgent(options), {
    request: https.request
  });
}

/**
 * Agent for HTTPS requests tunnelled through an HTTPS proxy: the proxy is
 * contacted with `https.request`, sockets are produced by
 * `createSecureSocket`, and the default port is 443.
 *
 * @param {Object} [options] forwarded to `TunnelingAgent`
 * @return {TunnelingAgent}
 */
function httpsOverHttps(options) {
  return Object.assign(new TunnelingAgent(options), {
    request: https.request,
    createSocket: createSecureSocket,
    defaultPort: 443
  });
}

/**
 * Event-emitting agent that keeps a list of open sockets and a queue of
 * requests waiting for one.
 *
 * Instance state set here:
 *   - `options`:      the options passed in (or `{}`)
 *   - `proxyOptions`: `options.proxy` (or `{}`)
 *   - `maxSockets`:   `options.maxSockets`, falling back to
 *                     `http.Agent.defaultMaxSockets`
 *   - `requests`:     queued request descriptors, initially empty
 *   - `sockets`:      tracked sockets, initially empty
 *
 * @param {Object} [options]
 */
function TunnelingAgent(options) {
  var self = this;
  self.options = options || {};
  self.proxyOptions = self.options.proxy || {};
  self.maxSockets = self.options.maxSockets || http.Agent.defaultMaxSockets;
  self.requests = [];
  self.sockets = [];

  // When a socket is released, give it to the first queued request aimed at
  // the same host and port; if nobody is waiting for it, tear it down.
  self.on('free', function onFree(socket, host, port, localAddress) {
    var target = toOptions(host, port, localAddress);
    var position = self.requests.findIndex(function (queued) {
      return queued.host === target.host && queued.port === target.port;
    });

    if (position !== -1) {
      // Same origin server is wanted again: reuse the connection.
      var pending = self.requests.splice(position, 1)[0];
      pending.request.onSocket(socket);
      return;
    }

    socket.destroy();
    self.removeSocket(socket);
  });
}
util.inherits(TunnelingAgent, events.EventEmitter);
/**
 * Open a tunnel through the proxy by issuing a CONNECT request and hand the
 * resulting socket to `cb`.
 *
 * A placeholder object is pushed onto `self.sockets` immediately and is
 * swapped for the real socket once the tunnel is up, or removed through
 * `self.removeSocket` if establishing it fails.
 *
 * On failure (request error, non-200 CONNECT status, or unexpected body
 * bytes from the proxy) an Error with `code === 'ECONNRESET'` is emitted on
 * `options.request`; `cb` is not invoked in that case.
 *
 * @param {Object}   options  request descriptor; `host`, `port`,
 *                            `localAddress` and `request` are read here
 * @param {Function} cb       called as `cb(socket)` once the tunnel exists
 */
TunnelingAgent.prototype.createSocket = function createSocket(options, cb) {
  var self = this;
  var placeholder = {};
  self.sockets.push(placeholder);

  var connectOptions = mergeOptions({}, self.proxyOptions, {
    method: 'CONNECT',
    path: options.host + ':' + options.port,
    agent: false,
    headers: {
      host: options.host + ':' + options.port
    }
  });
  if (options.localAddress) {
    connectOptions.localAddress = options.localAddress;
  }
  if (connectOptions.proxyAuth) {
    connectOptions.headers = connectOptions.headers || {};
    // Buffer.from() replaces the deprecated `new Buffer()` constructor, which
    // triggers a runtime deprecation warning and, when given a number,
    // allocates uninitialised memory instead of encoding the credentials.
    connectOptions.headers['Proxy-Authorization'] = 'Basic ' +
        Buffer.from(connectOptions.proxyAuth).toString('base64');
  }

  debug('making CONNECT request');
  var connectReq = self.request(connectOptions);
  connectReq.useChunkedEncodingByDefault = false; // for v0.6
  connectReq.once('response', onResponse); // for v0.6
  connectReq.once('upgrade', onUpgrade);   // for v0.6
  connectReq.once('connect', onConnect);   // for v0.7 or later
  connectReq.once('error', onError);
  connectReq.end();

  function onResponse(res) {
    // Very hacky. This is necessary to avoid http-parser leaks.
    res.upgrade = true;
  }

  function onUpgrade(res, socket, head) {
    // Hacky: defer to the next tick, then treat the upgrade like a connect.
    process.nextTick(function() {
      onConnect(res, socket, head);
    });
  }

  function onConnect(res, socket, head) {
    // The CONNECT exchange is over; drop the listeners attached for it.
    connectReq.removeAllListeners();
    socket.removeAllListeners();

    if (res.statusCode !== 200) {
      debug('tunneling socket could not be established, statusCode=%d',
        res.statusCode);
      socket.destroy();
      var error = new Error('tunneling socket could not be established, ' +
        'statusCode=' + res.statusCode);
      error.code = 'ECONNRESET';
      options.request.emit('error', error);
      self.removeSocket(placeholder);
      return;
    }
    if (head.length > 0) {
      // Any bytes that arrive together with the CONNECT reply are rejected.
      debug('got illegal response body from proxy');
      socket.destroy();
      var error = new Error('got illegal response body from proxy');
      error.code = 'ECONNRESET';
      options.request.emit('error', error);
      self.removeSocket(placeholder);
      return;
    }
    debug('tunneling connection has established');
    // Replace the placeholder with the live socket, keeping its slot.
    self.sockets[self.sockets.indexOf(placeholder)] = socket;
    return cb(socket);
  }

  function onError(cause) {
    connectReq.removeAllListeners();

    debug('tunneling socket could not be established, cause=%s\n',
          cause.message, cause.stack);
    var error = new Error('tunneling socket could not be established, ' +
                          'cause=' + cause.message);
    error.code = 'ECONNRESET';
    options.request.emit('error', error);
    self.removeSocket(placeholder);
  }
};
/**
 * Normalise the arguments of a socket/agent callback into an options object.
 *
 * When `host` is a string, the three positional values are packed into
 * `{ host, port, localAddress }`; otherwise `host` is returned unchanged
 * (the caller already passed an options object).
 *
 * @param {string|Object} host
 * @param {number} [port]
 * @param {string} [localAddress]
 * @return {Object}
 */
function toOptions(host, port, localAddress) {
  if (typeof host === 'string') { // since v0.10
    return {
      host: host,
      port: port,
      localAddress: localAddress
    };
  }
  return host; // for v0.11 or later
}

/**
 * Shallow-merge any number of override objects into `target`, left to right.
 *
 * Only own enumerable keys whose value is not `undefined` are copied, so an
 * explicit `undefined` never erases an earlier value. Arguments that are not
 * objects are skipped; this includes `null`, which `typeof` reports as
 * 'object' and which would otherwise make `Object.keys` throw.
 *
 * @param {Object} target     object that receives the properties (mutated)
 * @param {...Object} sources override objects, later ones win
 * @return {Object} `target`
 */
function mergeOptions(target) {
  for (var i = 1, len = arguments.length; i < len; ++i) {
    var overrides = arguments[i];
    if (overrides === null || typeof overrides !== 'object') {
      continue;
    }
    var keys = Object.keys(overrides);
    for (var j = 0, keyLen = keys.length; j < keyLen; ++j) {
      var k = keys[j];
      if (overrides[k] !== undefined) {
        target[k] = overrides[k];
      }
    }
  }
  return target;
}
/**
 * Turn `fn` (a function that decorates a callback) into a wrapper that keeps
 * own properties intact along the way.
 *
 * - `wrappy(fn)` returns `wrapper`, which carries a copy of every own
 *   enumerable property of `fn`.
 * - Calling `wrapper(..., cb)` invokes `fn` with the same `this` and
 *   arguments. If the result is a function other than the last argument,
 *   the last argument's own enumerable properties are copied onto it.
 * - `wrappy(fn, cb)` is shorthand for `wrappy(fn)(cb)`.
 *
 * @param {Function} fn   the decorating function
 * @param {Function} [cb] callback to wrap immediately
 * @return {Function}
 * @throws {TypeError} when `fn` is not a function
 */
function wrappy (fn, cb) {
  if (fn && cb) {
    return wrappy(fn)(cb)
  }

  if (typeof fn !== 'function') {
    throw new TypeError('need wrapper function')
  }

  function wrapper () {
    const args = Array.prototype.slice.call(arguments)
    const result = fn.apply(this, args)
    const last = args[args.length - 1]

    // Carry the callback's decorations over to whatever replaced it.
    if (typeof result === 'function' && result !== last) {
      for (const key of Object.keys(last)) {
        result[key] = last[key]
      }
    }
    return result
  }

  // Keep the decorations that were attached to `fn` itself.
  for (const key of Object.keys(fn)) {
    wrapper[key] = fn[key]
  }

  return wrapper
}
// The module cache: module id -> { exports } record of every module that
// has been (or is currently being) loaded.
var __webpack_module_cache__ = {};

/**
 * The bundle's require function.
 *
 * Returns the cached exports when `moduleId` has been loaded before.
 * Otherwise it registers a fresh `{ exports: {} }` record in the cache
 * *before* running the module function from `__webpack_modules__`, then
 * returns the record's `exports`. The module function is invoked with
 * `this` bound to the initial exports object and receives
 * `(module, module.exports, __nccwpck_require__)`.
 *
 * If the module function throws, its cache entry is deleted again, so a
 * later require re-runs the module instead of returning a half-initialised
 * record, and the error is propagated.
 *
 * @param {number|string} moduleId key into `__webpack_modules__`
 * @return {*} the module's exports
 */
function __nccwpck_require__(moduleId) {
  // Check if module is in cache
  var cached = __webpack_module_cache__[moduleId];
  if (cached) {
    return cached.exports;
  }

  // Create a new module (and put it into the cache)
  var module = { exports: {} };
  __webpack_module_cache__[moduleId] = module;

  // Execute the module function
  try {
    __webpack_modules__[moduleId].call(module.exports, module, module.exports, __nccwpck_require__);
  } catch (err) {
    delete __webpack_module_cache__[moduleId];
    throw err;
  }

  // Return the exports of the module
  return module.exports;
}
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @actions/github MIT @actions/http-client MIT Actions Http Client for Node.js Copyright (c) GitHub, Inc. All rights reserved. MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
@octokit/auth-token MIT The MIT License Copyright (c) 2019 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/core MIT The MIT License Copyright (c) 2019 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/endpoint MIT The MIT License Copyright (c) 2018 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/graphql MIT The MIT License Copyright (c) 2018 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/plugin-paginate-rest MIT MIT License Copyright (c) 2019 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
@octokit/plugin-rest-endpoint-methods MIT MIT License Copyright (c) 2019 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/plugin-throttling MIT The MIT License Copyright (c) 2018 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/request MIT The MIT License Copyright (c) 2018 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @octokit/request-error MIT The MIT License Copyright (c) 2019 Octokit contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @vercel/ncc MIT Copyright 2018 ZEIT, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
adm-zip MIT MIT License Copyright (c) 2012 Another-D-Mention Software and other contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. before-after-hook Apache-2.0 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2018 Gregor Martynus and other contributors. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. bottleneck MIT The MIT License (MIT) Copyright (c) 2014 Simon Grondin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. deprecation ISC The ISC License Copyright (c) Gregor Martynus and contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. filesize BSD-3-Clause Copyright (c) 2020, Jason Mulligan All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of filesize nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
is-plain-object MIT The MIT License (MIT) Copyright (c) 2014-2017, Jon Schlinkert. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. node-fetch MIT The MIT License (MIT) Copyright (c) 2016 David Frank Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. once ISC The ISC License Copyright (c) Isaac Z. Schlueter and Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. tunnel MIT The MIT License (MIT) Copyright (c) 2012 Koichi Kobayashi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. universal-user-agent ISC # [ISC License](https://spdx.org/licenses/ISC) Copyright (c) 2018, Gregor Martynus (https://github.com/gr2m) Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. wrappy ISC The ISC License Copyright (c) Isaac Z. Schlueter and Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
================================================ FILE: .github/actions/check_artifact_exists/main.js ================================================ const core = require('@actions/core'); const github = require('@actions/github'); const AdmZip = require('adm-zip'); const filesize = require('filesize'); const pathname = require('path'); const fs = require('fs'); const { throttling } = require('@octokit/plugin-throttling'); const { GitHub } = require('@actions/github/lib/utils'); async function getGoodArtifacts(client, owner, repo, name) { const goodRepoArtifacts = await client.paginate( "GET /repos/{owner}/{repo}/actions/artifacts", { owner: owner, repo: repo, per_page: 100, }, (repoArtifacts, done) => { // console.log(" ==> repoArtifacts", repoArtifacts); const goodArtifacts = repoArtifacts.data.filter((a) => { // console.log("==> Artifact check", a); return a.name == name }); if (goodArtifacts.length > 0) { done(); } return goodArtifacts; } ); console.log("==> maybe goodRepoArtifacts:", goodRepoArtifacts); return goodRepoArtifacts; } async function main() { const token = core.getInput("github_token", { required: true }); const [owner, repo] = core.getInput("repo", { required: true }).split("/"); const path = core.getInput("path", { required: true }); const name = core.getInput("name"); const download = core.getInput("download"); const OctokitWithThrottling = GitHub.plugin(throttling); const client = new OctokitWithThrottling({ auth: token, throttle: { onRateLimit: (retryAfter, options) => { console.log( `Request quota exhausted for request ${options.method} ${options.url}` ); // Retry twice after hitting a rate limit error, then give up if (options.request.retryCount <= 2) { console.log(`Retrying after ${retryAfter} seconds!`); return true; } }, onAbuseLimit: (retryAfter, options) => { // does not retry, only logs a warning console.log( `Abuse detected for request ${options.method} ${options.url}` ); }, }, }); console.log("==> Repo:", owner + "/" + repo); const 
goodArtifacts = await getGoodArtifacts(client, owner, repo, name); console.log("==> goodArtifacts:", goodArtifacts); let artifactStatus = ""; if (goodArtifacts.length === 0) { artifactStatus = "missing"; } else { artifactStatus = "found"; } console.log("==> Artifact", name, artifactStatus); console.log("==> download", download); core.setOutput("status", artifactStatus); if (artifactStatus === "found" && download == "true") { console.log("==> # artifacts:", goodArtifacts.length); let artifact = goodArtifacts[0]; console.log("==> Artifact:", artifact.id) const size = filesize(artifact.size_in_bytes, { base: 10 }) console.log("==> Downloading:", artifact.name + ".zip", `(${size})`) const zip = await client.actions.downloadArtifact({ owner: owner, repo: repo, artifact_id: artifact.id, archive_format: "zip", }) const dir = name ? path : pathname.join(path, artifact.name) fs.mkdirSync(dir, { recursive: true }) const adm = new AdmZip(Buffer.from(zip.data)) adm.getEntries().forEach((entry) => { const action = entry.isDirectory ? "creating" : "inflating" const filepath = pathname.join(dir, entry.entryName) console.log(` ${action}: ${filepath}`) }) adm.extractAllTo(dir, true) } if (artifactStatus === "missing" && download == "true") { core.setFailed("Required", name, "that is missing"); } return; } // We have to manually wrap the main function with a try-catch here because // GitHub will ignore uncatched exceptions and continue running the workflow, // leading to harder to diagnose errors downstream from this action. 
try { main(); } catch (error) { core.setFailed(error.message); } ================================================ FILE: .github/actions/check_artifact_exists/package.json ================================================ { "name": "check_artifact_exists", "main": "main.js", "devDependencies": { "@actions/core": "^1.2.6", "@actions/github": "^4.0.0", "@octokit/plugin-throttling": "^3.4.1", "@vercel/ncc": "^0.27.0", "adm-zip": "^0.5.2", "filesize": "^6.1.0" } } ================================================ FILE: .github/actions/chroot-bind-mount/action.yml ================================================ name: "chroot bind mount" description: "Bind mount into chroot" inputs: mounts: description: "Path to consider" required: true runs: using: "composite" steps: - id: install_qemu run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends qemu-user-static shell: bash - id: bind_mount_chroot run: | set -xe # Bind-mount so that we have the same tree inside the chroot for dev in ${{ github.workspace }} ${{ inputs.mounts }}; do sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev} done; for dev in ${{ inputs.mounts }}; do sudo mount -o bind /${dev} ${{ env.SYSTEM_RASPBIAN }}/${dev} done; shell: bash ================================================ FILE: .github/actions/get_cache_key/README.md ================================================ GitHub Action to compute cache key ================================== It is intended to work in harmony with `check_artifact_exists`: - compute a stable cache key - as simple to use as possible (less parameters) It will expect to be ran in a GitHub Action job that follows `SUBMODULE_FLAVOR-PLATFORM`: - it will use the `SUBMODULE` part to check what is the current SHA1 of this git submodule. - the `FLAVOR` allows to distringuish e.g., opt/dbg builds - the PLATFORM permits defining an os/arch couple It allows for an `extras` field for extensive customization, like forcing a re-build. 
================================================ FILE: .github/actions/get_cache_key/action.yml ================================================ name: "get cache key for submodule" description: "Compute a cache key based on git submodule" inputs: extras: description: "Extra cache key value" required: true osarch: description: "Override automatic OSARCH value" required: false outputs: key: description: "Computed cache key name" value: ${{ steps.compute_cache_key.outputs.key }} runs: using: "composite" steps: - id: compute_cache_key run: | JOB=${{ github.job }} SUBMODULE=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f1) FLAVOR=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f2) if [ -z "${{ inputs.osarch }}" ]; then OSARCH=$(echo $JOB | cut -d'-' -f2) else OSARCH=${{ inputs.osarch }} fi SHA=$(git submodule status ${SUBMODULE} | sed -e 's/^-//g' -e 's/^+//g' -e 's/^U//g' | awk '{ print $1 }') KEY=${SUBMODULE}-${FLAVOR}_${OSARCH}_${SHA}_${{ inputs.extras }} echo "::set-output name=key::${KEY}" shell: bash ================================================ FILE: .github/actions/host-build/action.yml ================================================ name: "Run build lib" description: "Run build of lib" inputs: arch: description: "Target arch for loading script (host/armv7/aarch64)" required: false default: "host" flavor: description: "Build flavor" required: true runs: using: "composite" steps: - run: ./ci_scripts/${{ inputs.arch }}-build.sh ${{ inputs.flavor }} shell: bash ================================================ FILE: .github/actions/install-python-upstream/action.yml ================================================ name: "Install Python" description: "Installing an upstream python release" inputs: version: description: "Python version" required: true runs: using: "composite" steps: - shell: bash run: | set -xe curl https://www.python.org/ftp/python/${{ inputs.version }}/python-${{ inputs.version }}-macosx10.9.pkg -o "python.pkg" - shell: bash run: ls -hal . 
- shell: bash run: | set -xe sudo installer -verbose -pkg python.pkg -target / - shell: bash run: | set -xe which python3 python3 --version python3 -c "import sysconfig; print(sysconfig.get_config_var('MACOSX_DEPLOYMENT_TARGET'))" - shell: bash name: Set up venv with upstream Python run: | python3 -m venv /tmp/venv echo "/tmp/venv/bin" >> $GITHUB_PATH ================================================ FILE: .github/actions/install-xldd/action.yml ================================================ name: "xldd install" description: "Install xldd" inputs: target: description: "System target" required: true runs: using: "composite" steps: - id: install_xldd run: | source ./ci_scripts/all-vars.sh # -s required to avoid the noisy output like "Entering / Leaving directories" toolchain=$(make -s -C ${DS_DSDIR}/native_client/ TARGET=${{ inputs.target }} TFDIR=${DS_TFDIR} print-toolchain) if [ ! -x "${toolchain}ldd" ]; then cp "${DS_DSDIR}/native_client/xldd" "${toolchain}ldd" && chmod +x "${toolchain}ldd" fi shell: bash ================================================ FILE: .github/actions/multistrap/action.yml ================================================ name: "multistrap install" description: "Install a system root using multistrap" inputs: arch: description: "Target arch" required: true packages: description: "Extra packages to install" required: false default: "" runs: using: "composite" steps: - id: install_multistrap run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends multistrap qemu-user-static shell: bash - id: create_chroot run: | set -xe multistrap_conf="" if [ "${{ inputs.arch }}" = "armv7" ]; then multistrap_conf=multistrap_raspbian_buster.conf wget http://archive.raspbian.org/raspbian/pool/main/r/raspbian-archive-keyring/raspbian-archive-keyring_20120528.2_all.deb && sudo dpkg -i raspbian-archive-keyring_20120528.2_all.deb fi if [ "${{ inputs.arch }}" = "aarch64" ]; then multistrap_conf=multistrap_armbian64_buster.conf fi multistrap -d 
${{ env.SYSTEM_RASPBIAN }} -f ${{ github.workspace }}/native_client/${multistrap_conf} if [ ! -z "${{ inputs.packages }}" ]; then TO_MOUNT=${{ github.workspace }} # Prepare target directory to bind-mount the github tree mkdir -p ${{ env.SYSTEM_RASPBIAN }}/${{ github.workspace }} # Bind-mount so that we have the same tree inside the chroot for dev in ${TO_MOUNT}; do sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev} done; # Copy some host data: # resolv.conf: for getting DNS working # passwd, group, shadow: to have user accounts and apt-get install working for ff in resolv.conf passwd group shadow; do sudo cp /etc/${ff} ${{ env.SYSTEM_RASPBIAN }}/etc/ done; # Perform apt steps. # Preserving the env is required sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y --no-install-recommends ${{ inputs.packages }} # Cleanup apt info to save space sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ rm -fr /var/cache/apt/* /var/lib/apt/lists/* # Unmount what has been mounted for dev in ${TO_MOUNT}; do sudo umount ${{ env.SYSTEM_RASPBIAN }}${dev} done; fi shell: bash ================================================ FILE: .github/actions/node-build/action.yml ================================================ name: "NodeJS binding" description: "Binding a nodejs binding" inputs: nodejs_versions: description: "NodeJS versions supported" required: true electronjs_versions: description: "ElectronJS versions supported" required: true local_cflags: description: "CFLAGS for NodeJS package" required: false default: "" local_ldflags: description: "LDFLAGS for NodeJS package" required: false default: "" local_libs: description: "LIBS for NodeJS package" required: false default: "" target: description: "TARGET value" required: false default: "host" chroot: description: "RASPBIAN value" required: false default: "" runs: using: "composite" steps: - run: | node --version npm --version 
shell: bash - run: | npm update shell: bash - run: | mkdir -p tmp/headers/nodejs tmp/headers/electronjs shell: bash - run: | for node in ${{ inputs.nodejs_versions }}; do EXTRA_CFLAGS=${{ inputs.local_cflags }} \ EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \ EXTRA_LIBS=${{ inputs.local_libs }} \ make -C native_client/javascript \ TARGET=${{ inputs.target }} \ RASPBIAN=${{ inputs.chroot }} \ NODE_ABI_TARGET=--target=${node} \ NODE_DEVDIR=--devdir=headers/nodejs \ clean node-wrapper done; shell: bash - run: | for electron in ${{ inputs.electronjs_versions }}; do EXTRA_CFLAGS=${{ inputs.local_cflags }} \ EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \ EXTRA_LIBS=${{ inputs.local_libs }} \ make -C native_client/javascript \ TARGET=${{ inputs.target }} \ RASPBIAN=${{ inputs.chroot }} \ NODE_ABI_TARGET=--target=${electron} \ NODE_DIST_URL=--disturl=https://electronjs.org/headers \ NODE_RUNTIME=--runtime=electron \ NODE_DEVDIR=--devdir=headers/electronjs \ clean node-wrapper done; shell: bash - run: | make -C native_client/javascript clean npm-pack shell: bash - run: | tar -czf native_client/javascript/wrapper.tar.gz \ -C native_client/javascript/ lib/ shell: bash ================================================ FILE: .github/actions/node-install/action.yml ================================================ name: "nodejs install" description: "Install nodejs in a chroot" inputs: node: description: "NodeJS version" required: true runs: using: "composite" steps: - id: add_apt_source run: | set -ex (echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/preferences echo "deb http://deb.nodesource.com/node_${{ inputs.node }}.x buster main" > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/sources.list.d/nodesource.list wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-key add - shell: bash - id: install_nodejs run: | set -ex sudo --preserve-env 
chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y nodejs shell: bash ================================================ FILE: .github/actions/numpy_vers/README.md ================================================ GitHub Action to set NumPy versions =================================== This actions aims at computing correct values for NumPy dependencies: - `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time - `NUMPY_DEP_VERSION`: range of accepted versions for execution time Versions are set considering several factors: - API and ABI compatibility ; otherwise we can have the binding wrapper throwing errors like "Illegal instruction", or computing wrong values because of changed memory layout - Wheels availability: for CI and end users, we want to avoid having to rebuild numpy so we stick to versions where there is an existing upstream `wheel` file ================================================ FILE: .github/actions/numpy_vers/action.yml ================================================ name: "get numpy versions" description: "Get proper NumPy build and runtime versions dependencies range" inputs: pyver: description: "Python version" required: true outputs: build_version: description: "NumPy build dependency" value: ${{ steps.numpy.outputs.build }} dep_version: description: "NumPy runtime dependency" value: ${{ steps.numpy.outputs.dep }} runs: using: "composite" steps: - id: numpy run: | set -ex NUMPY_BUILD_VERSION="==1.7.0" NUMPY_DEP_VERSION=">=1.7.0" OS=$(uname -s) ARCH=$(uname -m) case "${OS}:${ARCH}" in Linux:x86_64) case "${{ inputs.pyver }}" in 3.7*) NUMPY_BUILD_VERSION="==1.14.5" NUMPY_DEP_VERSION=">=1.14.5" ;; 3.8*) NUMPY_BUILD_VERSION="==1.17.3" NUMPY_DEP_VERSION=">=1.17.3" ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" NUMPY_DEP_VERSION=">=1.19.4" ;; esac ;; Darwin:*) case "${{ inputs.pyver }}" in 3.6*) NUMPY_BUILD_VERSION="==1.9.0" NUMPY_DEP_VERSION=">=1.9.0" ;; 
3.7*) NUMPY_BUILD_VERSION="==1.14.5" NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0" ;; 3.8*) NUMPY_BUILD_VERSION="==1.17.3" NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3" ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" NUMPY_DEP_VERSION=">=1.19.4" ;; esac ;; ${CI_MSYS_VERSION}:x86_64) case "${{ inputs.pyver }}" in 3.5*) NUMPY_BUILD_VERSION="==1.11.0" NUMPY_DEP_VERSION=">=1.11.0,<1.12.0" ;; 3.6*) NUMPY_BUILD_VERSION="==1.12.0" NUMPY_DEP_VERSION=">=1.12.0,<1.14.5" ;; 3.7*) NUMPY_BUILD_VERSION="==1.14.5" NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0" ;; 3.8*) NUMPY_BUILD_VERSION="==1.17.3" NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3" ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" NUMPY_DEP_VERSION=">=1.19.4" ;; esac ;; esac echo "::set-output name=build::${NUMPY_BUILD_VERSION}" echo "::set-output name=dep::${NUMPY_DEP_VERSION}" shell: bash ================================================ FILE: .github/actions/package/action.yml ================================================ name: "Package lib" description: "Package of lib" runs: using: "composite" steps: - run: ./ci_scripts/package.sh shell: bash ================================================ FILE: .github/actions/package-tensorflow/action.yml ================================================ name: "Package TensorFlow" description: "Package TensorFlow Build" runs: using: "composite" steps: - run: ./ci_scripts/tf-package.sh shell: bash ================================================ FILE: .github/actions/python-build/action.yml ================================================ name: "Python binding" description: "Binding a python binding" inputs: build_flavor: description: "Python package name" required: true numpy_build: description: "NumPy build dependecy" required: true numpy_dep: description: "NumPy runtime dependecy" required: true local_cflags: description: "CFLAGS for Python package" required: false default: "" local_ldflags: description: "LDFLAGS for Python package" required: false default: "" local_libs: description: "LIBS for Python package" required: 
false default: "" target: description: "TARGET value" required: false default: "host" chroot: description: "RASPBIAN value" required: false default: "" runs: using: "composite" steps: - run: | python3 --version pip3 --version python3 -m pip install virtualenv python3 -m virtualenv deepspeech-build shell: bash - run: | mkdir -p wheels shell: bash - run: | set -xe PROJECT_NAME="deepspeech" if [ "${{ inputs.build_flavor }}" = "tflite" ]; then PROJECT_NAME="deepspeech-tflite" fi OS=$(uname) if [ "${OS}" = "Linux" ]; then source deepspeech-build/bin/activate fi NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \ NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \ EXTRA_CFLAGS=${{ inputs.local_cflags }} \ EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \ EXTRA_LIBS=${{ inputs.local_libs }} \ make -C native_client/python/ \ TARGET=${{ inputs.target }} \ RASPBIAN=${{ inputs.chroot }} \ SETUP_FLAGS="--project_name ${PROJECT_NAME}" \ bindings-clean bindings if [ "${OS}" = "Linux" ]; then deactivate fi shell: bash - run: | cp native_client/python/dist/*.whl wheels shell: bash - run: | make -C native_client/python/ bindings-clean shell: bash ================================================ FILE: .github/actions/run-tests/action.yml ================================================ name: "Tests execution" description: "Running DeepSpeech tests" inputs: runtime: description: "Runtime to use for running test" required: true build-flavor: description: "Running against TF or TFLite" required: true model-kind: description: "Running against CI baked or production model" required: true bitrate: description: "Bitrate for testing" required: true chroot: description: "Run using a chroot" required: false runs: using: "composite" steps: - run: | set -xe build="" if [ "${{ inputs.build-flavor }}" = "tflite" ]; then build="_tflite" fi model_kind="" if [ "${{ inputs.model-kind }}" = "prod" ]; then model_kind="-prod" fi prefix="." if [ ! 
-z "${{ inputs.chroot }}" ]; then prefix="${{ inputs.chroot }}" fi ${prefix}/ci_scripts/${{ inputs.runtime }}${build}-tests${model_kind}.sh ${{ inputs.bitrate }} shell: bash ================================================ FILE: .github/actions/select-xcode/action.yml ================================================ name: "Select XCode version" description: "Select XCode version" inputs: version: description: "XCode version" required: true runs: using: "composite" steps: - run: sudo xcode-select --switch /Applications/Xcode_${{ inputs.version }}.app shell: bash ================================================ FILE: .github/actions/setup-tensorflow/action.yml ================================================ name: "Setup TensorFlow" description: "Setup TensorFlow Build" runs: using: "composite" steps: - run: ./ci_scripts/tf-setup.sh shell: bash ================================================ FILE: .github/actions/win-install-sox/action.yml ================================================ name: "Install SoX and add to PATH" description: "Install SoX and add to PATH" runs: using: "composite" steps: - run: | set -ex wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip "C:/Program Files/7-Zip/7z.exe" x -o`pwd`/bin/ -tzip -aoa sox-14.4.2-win32.zip rm sox-*zip echo "`pwd`/bin/sox-14.4.2/" >> $GITHUB_PATH shell: bash ================================================ FILE: .github/lock.yml ================================================ # Configuration for lock-threads - https://github.com/dessant/lock-threads # Number of days of inactivity before a closed issue or pull request is locked daysUntilLock: 30 # Skip issues and pull requests created before a given timestamp. Timestamp must # follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable skipCreatedBefore: false # Issues and pull requests with these labels will not be locked. 
Set to `[]` to disable exemptLabels: [] # Label to add before locking, such as `outdated`. Set to `false` to disable lockLabel: false # Comment to post before locking. Set to `false` to disable lockComment: > This thread has been automatically locked since there has not been any recent activity after it was closed. Please open a new issue for related bugs. # Assign `resolved` as the reason for locking. Set to `false` to disable setLockReason: false # Limit to only `issues` or `pulls` # only: issues # Optionally, specify configuration settings just for `issues` or `pulls` # issues: # exemptLabels: # - help-wanted # lockLabel: outdated # pulls: # daysUntilLock: 30 # Repository to extend settings from # _extends: repo ================================================ FILE: .github/workflows/.git-keep-empty-folder ================================================ ================================================ FILE: .github/workflows/build-and-test.yml ================================================ name: "Builds and tests" on: pull_request: push: branches: - master env: # Shared variables CI_TASK_DIR: ${{ github.workspace }} CI_ARTIFACTS_DIR: ${{ github.workspace }}/artifacts # macOS specific MACOSX_DEPLOYMENT_TARGET: "10.10" CI_NODE_MODULES_NTH: 1 # Windows specific CI_MSYS_VERSION: MSYS_NT-10.0-17763 MSYS2_SHELL_PATH: D:\a\_temp\msys\msys64\usr\bin defaults: run: shell: bash jobs: # Linux jobs swig_Windows_crosscompiled: name: "Lin|Build SWIG for Windows" runs-on: ubuntu-20.04 env: swig_hash: "90cdbee6a69d13b39d734083b9f91069533b0d7b" steps: - uses: actions/checkout@v2 with: repository: "swig/swig" ref: ${{ env.swig_hash }} - run: | mkdir -p build-static/ - uses: actions/cache@v2 id: swig-build-cache with: path: build-static/ key: swig-win-3-${{ env.swig_hash }} - run: | sudo apt-get install -y --no-install-recommends autoconf automake bison build-essential mingw-w64 if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | curl -sSL 
https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.gz > pcre-8.43.tar.gz if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | ./Tools/pcre-build.sh --host=x86_64-w64-mingw32 if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | sh autogen.sh CFLAGS="-static-libgcc -static-libstdc++" \ CXXFLAGS="-static-libgcc -static-libstdc++" \ ./configure \ --host=x86_64-w64-mingw32 \ --prefix=`pwd`/build-static/ \ --program-prefix=ds- if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make -j if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make install if: steps.swig-build-cache.outputs.cache-hit != 'true' - uses: actions/upload-artifact@v2 with: name: ${{ github.job }} path: ${{ github.workspace }}/build-static/ swig_Linux: name: "Lin|Build SWIG" runs-on: ubuntu-20.04 env: swig_hash: "90cdbee6a69d13b39d734083b9f91069533b0d7b" steps: - uses: actions/checkout@v2 with: repository: "swig/swig" ref: ${{ env.swig_hash }} - run: | mkdir -p build-static/ - uses: actions/cache@v2 id: swig-build-cache with: path: build-static/ key: swig-2-${{ runner.os }}-${{ env.swig_hash }} - run: | sudo apt-get install -y --no-install-recommends autoconf automake bison build-essential if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | curl -sSL https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.gz > pcre-8.43.tar.gz if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | ./Tools/pcre-build.sh if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | sh autogen.sh ./configure \ --prefix=${{ github.workspace }}/build-static/ \ --program-prefix=ds- if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make -j if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make install if: steps.swig-build-cache.outputs.cache-hit != 'true' - uses: actions/upload-artifact@v2 with: name: ${{ github.job }} path: ${{ github.workspace }}/build-static/ build-ctc-decoder-Linux: name: "Lin|Build CTC decoder Python package for testing" 
needs: [ swig_Linux ] runs-on: ubuntu-20.04 if: ${{ github.event_name == 'pull_request' }} steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/setup-python@v2 with: python-version: 3.6 - run: | python --version pip --version - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: 3.6 - name: Make decoder package run: | NUMPY_BUILD_VERSION=${{ steps.get_numpy.outputs.build_version }} \ NUMPY_DEP_VERSION=${{ steps.get_numpy.outputs.dep_version }} \ make -C native_client/ctcdecode/ \ NUM_PROCESSES=$(nproc) \ bindings - uses: actions/upload-artifact@v2 with: name: "ds_ctcdecoder-Linux-test.whl" path: ${{ github.workspace }}/native_client/ctcdecode/dist/*.whl - run: | make -C native_client/ctcdecode clean-keep-third-party train-test-model-Linux: name: "Lin|Train a test model" needs: [ "build-ctc-decoder-Linux" ] runs-on: ubuntu-20.04 if: ${{ github.event_name == 'pull_request' }} strategy: matrix: bitrate: ["8k", "16k"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-python@v2 with: python-version: 3.6 - uses: actions/download-artifact@v2 with: name: "ds_ctcdecoder-Linux-test.whl" - run: | python --version pip --version - run: | pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 - run: | pip install ds_ctcdecoder-*-cp36-cp36m-*_x86_64.whl DS_NODECODER=y pip install --upgrade . - run: | bits="" if [ "${{ matrix.bitrate }}" = "8k" ]; then bits=8000 fi if [ "${{ matrix.bitrate }}" = "16k" ]; then bits=16000 fi # Easier to rename so that we can exercise the LDC93S1 importer code to # generate the CSV file.
echo "Moving ${bits} to LDC93S1.wav" mv data/smoke_test/LDC93S1_pcms16le_1_${bits}.wav data/smoke_test/LDC93S1.wav ./bin/run-ci-ldc93s1_new.sh 249 ${bits} ./bin/run-ci-ldc93s1_tflite.sh ${bits} - run: | curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > /tmp/convert_graphdef_memmapped_format chmod +x /tmp/convert_graphdef_memmapped_format /tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train/output_graph.pb --out_graph=/tmp/train/output_graph.pbmm - run: | tar -cf - \ -C /tmp/ckpt/ . \ | xz -9 -T0 > /tmp/checkpoint.tar.xz - run: | mkdir -p ${{ github.workspace }}/tmp/ cp /tmp/train*/output_graph.* /tmp/checkpoint.tar.xz ${{ github.workspace }}/tmp/ - run: | ls -hal /tmp/ ${{ github.workspace }}/tmp/ - uses: actions/upload-artifact@v2 with: name: "test-model.tf-${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/output_graph.pb* - uses: actions/upload-artifact@v2 with: name: "test-model.tflite-${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/output_graph.tflite - uses: actions/upload-artifact@v2 with: name: "test-checkpoint.${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/checkpoint.tar.xz tensorflow_opt-Linux: name: "Lin|Check TensorFlow cache" runs-on: ubuntu-20.04 outputs: status: ${{ steps.check_artifact_exists.outputs.status }} cache_key: ${{ steps.get_cache_key.outputs.key }} steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - id: get_cache_key uses: ./.github/actions/get_cache_key with: extras: "2" - id: check_artifact_exists uses: ./.github/actions/check_artifact_exists with: name: ${{ steps.get_cache_key.outputs.key }} build-tensorflow-Linux: name: "Lin|Build TensorFlow (opt)" needs: tensorflow_opt-Linux runs-on: ubuntu-20.04 steps: - run: true if: needs.tensorflow_opt-Linux.outputs.status == 'found' - uses: actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - 
run: | sudo apt-get install -y --no-install-recommends pixz if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - uses: ./.github/actions/setup-tensorflow if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - uses: ./.github/actions/build-tensorflow with: flavor: "--linux-cpu" if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - uses: ./.github/actions/package-tensorflow if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - uses: actions/upload-artifact@v2 with: name: ${{ needs.tensorflow_opt-Linux.outputs.cache_key }} path: ${{ github.workspace }}/artifacts/home.tar.xz if: needs.tensorflow_opt-Linux.outputs.status == 'missing' build-lib_Linux: name: "Lin|Build libdeepspeech+client" runs-on: ubuntu-20.04 needs: [ build-tensorflow-Linux, tensorflow_opt-Linux ] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-Linux.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-Linux.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-Linux.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-Linux.outputs.status == 'found' - run: | tar --skip-old-files -xf ${{ github.workspace }}/home.tar.xz rm ${{ github.workspace }}/home.tar.xz - run: | sudo apt-get install -y --no-install-recommends make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python python-dev zlib1g-dev - run: | git status - uses: ./.github/actions/host-build with: flavor: ${{ matrix.build-flavor }} - uses: ./.github/actions/package - uses: actions/upload-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Linux.tar.xz" path: ${{ github.workspace }}/artifacts/native_client.tar.xz - uses: actions/upload-artifact@v2 with: name: "libdeepspeech.${{ 
matrix.build-flavor }}.zip" path: ${{ github.workspace }}/artifacts/libdeepspeech.zip build-python-Linux: name: "Lin|Build Python bindings" runs-on: ubuntu-20.04 needs: [ build-lib_Linux, swig_Linux ] strategy: matrix: build-flavor: ["tf", "tflite"] python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Linux.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: ${{ matrix.python-version }} - uses: ./.github/actions/python-build with: build_flavor: ${{ matrix.build-flavor }} numpy_build: "${{ steps.get_numpy.outputs.build_version }}" numpy_dep: "${{ steps.get_numpy.outputs.dep_version }}" - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-Linux.whl" path: ${{ github.workspace }}/wheels/*.whl build-nodejs-Linux: name: "Lin|Build NodeJS and ElectronJS" runs-on: ubuntu-20.04 needs: [ build-lib_Linux, swig_Linux ] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Linux.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - 
run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/cache@v2 id: node-headers-cache with: path: native_client/javascript/headers/nodejs/ key: node-headers-10.0.0_16.0.0 - uses: actions/cache@v2 id: electron-headers-cache with: path: native_client/javascript/headers/electronjs/ key: electron-headers-5.0.13_12.0.0 - uses: ./.github/actions/node-build with: nodejs_versions: "10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0 16.0.0" electronjs_versions: "5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0 12.0.0" - uses: actions/upload-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_amd64.tar.gz" path: ${{ github.workspace }}/native_client/javascript/wrapper.tar.gz - uses: actions/upload-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Linux.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz test-cpp-Linux: name: "Lin|Test C++ binary" runs-on: ubuntu-20.04 needs: [ build-lib_Linux, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: 
${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Linux.tar.xz" path: ${{ env.CI_TMP_DIR }} - run: | cd ${{ env.CI_TMP_DIR }} mkdir ds && cd ds && tar xf ../native_client.tar.xz - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: ./.github/actions/run-tests with: runtime: "cpp" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-py-Linux: name: "Lin|Test Python bindings" runs-on: ubuntu-20.04 needs: [ build-python-Linux, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: python-version: [3.6, 3.7, 3.8, 3.9] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - run: | sudo apt-get install -y --no-install-recommends sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-Linux.whl" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: 
matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ pip3 install --only-binary :all: --upgrade ${{ env.CI_TMP_DIR }}/deepspeech*.whl - uses: ./.github/actions/run-tests with: runtime: "python" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-nodejs-Linux: name: "Lin|Test NodeJS bindings" runs-on: ubuntu-20.04 needs: [ build-nodejs-Linux, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # https://nodejs.org/en/about/releases/ nodejs-version: [10, 12, 14, 16] build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - run: | sudo apt-get install -y --no-install-recommends sox - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Linux.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: node-modules-cache with: path: ~/.npm/ key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install --verbose ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | ls -hal 
node_modules/deepspeech* node_modules/.bin/ - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs-Linux: name: "Lin|Test ElectronJS bindings" runs-on: ubuntu-20.04 needs: [ build-nodejs-Linux, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 6.1.7, 7.1.8, 8.0.1, 9.2.0, 10.1.0, 11.0.0, 12.0.0] build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - run: | sudo apt-get install -y --no-install-recommends sox - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Linux.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: electron-modules-cache with: path: ~/.npm/ key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} 
model-kind: ${{ matrix.models }} timeout-minutes: 5 # macOS jobs swig_macOS: name: "Mac|Build SWIG" runs-on: macos-10.15 env: swig_hash: "90cdbee6a69d13b39d734083b9f91069533b0d7b" steps: - uses: actions/checkout@v2 with: repository: "swig/swig" ref: ${{ env.swig_hash }} - run: | mkdir -p build-static/ - uses: actions/cache@v2 id: swig-build-cache with: path: build-static/ key: swig-${{ runner.os }}-${{ env.swig_hash }} - run: | brew install automake if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | curl -sSL https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.gz > pcre-8.43.tar.gz if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | ./Tools/pcre-build.sh if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | sh autogen.sh ./configure \ --prefix=${{ github.workspace }}/build-static/ \ --program-prefix=ds- if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make -j if: steps.swig-build-cache.outputs.cache-hit != 'true' - run: | make install if: steps.swig-build-cache.outputs.cache-hit != 'true' - uses: actions/upload-artifact@v2 with: name: ${{ github.job }} path: ${{ github.workspace }}/build-static/ build-ctc-decoder-macos: name: "Mac|Build CTC decoder Python package for testing" needs: [ swig_macOS ] runs-on: macos-10.15 if: ${{ github.event_name == 'pull_request' }} steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: ./.github/actions/install-python-upstream with: version: 3.6.8 - run: | python --version pip --version - uses: actions/download-artifact@v2 with: name: "swig_macOS" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: 3.6.8 - name: Make decoder package run: 
| NUMPY_BUILD_VERSION=${{ steps.get_numpy.outputs.build_version }} \ NUMPY_DEP_VERSION=${{ steps.get_numpy.outputs.dep_version }} \ make -C native_client/ctcdecode/ \ NUM_PROCESSES=$(sysctl hw.ncpu |cut -d' ' -f2) \ bindings - uses: actions/upload-artifact@v2 with: name: "ds_ctcdecoder-macOS-test.whl" path: ${{ github.workspace }}/native_client/ctcdecode/dist/*.whl - run: | make -C native_client/ctcdecode clean-keep-third-party train-test-model-macOS: name: "Mac|Train a test model" needs: [ "build-ctc-decoder-macos" ] runs-on: macos-10.15 if: ${{ github.event_name == 'pull_request' }} strategy: matrix: bitrate: ["8k", "16k"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-python@v2 with: python-version: 3.6 - uses: actions/download-artifact@v2 with: name: "ds_ctcdecoder-macOS-test.whl" - run: | python --version pip --version - run: | pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 - run: | pip install ds_ctcdecoder-*-cp36-cp36m-*_x86_64.whl DS_NODECODER=y pip install --upgrade . - run: | bits="" if [ "${{ matrix.bitrate }}" = "8k" ]; then bits=8000 fi if [ "${{ matrix.bitrate }}" = "16k" ]; then bits=16000 fi # Easier to rename so that we can exercise the LDC93S1 importer code to # generate the CSV file. echo "Moving ${bits} to LDC93S1.wav" mv data/smoke_test/LDC93S1_pcms16le_1_${bits}.wav data/smoke_test/LDC93S1.wav ./bin/run-ci-ldc93s1_new.sh 249 ${bits} ./bin/run-ci-ldc93s1_tflite.sh ${bits} - run: | curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/macOS.amd64.convert_graphdef_memmapped_format.xz | xz -d > /tmp/convert_graphdef_memmapped_format chmod +x /tmp/convert_graphdef_memmapped_format /tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train/output_graph.pb --out_graph=/tmp/train/output_graph.pbmm - run: | tar -cf - \ -C /tmp/ckpt/ .
\ | xz -9 -T0 > /tmp/checkpoint.tar.xz - run: | mkdir -p ${{ github.workspace }}/tmp/ cp /tmp/train*/output_graph.* /tmp/checkpoint.tar.xz ${{ github.workspace }}/tmp/ - run: | ls -hal /tmp/ ${{ github.workspace }}/tmp/ - uses: actions/upload-artifact@v2 with: name: "test-model.tf-${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/output_graph.pb* - uses: actions/upload-artifact@v2 with: name: "test-model.tflite-${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/output_graph.tflite - uses: actions/upload-artifact@v2 with: name: "test-checkpoint.${{ matrix.bitrate }}.zip" path: ${{ github.workspace }}/tmp/checkpoint.tar.xz tensorflow_opt-macOS: name: "Mac|Check TensorFlow cache" runs-on: ubuntu-20.04 outputs: status: ${{ steps.check_artifact_exists.outputs.status }} cache_key: ${{ steps.get_cache_key.outputs.key }} steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - id: get_cache_key uses: ./.github/actions/get_cache_key with: extras: "2" - id: check_artifact_exists uses: ./.github/actions/check_artifact_exists with: name: ${{ steps.get_cache_key.outputs.key }} build-tensorflow-macOS: name: "Mac|Build TensorFlow (opt)" needs: tensorflow_opt-macOS runs-on: macos-10.15 steps: - run: true if: needs.tensorflow_opt-macOS.outputs.status == 'found' - uses: actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: ./.github/actions/select-xcode with: version: "12.1.1" if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: ./.github/actions/setup-tensorflow if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: ./.github/actions/build-tensorflow with: flavor: "--darwin-cpu" if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: ./.github/actions/package-tensorflow if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: actions/upload-artifact@v2 with: name: ${{ needs.tensorflow_opt-macOS.outputs.cache_key }} path: ${{ 
github.workspace }}/artifacts/home.tar.xz if: needs.tensorflow_opt-macOS.outputs.status == 'missing' build-lib_macOS: name: "Mac|Build libdeepspeech+client" runs-on: macos-10.15 needs: [ build-tensorflow-macOS, tensorflow_opt-macOS ] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-macOS.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-macOS.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-macOS.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-macOS.outputs.status == 'found' - run: | tar xkf ${{ github.workspace }}/home.tar.xz rm ${{ github.workspace }}/home.tar.xz - run: | git status - uses: ./.github/actions/select-xcode with: version: "12.1.1" - uses: ./.github/actions/host-build with: flavor: ${{ matrix.build-flavor }} - uses: ./.github/actions/package - uses: actions/upload-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.macOS.tar.xz" path: ${{ github.workspace }}/artifacts/native_client.tar.xz - uses: actions/upload-artifact@v2 with: name: "libdeepspeech.${{ matrix.build-flavor }}.zip" path: ${{ github.workspace }}/artifacts/libdeepspeech.zip build-python-macOS: name: "Mac|Build python bindings" runs-on: macos-10.15 needs: [ build-lib_macOS, swig_macOS ] strategy: matrix: build-flavor: ["tf", "tflite"] python-version: [3.6.8, 3.7.9, 3.8.8, 3.9.2] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.macOS.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_macOS" path: ${{ 
github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: ./.github/actions/install-python-upstream with: version: ${{ matrix.python-version }} # GitHub-packaged versions are limited to macOS deployment target 10.14 #- uses: actions/setup-python@v2 # with: # python-version: ${{ matrix.python-version }} - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: ${{ matrix.python-version }} - uses: ./.github/actions/python-build with: build_flavor: ${{ matrix.build-flavor }} numpy_build: "${{ steps.get_numpy.outputs.build_version }}" numpy_dep: "${{ steps.get_numpy.outputs.dep_version }}" - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-macOS.whl" path: ${{ github.workspace }}/wheels/*.whl build-nodejs-macOS: name: "Mac|Build NodeJS and ElectronJS" runs-on: macos-10.15 needs: [ build-lib_macOS, swig_macOS ] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.macOS.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_macOS" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses:
actions/setup-node@v2 with: node-version: 12 - uses: actions/cache@v2 id: node-headers-cache with: path: native_client/javascript/headers/nodejs/ key: node-headers-10.0.0_16.0.0 - uses: actions/cache@v2 id: electron-headers-cache with: path: native_client/javascript/headers/electronjs/ key: electron-headers-5.0.13_12.0.0 - uses: ./.github/actions/node-build with: nodejs_versions: "10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0 16.0.0" electronjs_versions: "5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0 12.0.0" - uses: actions/upload-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-macOS_amd64.tar.gz" path: ${{ github.workspace }}/native_client/javascript/wrapper.tar.gz - uses: actions/upload-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-macOS.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz test-cpp-macOS: name: "Mac|Test C++ binary" runs-on: macos-10.15 needs: [ build-lib_macOS, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.macOS.tar.xz" path: ${{ env.CI_TMP_DIR }} - run: | cd ${{ env.CI_TMP_DIR }} mkdir ds && cd ds && tar xf ../native_client.tar.xz - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 
'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: ./.github/actions/run-tests with: runtime: "cpp" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-py-macOS: name: "Mac|Test Python bindings" runs-on: macos-10.15 needs: [ build-python-macOS, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: python-version: [3.6.8, 3.7.9, 3.8.8, 3.9.2] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-macOS.whl" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ pip3 install --only-binary :all: --upgrade ${{ env.CI_TMP_DIR }}/deepspeech*.whl - uses: ./.github/actions/run-tests with: runtime: "python" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-nodejs-macOS: name: "Mac|Test NodeJS bindings" runs-on: macos-10.15 needs: [ build-nodejs-macOS, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # https://nodejs.org/en/about/releases/ 
nodejs-version: [10, 12, 14, 16] build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-macOS.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: node-modules-cache with: path: ~/.npm/ key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install --verbose ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | ls -hal node_modules/deepspeech* node_modules/.bin/ - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs-macOS: name: "Mac|Test ElectronJS bindings" runs-on: macos-10.15 needs: [ build-nodejs-macOS, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 6.1.7, 7.1.8, 8.0.1, 9.2.0, 10.1.0, 11.0.0, 12.0.0] build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: 
https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-macOS.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: electron-modules-cache with: path: ~/.npm/ key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 # Windows jobs build-ctc-decoder-windows: name: "Win|Build CTC decoder Python package" needs: [swig_Windows_crosscompiled] runs-on: windows-2019 steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- git make - uses: mozilla/msvc-dev-cmd@v1 - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/setup-python@v2 with: python-version: 3.7.9 - run: | python --version python -m pip --version - uses: actions/download-artifact@v2 with: name: 
"swig_Windows_crosscompiled" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | set -ex ls -hal native_client/ds-swig/bin ln -s ds-swig.exe native_client/ds-swig/bin/swig.exe chmod +x native_client/ds-swig/bin/ds-swig.exe native_client/ds-swig/bin/swig.exe - name: Remove /usr/bin/link conflicting with MSVC link.exe run: | rm /usr/bin/link - run: | make -C native_client/ctcdecode/ \ NUM_PROCESSES=$(nproc) \ bindings - uses: actions/upload-artifact@v2 with: name: "ds_ctcdecoder-windows-test.whl" path: ${{ github.workspace }}/native_client/ctcdecode/dist/*.whl - run: | make -C native_client/ctcdecode clean-keep-third-party tensorflow_opt-Windows: name: "Win|Check TensorFlow cache" runs-on: ubuntu-20.04 outputs: status: ${{ steps.check_artifact_exists.outputs.status }} cache_key: ${{ steps.get_cache_key.outputs.key }} steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - id: get_cache_key uses: ./.github/actions/get_cache_key with: extras: "7" - id: check_artifact_exists uses: ./.github/actions/check_artifact_exists with: name: ${{ steps.get_cache_key.outputs.key }} build-tensorflow-Windows: name: "Win|Build TensorFlow (opt)" needs: tensorflow_opt-Windows runs-on: windows-2019 steps: - run: true if: needs.tensorflow_opt-Windows.outputs.status == 'found' - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- git patch tar unzip zip if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - uses: actions/setup-python@v2 with: python-version: 3.7.9 if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - uses: actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' if: needs.tensorflow_opt-Windows.outputs.status == 'missing' # It's important that this PATH change only happens *after* the checkout # above, because otherwise the checkout fails when persisisting the # credentials for submodules due to using MSYS2 Git - name: Switch git-bash shell to MSYS2 shell by adding 
MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - run: ./ci_scripts/tf-setup.sh if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - run: ./ci_scripts/tf-build.sh "--windows-cpu" if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - run: ./ci_scripts/tf-package.sh if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - uses: actions/upload-artifact@v2 with: name: ${{ needs.tensorflow_opt-Windows.outputs.cache_key }} path: ${{ github.workspace }}/artifacts/home.tar.xz if: needs.tensorflow_opt-Windows.outputs.status == 'missing' build-lib_Windows: name: "Win|Build libdeepspeech+client" runs-on: windows-2019 needs: [build-tensorflow-Windows, tensorflow_opt-Windows] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: mozilla/msvc-dev-cmd@v1 - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS update: true install: >- git make patch pkg-config tar unzip zip - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-Windows.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-Windows.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-Windows.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-Windows.outputs.status == 'found' - run: | "C:/Program Files/7-Zip/7z.exe" x home.tar.xz -so | "C:/Program Files/7-Zip/7z.exe" x -aos -si -ttar -o`pwd` rm home.tar.xz - run: | git status - run: ./ci_scripts/host-build.sh ${{ matrix.build-flavor }} - run: ./ci_scripts/package.sh - uses: actions/upload-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Windows.tar.xz" path: ${{ github.workspace }}/artifacts/native_client.tar.xz - uses: 
actions/upload-artifact@v2 with: name: "libdeepspeech.${{ matrix.build-flavor }}.zip" path: ${{ github.workspace }}/artifacts/libdeepspeech.zip build-python-Windows: name: "Win|Build Python bindings" runs-on: windows-2019 needs: [build-lib_Windows, swig_Windows_crosscompiled] strategy: matrix: build-flavor: ["tf", "tflite"] # Try to keep Python versions in sync with cached versions to speed things up: # https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md python-version: [3.6.8, 3.7.9, 3.8.8, 3.9.4] steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- make - uses: mozilla/msvc-dev-cmd@v1 - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Windows.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | pushd tensorflow/bazel-bin/native_client/ "C:/Program Files/7-Zip/7z.exe" x native_client.tar.xz -so | "C:/Program Files/7-Zip/7z.exe" x -aoa -si -ttar -o`pwd` ls -hal popd - uses: actions/download-artifact@v2 with: name: "swig_Windows_crosscompiled" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | set -ex ls -hal native_client/ds-swig/bin ln -s ds-swig.exe native_client/ds-swig/bin/swig.exe chmod +x native_client/ds-swig/bin/ds-swig.exe native_client/ds-swig/bin/swig.exe - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Remove /usr/bin/link conflicting with MSVC link.exe run: | rm /usr/bin/link - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: ${{ matrix.python-version }} - uses: ./.github/actions/python-build with: build_flavor: ${{ matrix.build-flavor }} numpy_build: "${{ steps.get_numpy.outputs.build_version }}" numpy_dep: "${{ 
steps.get_numpy.outputs.dep_version }}" - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-Windows.whl" path: ${{ github.workspace }}/wheels/*.whl build-nodejs-Windows: name: "Win|Build NodeJS/ElectronJS" runs-on: windows-2019 needs: [build-lib_Windows, swig_Windows_crosscompiled] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- make tar - uses: mozilla/msvc-dev-cmd@v1 - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Windows.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | pushd tensorflow/bazel-bin/native_client/ "C:/Program Files/7-Zip/7z.exe" x native_client.tar.xz -so | "C:/Program Files/7-Zip/7z.exe" x -aoa -si -ttar -o`pwd` ls -hal popd - uses: actions/download-artifact@v2 with: name: "swig_Windows_crosscompiled" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | set -ex ls -hal native_client/ds-swig/bin ln -s ds-swig.exe native_client/ds-swig/bin/swig.exe chmod +x native_client/ds-swig/bin/ds-swig.exe native_client/ds-swig/bin/swig.exe - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/cache@v2 id: node-headers-cache with: path: native_client/javascript/headers/nodejs/ key: node-headers-win-10.0.0_16.0.0 - uses: actions/cache@v2 id: electron-headers-cache with: path: native_client/javascript/headers/electronjs/ key: electron-headers-win-5.0.13_12.0.0 - uses: ./.github/actions/node-build with: nodejs_versions: "10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0 16.0.0" electronjs_versions: "5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0 12.0.0" - uses: 
actions/upload-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Windows_amd64.tar.gz" path: ${{ github.workspace }}/native_client/javascript/wrapper.tar.gz - uses: actions/upload-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Windows.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz test-cpp-Windows: name: "Win|Test C++ binary" runs-on: windows-2019 needs: [build-lib_Windows, train-test-model-Linux] strategy: matrix: build-flavor: ["tf", "tflite"] env: CI_TMP_DIR: tmp/ DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - name: Download native_client.tar.xz uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.Windows.tar.xz" path: ${{ env.CI_TMP_DIR }} - name: Extract native_client.tar.xz run: | mkdir -p ${{ env.CI_TMP_DIR }}/ds pushd ${{ env.CI_TMP_DIR }}/ds "C:/Program Files/7-Zip/7z.exe" x ../native_client.tar.xz -so | "C:/Program Files/7-Zip/7z.exe" x -aoa -si -ttar -o`pwd` ls -hal popd - name: Download trained test model uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-16k.zip" path: ${{ env.CI_TMP_DIR }} - run: | ls -hal ${{ env.CI_TMP_DIR }}/ - uses: ./.github/actions/run-tests with: runtime: "cppwin" build-flavor: ${{ matrix.build-flavor }} bitrate: "16k" model-kind: "" test-py-Windows: name: "Win|Test Python bindings" runs-on: windows-2019 needs: [ build-python-Windows, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # Try to keep Python versions in sync with cached versions to speed things up: # 
https://github.com/actions/virtual-environments/blob/main/images/win/Windows2019-Readme.md python-version: [3.6.8, 3.7.9, 3.8.8, 3.9.4] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - uses: ./.github/actions/win-install-sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-Windows.whl" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ python -m pip install --only-binary :all: --upgrade ${{ env.CI_TMP_DIR }}/deepspeech*.whl - uses: ./.github/actions/run-tests with: runtime: "python" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-nodejs-Windows: name: "Win|Test NodeJS bindings" runs-on: windows-2019 needs: [ build-nodejs-Windows, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: nodejs-version: [10, 12, 14, 16] build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: tmp/ 
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - uses: ./.github/actions/win-install-sox - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Windows.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - name: Get npm cache directory id: npm-cache-dir run: | echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT" - uses: actions/cache@v2 id: node-modules-cache with: path: ${{ steps.npm-cache-dir.outputs.dir }} key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs-Windows: name: "Win|Test ElectronJS bindings" runs-on: windows-2019 needs: [ build-nodejs-Windows, train-test-model-Linux ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 6.1.7, 7.1.8, 8.0.1, 9.2.0, 10.1.0, 11.0.0, 12.0.0] 
build-flavor: ["tf", "tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - uses: ./.github/actions/win-install-sox - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-Windows.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Get npm cache directory id: npm-cache-dir run: | echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT" - uses: actions/cache@v2 id: electron-modules-cache with: path: ${{ steps.npm-cache-dir.outputs.dir }} key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 # Shared jobs (multi-platform dependencies) repackage-nodejs-allplatforms: name: "Repackage NodeJS / ElectronJS for multiplatforms" runs-on: 
ubuntu-20.04 needs: [build-nodejs-macOS, build-nodejs-Windows, build-nodejs-Linux, build-nodejs-LinuxArmv7, build-nodejs-LinuxAarch64] strategy: matrix: build-flavor: ["tf", "tflite"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - run: | mkdir -p /tmp/nodewrapper-macOS_amd64/ /tmp/nodewrapper-Windows_amd64/ - uses: actions/download-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-macOS_amd64.tar.gz" path: /tmp/nodewrapper-macOS_amd64/ - uses: actions/download-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Windows_amd64.tar.gz" path: /tmp/nodewrapper-Windows_amd64/ - uses: actions/download-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_amd64.tar.gz" path: /tmp/nodewrapper-Linux_amd64/ - uses: actions/download-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_armv7.tar.gz" path: /tmp/nodewrapper-Linux_armv7/ if: matrix.build-flavor == 'tflite' - uses: actions/download-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_aarch64.tar.gz" path: /tmp/nodewrapper-Linux_aarch64/ if: matrix.build-flavor == 'tflite' - name: Extract nodewrapper archives run: | tar -C ${{ github.workspace }}/native_client/javascript -xzvf /tmp/nodewrapper-macOS_amd64/wrapper.tar.gz tar -C ${{ github.workspace }}/native_client/javascript -xzvf /tmp/nodewrapper-Windows_amd64/wrapper.tar.gz tar -C ${{ github.workspace }}/native_client/javascript -xzvf /tmp/nodewrapper-Linux_amd64/wrapper.tar.gz - name: Extract nodewrapper tflite-only archives run: | tar -C ${{ github.workspace }}/native_client/javascript -xzvf /tmp/nodewrapper-Linux_armv7/wrapper.tar.gz tar -C ${{ github.workspace }}/native_client/javascript -xzvf /tmp/nodewrapper-Linux_aarch64/wrapper.tar.gz if: matrix.build-flavor == 'tflite' - run: | PROJECT_NAME="deepspeech" if [ "${{ matrix.build-flavor }}" = "tflite" ]; then PROJECT_NAME="deepspeech-tflite" fi make -C 
native_client/javascript clean npm-pack PROJECT_NAME=$PROJECT_NAME - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz test-nodejs_all-Linux: name: "Lin|Test MultiArchPlatform NodeJS bindings" runs-on: ubuntu-20.04 needs: [repackage-nodejs-allplatforms, train-test-model-Linux] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # https://nodejs.org/en/about/releases/ nodejs-version: [10, 16] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - run: | sudo apt-get update && sudo apt-get install -y --no-install-recommends sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - uses: actions/cache@v2 id: node-modules-cache with: path: ~/.npm/ key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install --verbose ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | ls -hal node_modules/deepspeech* node_modules/.bin/ - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ 
matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs_all-Linux: name: "Lin|Test MultiArchPlatform ElectronJS bindings" runs-on: ubuntu-20.04 needs: [repackage-nodejs-allplatforms, train-test-model-Linux] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 12.0.0] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - run: | sudo apt-get update && sudo apt-get install -y --no-install-recommends sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: electron-modules-cache with: path: ~/.npm/ key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 test-nodejs_all-macOS: name: "Mac|Test MultiArchPlatform NodeJS bindings" runs-on: 
macos-10.15 needs: [ repackage-nodejs-allplatforms, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # https://nodejs.org/en/about/releases/ nodejs-version: [10, 16] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - uses: actions/cache@v2 id: node-modules-cache with: path: ~/.npm/ key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install --verbose ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | ls -hal node_modules/deepspeech* node_modules/.bin/ - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs_all-macOS: name: "Mac|Test MultiArchPlatform ElectronJS bindings" runs-on: macos-10.15 needs: [ repackage-nodejs-allplatforms, train-test-model-macOS ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 12.0.0] build-flavor: ["tf", "tflite"] 
models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: actions/cache@v2 id: electron-modules-cache with: path: ~/.npm/ key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 test-nodejs_all-Windows: name: "Win|Test MultiArchPlatform NodeJS bindings" runs-on: windows-2019 needs: [repackage-nodejs-allplatforms, train-test-model-Linux] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: # https://nodejs.org/en/about/releases/ nodejs-version: [10, 16] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: tmp/ DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: 
https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: ${{ matrix.nodejs-version }} - uses: ./.github/actions/win-install-sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - name: Get npm cache directory id: npm-cache-dir run: | echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT" - uses: actions/cache@v2 id: node-modules-cache with: path: ${{ steps.npm-cache-dir.outputs.dir }} key: node-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - uses: ./.github/actions/run-tests with: runtime: "node" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs_all-Windows: name: "Win|Test MultiArchPlatform ElectronJS bindings" runs-on: windows-2019 needs: [repackage-nodejs-allplatforms, train-test-model-Linux] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: electronjs-version: [5.0.13, 12.0.0] build-flavor: ["tf", "tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: tmp/ DEEPSPEECH_PROD_MODEL: 
https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH - uses: mozilla/setup-msys2@v2 with: msystem: MSYS path-type: inherit update: true install: >- vim - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/setup-node@v2 with: node-version: 12 - uses: ./.github/actions/win-install-sox - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}.tgz" path: ${{ env.CI_TMP_DIR }} - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }} if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: Get npm cache directory id: npm-cache-dir run: | echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT" - uses: actions/cache@v2 id: electron-modules-cache with: path: ${{ steps.npm-cache-dir.outputs.dir }} key: electron-modules-${{ matrix.build-flavor }}-${{ runner.os }}-${{ env.CI_NODE_MODULES_NTH }} - name: Install deepspeech package run: | ls -hal ${{ env.CI_TMP_DIR }}/ npm install ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | npm install electron@${{ matrix.electronjs-version }} - uses: ./.github/actions/run-tests with: runtime: "electronjs" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 # Linux Armv7 and Aarch64 jobs tensorflow_opt-LinuxArmv7: name: "LinArmv7|Check TensorFlow cache" runs-on: ubuntu-20.04 outputs: status: ${{ steps.check_artifact_exists.outputs.status }} cache_key: ${{ steps.get_cache_key.outputs.key }} strategy: matrix: arch: [ "armv7" ] 
steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - id: get_cache_key uses: ./.github/actions/get_cache_key with: extras: "0" - id: check_artifact_exists uses: ./.github/actions/check_artifact_exists with: name: ${{ steps.get_cache_key.outputs.key }} tensorflow_opt-LinuxAarch64: name: "LinAarch64|Check TensorFlow cache" runs-on: ubuntu-20.04 outputs: status: ${{ steps.check_artifact_exists.outputs.status }} cache_key: ${{ steps.get_cache_key.outputs.key }} strategy: matrix: arch: [ "aarch64" ] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - id: get_cache_key uses: ./.github/actions/get_cache_key with: extras: "0" - id: check_artifact_exists uses: ./.github/actions/check_artifact_exists with: name: ${{ steps.get_cache_key.outputs.key }} build-tensorflow-LinuxArmv7: name: "LinArmv7|Build TensorFlow (opt)" needs: tensorflow_opt-LinuxArmv7 runs-on: ubuntu-20.04 strategy: matrix: arch: [ "armv7" ] steps: - run: true if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'found' - uses: actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/setup-tensorflow if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/build-tensorflow with: flavor: "--linux-${{ matrix.arch }}" if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/package-tensorflow if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: actions/upload-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/artifacts/home.tar.xz if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' build-tensorflow-LinuxAarch64: name: "LinAarch64|Build TensorFlow (opt)" needs: tensorflow_opt-LinuxAarch64 runs-on: ubuntu-20.04 strategy: matrix: arch: [ "aarch64" ] steps: - run: true if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'found' - uses: 
actions/checkout@v2 with: fetch-depth: 0 submodules: 'recursive' if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/setup-tensorflow if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/build-tensorflow with: flavor: "--linux-${{ matrix.arch }}" if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/package-tensorflow if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: actions/upload-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/artifacts/home.tar.xz if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' build-lib_LinuxArmv7: name: "LinArmv7|Build libdeepspeech+client" runs-on: ubuntu-20.04 strategy: matrix: build-flavor: ["tflite"] arch: [ "armv7" ] needs: [ build-tensorflow-LinuxArmv7, tensorflow_opt-LinuxArmv7 ] env: SYSTEM_TARGET: rpi3 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-raspbian-buster steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - run: | git status - name: "Install chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} - uses: ./.github/actions/host-build with: arch: ${{ matrix.arch }} flavor: ${{ matrix.build-flavor }} - uses: ./.github/actions/package - uses: actions/upload-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" 
path: ${{ github.workspace }}/artifacts/native_client.tar.xz - uses: actions/upload-artifact@v2 with: name: "libdeepspeech.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.zip" path: ${{ github.workspace }}/artifacts/libdeepspeech.zip build-lib_LinuxAarch64: name: "LinAarch64|Build libdeepspeech+client" runs-on: ubuntu-20.04 strategy: matrix: build-flavor: ["tflite"] arch: [ "aarch64" ] needs: [ build-tensorflow-LinuxAarch64, tensorflow_opt-LinuxAarch64 ] env: SYSTEM_TARGET: rpi3-armv8 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-armbian64-buster steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - run: | git status - name: "Install chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} - uses: ./.github/actions/host-build with: arch: ${{ matrix.arch }} flavor: ${{ matrix.build-flavor }} - uses: ./.github/actions/package - uses: actions/upload-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ github.workspace }}/artifacts/native_client.tar.xz - uses: actions/upload-artifact@v2 with: name: "libdeepspeech.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.zip" path: ${{ github.workspace }}/artifacts/libdeepspeech.zip build-python-LinuxArmv7: name: "LinArmv7|Build python bindings" runs-on: ubuntu-20.04 needs: [ build-lib_LinuxArmv7, swig_Linux, tensorflow_opt-LinuxArmv7 ] strategy: matrix: build-flavor: ["tflite"] python-version: [3.7] arch: [ 
"armv7" ] env: DEBIAN_FRONTEND: "noninteractive" SYSTEM_TARGET: rpi3 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-raspbian-buster steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - uses: ./.github/actions/install-xldd with: target: ${{ env.SYSTEM_TARGET }} - name: "Install chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: ${{ matrix.python-version }} - uses: ./.github/actions/python-build with: build_flavor: ${{ matrix.build-flavor }} numpy_build: "${{ steps.get_numpy.outputs.build_version }}" numpy_dep: "${{ steps.get_numpy.outputs.dep_version 
}}" target: ${{ env.SYSTEM_TARGET }} chroot: ${{ env.SYSTEM_RASPBIAN }} - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-${{ matrix.arch }}.whl" path: ${{ github.workspace }}/wheels/*.whl build-nodejs-LinuxArmv7: name: "LinArmv7|Build NodeJS and ElectronJS" runs-on: ubuntu-20.04 needs: [ build-lib_LinuxArmv7, swig_Linux, tensorflow_opt-LinuxArmv7 ] strategy: matrix: build-flavor: ["tflite"] arch: [ "armv7" ] env: SYSTEM_TARGET: rpi3 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-raspbian-buster steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxArmv7.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxArmv7.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - uses: ./.github/actions/install-xldd with: target: ${{ env.SYSTEM_TARGET }} - name: "Install chroot" uses: 
./.github/actions/multistrap with: arch: ${{ matrix.arch }} - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/cache@v2 id: node-headers-cache with: path: native_client/javascript/headers/nodejs/ key: node-headers-10.0.0_15.0.0 - uses: actions/cache@v2 id: electron-headers-cache with: path: native_client/javascript/headers/electronjs/ key: electron-headers-5.0.13_12.0.0 - uses: ./.github/actions/node-build with: nodejs_versions: "10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0" electronjs_versions: "5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0 12.0.0" target: ${{ env.SYSTEM_TARGET }} chroot: ${{ env.SYSTEM_RASPBIAN }} - uses: actions/upload-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_${{ matrix.arch }}.tar.gz" path: ${{ github.workspace }}/native_client/javascript/wrapper.tar.gz - uses: actions/upload-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-${{ matrix.arch }}.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz build-python-LinuxAarch64: name: "LinAarch64|Build python bindings" runs-on: ubuntu-20.04 needs: [ build-lib_LinuxAarch64, swig_Linux, tensorflow_opt-LinuxAarch64 ] strategy: matrix: build-flavor: ["tflite"] python-version: [3.7] arch: [ "aarch64" ] env: DEBIAN_FRONTEND: "noninteractive" SYSTEM_TARGET: rpi3-armv8 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-armbian64-buster steps: - run: | sudo apt-get install -y --no-install-recommends - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - 
name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - uses: ./.github/actions/install-xldd with: target: ${{ env.SYSTEM_TARGET }} - name: "Install chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} - id: get_numpy uses: ./.github/actions/numpy_vers with: pyver: ${{ matrix.python-version }} - uses: ./.github/actions/python-build with: build_flavor: ${{ matrix.build-flavor }} numpy_build: "${{ steps.get_numpy.outputs.build_version }}" numpy_dep: "${{ steps.get_numpy.outputs.dep_version }}" target: ${{ env.SYSTEM_TARGET }} chroot: ${{ env.SYSTEM_RASPBIAN }} - uses: actions/upload-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-${{ matrix.arch }}.whl" path: ${{ github.workspace }}/wheels/*.whl build-nodejs-LinuxAarch64: name: "LinAarch64|Build NodeJS and ElectronJS" runs-on: ubuntu-20.04 needs: [ build-lib_LinuxAarch64, swig_Linux, tensorflow_opt-LinuxAarch64 ] strategy: matrix: build-flavor: ["tflite"] arch: [ "aarch64" ] env: SYSTEM_TARGET: rpi3-armv8 SYSTEM_RASPBIAN: ${{ github.workspace }}/multistrap-armbian64-buster steps: - uses: actions/checkout@v2 with: 
fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ - run: | cd ${{ github.workspace }}/tensorflow/bazel-bin/native_client/ tar xf native_client.tar.xz ls -hal cd ${{ github.workspace }}/ - uses: actions/download-artifact@v2 with: name: "swig_Linux" path: ${{ github.workspace }}/native_client/ds-swig/ - name: Link ds-swig into swig run: | ls -hal ${{ github.workspace }}/native_client/ds-swig/bin ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig - uses: actions/download-artifact@v2 with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'missing' - uses: ./.github/actions/check_artifact_exists with: name: ${{ needs.tensorflow_opt-LinuxAarch64.outputs.cache_key }} path: ${{ github.workspace }}/ download: true if: needs.tensorflow_opt-LinuxAarch64.outputs.status == 'found' - run: | tar -xf ${{ github.workspace }}/home.tar.xz --skip-old-files rm ${{ github.workspace }}/home.tar.xz - uses: ./.github/actions/install-xldd with: target: ${{ env.SYSTEM_TARGET }} - name: "Install chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} - uses: actions/setup-node@v2 with: node-version: 12 - uses: actions/cache@v2 id: node-headers-cache with: path: native_client/javascript/headers/nodejs/ key: node-headers-10.0.0_15.0.0 - uses: actions/cache@v2 id: electron-headers-cache with: path: native_client/javascript/headers/electronjs/ key: electron-headers-5.0.13_12.0.0 - uses: ./.github/actions/node-build with: nodejs_versions: "10.0.0 11.0.0 12.7.0 13.0.0 14.0.0 15.0.0" electronjs_versions: "5.0.13 6.0.12 6.1.7 7.0.1 7.1.8 8.0.1 9.0.1 9.1.0 9.2.0 10.0.0 10.1.0 11.0.0 12.0.0" 
target: ${{ env.SYSTEM_TARGET }} chroot: ${{ env.SYSTEM_RASPBIAN }} - uses: actions/upload-artifact@v2 with: name: "nodewrapper-${{ matrix.build-flavor }}-Linux_${{ matrix.arch }}.tar.gz" path: ${{ github.workspace }}/native_client/javascript/wrapper.tar.gz - uses: actions/upload-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-${{ matrix.arch }}.tgz" path: ${{ github.workspace }}/native_client/javascript/deepspeech-*.tgz build-test-chroot: name: "Lin|Build test chroot" runs-on: ubuntu-20.04 if: ${{ github.event_name == 'pull_request' }} strategy: matrix: arch: [ "armv7", "aarch64" ] env: CI_TMP_DIR: ${{ github.workspace }}/tmp DEBIAN_FRONTEND: "noninteractive" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - name: "Install and setup chroot" uses: ./.github/actions/multistrap with: arch: ${{ matrix.arch }} packages: "bash wget curl sox xxd libatlas3-base libopenblas-base ca-certificates python3 python3-pip gnupg libatk1.0-0 libatk-bridge2.0-0 libcairo2 libcups2 libdbus-1-3 libgdk-pixbuf2.0-0 libgtk-3-0 libgbm1 libnspr4 libnss3 libpango-1.0-0 libpangocairo-1.0-0 libx11-xcb1 libxcb-dri3-0 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 xauth" - name: "Create a chroot tarball" run: | sudo tar -cf - -C ${{ env.SYSTEM_RASPBIAN }}/ --one-file-system . 
| xz -9 -T0 > ${{ github.workspace }}/chroot.tar.xz - uses: actions/upload-artifact@v2 with: name: chroot-${{ matrix.arch }} path: ${{ github.workspace }}/chroot.tar.xz test-cpp-LinuxArm: name: "LinArm*|Test C++ binary" runs-on: ubuntu-20.04 needs: [ build-lib_LinuxArmv7, build-lib_LinuxAarch64, train-test-model-Linux, build-test-chroot ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: arch: [ "armv7", "aarch64" ] build-flavor: ["tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp DEBIAN_FRONTEND: "noninteractive" DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} steps: - name: "Install QEMU" run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends qemu-user-static - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "chroot-${{ matrix.arch }}" path: ${{ env.CI_TMP_DIR }}/ - run: | mkdir ${{ env.SYSTEM_RASPBIAN }}/ sudo tar -xf ${{ env.CI_TMP_DIR }}/chroot.tar.xz -C ${{ env.SYSTEM_RASPBIAN }}/ rm ${{ env.CI_TMP_DIR }}/chroot.tar.xz - uses: actions/download-artifact@v2 with: name: "native_client.${{ matrix.build-flavor }}.linux.${{ matrix.arch }}.tar.xz" path: ${{ env.CI_TMP_DIR }}/ - run: | cd ${{ env.CI_TMP_DIR }}/ mkdir ds && cd ds && tar xf ../native_client.tar.xz - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - name: "Check tests" run: | file ${{ env.SYSTEM_RASPBIAN }}/${{ 
env.CI_TMP_DIR }}/ds/* - uses: ./.github/actions/chroot-bind-mount with: mounts: "/dev" - uses: ./.github/actions/run-tests with: runtime: "cpp" chroot: "sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ${{ github.workspace }}" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-py-LinuxArm: name: "LinArm*|Test Python bindings" runs-on: ubuntu-20.04 needs: [ build-python-LinuxArmv7, build-python-LinuxAarch64, train-test-model-Linux, build-test-chroot ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: arch: [ "armv7", "aarch64" ] python-version: [3.7] build-flavor: ["tflite"] models: ["test", "prod"] bitrate: ["8k", "16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp DEBIAN_FRONTEND: "noninteractive" DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple https://lissyx.github.io/deepspeech-python-wheels/" steps: - name: "Install QEMU" run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends qemu-user-static - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "chroot-${{ matrix.arch }}" path: ${{ env.CI_TMP_DIR }}/ - run: | mkdir ${{ env.SYSTEM_RASPBIAN }}/ sudo tar -xf ${{ env.CI_TMP_DIR }}/chroot.tar.xz -C ${{ env.SYSTEM_RASPBIAN }}/ rm ${{ env.CI_TMP_DIR }}/chroot.tar.xz - uses: actions/download-artifact@v2 with: name: "deepspeech-${{ matrix.build-flavor }}-${{ matrix.python-version }}-${{ matrix.arch }}.whl" path: ${{ env.CI_TMP_DIR }}/ - uses: actions/download-artifact@v2 with: 
name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: ./.github/actions/chroot-bind-mount with: mounts: "/dev" - run: | ls -hal ${{ env.CI_TMP_DIR }}/ ls -hal ${{ github.workspace }}/ ls -hal ${{ env.SYSTEM_RASPBIAN }}/${{ github.workspace }}/ sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ pip3 install --only-binary :all: --upgrade ${{ env.CI_TMP_DIR }}/deepspeech*.whl - uses: ./.github/actions/run-tests with: runtime: "python" chroot: "sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ${{ github.workspace }}" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-nodejs-LinuxArm: name: "LinArm*|Test NodeJS bindings" runs-on: ubuntu-20.04 needs: [ build-nodejs-LinuxArmv7, build-nodejs-LinuxAarch64, train-test-model-Linux, build-test-chroot ] if: ${{ github.event_name == 'pull_request' }} strategy: matrix: arch: [ "armv7", "aarch64" ] # https://nodejs.org/en/about/releases/ nodejs-version: [10, 12, 14, 16] build-flavor: ["tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp DEBIAN_FRONTEND: "noninteractive" DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} steps: - name: "Install QEMU" run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends qemu-user-static - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "chroot-${{ matrix.arch }}" 
path: ${{ env.CI_TMP_DIR }}/ - run: | mkdir ${{ env.SYSTEM_RASPBIAN }}/ sudo tar -xf ${{ env.CI_TMP_DIR }}/chroot.tar.xz -C ${{ env.SYSTEM_RASPBIAN }}/ rm ${{ env.CI_TMP_DIR }}/chroot.tar.xz - name: "Install NodeJS" uses: ./.github/actions/node-install with: node: ${{ matrix.nodejs-version }} - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-${{ matrix.arch }}.tgz" path: ${{ env.CI_TMP_DIR }}/ - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: ./.github/actions/chroot-bind-mount with: mounts: "/dev" - name: Install deepspeech package run: | sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ npm install --prefix ${{ github.workspace }}/ --verbose ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - uses: ./.github/actions/run-tests with: runtime: "node" chroot: "sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ${{ github.workspace }}" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} test-electronjs-LinuxArm: name: "LinArm*|Test ElectronJS bindings" runs-on: ubuntu-20.04 needs: [ build-nodejs-LinuxArmv7, build-nodejs-LinuxAarch64, train-test-model-Linux, build-test-chroot ] # Disable this task because ElectronJS does not seem to run reliably under qemu if: ${{ github.event_name == 'disabled' }} strategy: matrix: arch: [ "armv7", "aarch64" ] electronjs-version: [5.0.13, 6.1.7, 7.1.8, 8.0.1, 9.2.0, 10.1.0, 11.0.0, 12.0.0] build-flavor: ["tflite"] models: ["test"] bitrate: ["16k"] env: CI_TMP_DIR: ${{ github.workspace }}/tmp DEBIAN_FRONTEND: "noninteractive" DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP:
https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} DISPLAY: ":99.0" steps: - name: "Install QEMU" run: | sudo apt-get update -y sudo apt-get install -y --no-install-recommends qemu-user-static xvfb xauth - uses: actions/checkout@v2 with: fetch-depth: 1 - uses: actions/download-artifact@v2 with: name: "chroot-${{ matrix.arch }}" path: ${{ env.CI_TMP_DIR }}/ - run: | mkdir ${{ env.SYSTEM_RASPBIAN }}/ sudo tar -xf ${{ env.CI_TMP_DIR }}/chroot.tar.xz -C ${{ env.SYSTEM_RASPBIAN }}/ rm ${{ env.CI_TMP_DIR }}/chroot.tar.xz - name: "Install NodeJS" uses: ./.github/actions/node-install with: node: 12 - uses: actions/download-artifact@v2 with: name: "deepspeech_intermediate-${{ matrix.build-flavor }}-${{ matrix.arch }}.tgz" path: ${{ env.CI_TMP_DIR }}/ - uses: actions/download-artifact@v2 with: name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip" path: ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - run: | ls -hal ${{ env.CI_TMP_DIR }}/ if: matrix.models == 'test' - uses: ./.github/actions/chroot-bind-mount with: mounts: "/dev /proc /sys /run" - name: Install deepspeech package run: | sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ npm install --prefix ${{ github.workspace }}/ ${{ env.CI_TMP_DIR }}/deepspeech*.tgz - run: | sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ npm install --prefix ${{ github.workspace }}/ electron@${{ matrix.electronjs-version }} - name: "Fake X display" run: | sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 & xvfb_process=$! 
echo $xvfb_process > ${{ env.CI_TMP_DIR }}/xvfb.pid cat ${{ env.CI_TMP_DIR }}/xvfb.pid - name: "Debug missing libs" run: | sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ls -hal ${{ github.workspace }}/node_modules/electron/dist/electron sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ldd ${{ github.workspace }}/node_modules/electron/dist/electron - uses: ./.github/actions/run-tests with: runtime: "electronjs" chroot: "sudo --preserve-env chroot --userspec=runner:docker ${{ env.SYSTEM_RASPBIAN }}/ ${{ github.workspace }}" build-flavor: ${{ matrix.build-flavor }} bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 - name: "Kill X" run: | cat ${{ env.CI_TMP_DIR }}/xvfb.pid sudo kill -9 $(cat ${{ env.CI_TMP_DIR }}/xvfb.pid) ================================================ FILE: .github/workflows/docker.yml ================================================ name: "Docker Images" on: pull_request: push: branches: - master jobs: make-docker-img: name: "Build Dockerfile" runs-on: ubuntu-20.04 strategy: matrix: template: ["build", "train"] steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - run: | make Dockerfile.${{ matrix.template }} \ DEEPSPEECH_REPO=https://github.com/${{ github.repository }} \ DEEPSPEECH_SHA=${{ github.sha }} - run: | mkdir /tmp/empty - run: | cd /tmp/empty; docker build -t app:${{ matrix.template }} -f ${{ github.workspace }}/Dockerfile.${{ matrix.template }} . 
- run: | docker save app:${{ matrix.template}} | zstd -o app_${{ matrix.template }}.zstd ================================================ FILE: .github/workflows/lint.yml ================================================ name: "Python linter" on: pull_request: jobs: lint: name: "Running cardboardlinter" runs-on: ubuntu-20.04 container: image: python:3.9.4-slim-buster steps: # https://github.com/actions/checkout/issues/175#issuecomment-595410280 - run: | apt-get -qq -y update apt-get -qq -y install git - uses: actions/checkout@v2 with: fetch-depth: 0 - run: | pip install --upgrade cardboardlint pylint - run: | set -ex # Check if branch can be merged with master (if failing script will stop due to set -e) git config user.email "you@example.com" git config user.name "Your Name" git merge --no-commit --no-ff origin/${{ github.base_ref }} - run: | set -ex # Undo merge changes if any git reset --hard ${{ github.sha }} - run: | set -ex # Lint differences against master cardboardlinter --refspec origin/${{ github.base_ref }} -n auto; ================================================ FILE: .gitignore ================================================ .ipynb_checkpoints *.pyc *.swp *.DS_Store *.egg-info .pit* /.run /werlog.js /runs /logs /exports /data/ldc93s1 /native_client/setup.cfg /native_client/build /native_client/*.egg-info /native_client/dist /native_client/deepspeech /native_client/ds-swig /native_client/libdeepspeech.so /native_client/node_modules /native_client/javascript/build /native_client/javascript/lib /native_client/javascript/package.json /native_client/javascript/package-lock.json /native_client/javascript/client.js /native_client/javascript/deepspeech_wrap.cxx /native_client/javascript/node_modules /native_client/python/MANIFEST.in /native_client/python/dist /native_client/python/impl.py /native_client/python/impl_wrap.cpp /doc/.build/ /doc/xml-c/ /doc/xml-java/ Dockerfile.build Dockerfile.train doc/xml-c doc/xml-java doc/xml-dotnet 
convert_graphdef_memmapped_format native_client/swift/deepspeech_ios.framework/deepspeech_ios .github/actions/check_artifact_exists/node_modules/ ================================================ FILE: .gitmodules ================================================ [submodule "doc/examples"] path = doc/examples url = https://github.com/mozilla/DeepSpeech-examples.git branch = master [submodule "tensorflow"] path = tensorflow url = https://github.com/mozilla/tensorflow.git [submodule "kenlm"] path = kenlm url = https://github.com/kpu/kenlm ================================================ FILE: .isort.cfg ================================================ [settings] line_length=80 multi_line_output=3 default_section=FIRSTPARTY ================================================ FILE: .pylintrc ================================================ [MASTER] # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loaded into the active Python interpreter and may # run arbitrary code. extension-pkg-whitelist= # Add files or directories to the blacklist. They should be base names, not # paths. ignore=native_client/kenlm # Add files or directories matching the regex patterns to the blacklist. The # regex matches against base names, not paths. ignore-patterns= # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the # number of processors available to use. jobs=1 # Control the amount of potential inferred values when inferring a single # object. This can help the performance when dealing with large functions or # complex, nested conditions. limit-inference-results=100 # List of plugins (as comma-separated values of Python module names) to load, # usually to register additional checkers. load-plugins= # Pickle collected data for later comparisons. persistent=yes # Specify a configuration file.
#rcfile= # When enabled, pylint would attempt to guess common misconfiguration and emit # user-friendly hints instead of false-positive error messages. suggestion-mode=yes # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no [MESSAGES CONTROL] # Only show warnings with the listed confidence levels. Leave empty to show # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. confidence= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration # file where it should appear only once). You can also use "--disable=all" to # disable everything first and then reenable specific checks. For example, if # you want to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use "--disable=all --enable=classes # --disable=W". 
disable=missing-docstring, line-too-long, wrong-import-order, ungrouped-imports, wrong-import-position, import-error, no-name-in-module, no-member, unsubscriptable-object, print-statement, parameter-unpacking, unpacking-in-except, old-raise-syntax, backtick, long-suffix, old-ne-operator, old-octal-literal, import-star-module-level, non-ascii-bytes-literal, raw-checker-failed, bad-inline-option, locally-disabled, file-ignored, suppressed-message, useless-suppression, deprecated-pragma, use-symbolic-message-instead, useless-object-inheritance, too-few-public-methods, too-many-branches, too-many-arguments, too-many-locals, too-many-statements, apply-builtin, basestring-builtin, buffer-builtin, cmp-builtin, coerce-builtin, execfile-builtin, file-builtin, long-builtin, raw_input-builtin, reduce-builtin, standarderror-builtin, unicode-builtin, xrange-builtin, coerce-method, delslice-method, getslice-method, setslice-method, no-absolute-import, old-division, dict-iter-method, dict-view-method, next-method-called, metaclass-assignment, indexing-exception, raising-string, reload-builtin, oct-method, hex-method, nonzero-method, cmp-method, input-builtin, round-builtin, intern-builtin, unichr-builtin, map-builtin-not-iterating, zip-builtin-not-iterating, range-builtin-not-iterating, filter-builtin-not-iterating, using-cmp-argument, eq-without-hash, div-method, idiv-method, rdiv-method, exception-message-attribute, invalid-str-codec, sys-max-int, bad-python3-import, deprecated-string-function, deprecated-str-translate-call, deprecated-itertools-function, deprecated-types-field, next-method-defined, dict-items-not-iterating, dict-keys-not-iterating, dict-values-not-iterating, deprecated-operator-function, deprecated-urllib-function, xreadlines-attribute, deprecated-sys-function, exception-escape, comprehension-escape # Enable the message, report, category or checker with the given id(s). 
You can # either give multiple identifier separated by comma (,) or put this option # multiple time (only on the command line, not in the configuration file where # it should appear only once). See also the "--disable" option for examples. enable=c-extension-no-member [REPORTS] # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (RP0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Template used to display messages. This is a python new-style format string # used to format the message information. See doc for all details. #msg-template= # Set the output format. Available formats are text, parseable, colorized, json # and msvs (visual studio). You can also give a reporter class, e.g. # mypackage.mymodule.MyReporterClass. output-format=text # Tells whether to display a full report or only the messages. reports=no # Activate the evaluation score. score=yes [REFACTORING] # Maximum number of nested blocks for function / method body max-nested-blocks=5 # Complete name of functions that never returns. When checking for # inconsistent-return-statements if a never returning function is called then # it will be considered as an explicit return statement and no message will be # printed. never-returning-functions=sys.exit [LOGGING] # Format style used to check logging format string. `old` means using % # formatting, while `new` is for `{}` formatting. logging-format-style=old # Logging modules to check that the string format arguments are in logging # function parameter format. logging-modules=logging [SPELLING] # Limits count of emitted suggestions for spelling mistakes. max-spelling-suggestions=4 # Spelling dictionary name. Available dictionaries: none. 
To make it working # install python-enchant package.. spelling-dict= # List of comma separated words that should not be checked. spelling-ignore-words= # A path to a file that contains private dictionary; one word per line. spelling-private-dict-file= # Tells whether to store unknown words to indicated private dictionary in # --spelling-private-dict-file option instead of raising a message. spelling-store-unknown-words=no [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME, XXX, TODO [TYPECHECK] # List of decorators that produce context managers, such as # contextlib.contextmanager. Add to this list to register other decorators that # produce valid context managers. contextmanager-decorators=contextlib.contextmanager # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. generated-members= # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # Tells whether to warn about missing members when the owner of the attribute # is inferred to be None. ignore-none=yes # This flag controls whether pylint should warn about no-member and similar # checks whenever an opaque object is returned when inferring. The inference # can return multiple potential results while evaluating a Python object, but # some branches might not be evaluated, which results in partial inference. In # that case, it might be useful to still emit no-member and other checks for # the rest of the inferred objects. ignore-on-opaque-inference=yes # List of class names for which member attributes should not be checked (useful # for classes with dynamically set attributes). This supports the use of # qualified names. 
ignored-classes=optparse.Values,thread._local,_thread._local # List of module names for which member attributes should not be checked # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. ignored-modules= # Show a hint with possible names when a member name was not found. The aspect # of finding the hint is based on edit distance. missing-member-hint=yes # The minimum edit distance a name should have in order to be considered a # similar match for a missing member name. missing-member-hint-distance=1 # The total number of similar names that should be taken in consideration when # showing a hint for a missing member. missing-member-max-choices=1 [VARIABLES] # List of additional names supposed to be defined in builtins. Remember that # you should avoid defining new builtins when possible. additional-builtins= # Tells whether unused global variables should be treated as a violation. allow-global-unused-variables=yes # List of strings which can identify a callback function by name. A callback # name must start or end with one of those strings. callbacks=cb_, _cb # A regular expression matching the name of dummy variables (i.e. expected to # not be used). dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ # Argument names that match this expression will be ignored. Default to name # with leading underscore. ignored-argument-names=_.*|^ignored_|^unused_ # Tells whether we should check for unused import in __init__ files. init-import=no # List of qualified module names which can have objects that can redefine # builtins. redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io [FORMAT] # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
expected-line-ending-format= # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$ # Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' # Maximum number of characters on a single line. max-line-length=100 # Maximum number of lines in a module. max-module-lines=1000 # List of optional constructs for which whitespace checking is disabled. `dict- # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. # `trailing-comma` allows a space between comma and closing bracket: (a, ). # `empty-line` allows space-only lines. no-space-check=trailing-comma, dict-separator # Allow the body of a class to be on the same line as the declaration if body # contains single statement. single-line-class-stmt=no # Allow the body of an if to be on the same line as the test if there is no # else. single-line-if-stmt=no [SIMILARITIES] # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes # Ignore imports when computing similarities. ignore-imports=no # Minimum lines number of a similarity. min-similarity-lines=4 [BASIC] # Naming style matching correct argument names. argument-naming-style=snake_case # Regular expression matching correct argument names. Overrides argument- # naming-style. argument-rgx=[a-z_][a-z0-9_]{0,30}$ # Naming style matching correct attribute names. attr-naming-style=snake_case # Regular expression matching correct attribute names. Overrides attr-naming- # style. #attr-rgx= # Bad variable names which should always be refused, separated by a comma. bad-names= # Naming style matching correct class attribute names. class-attribute-naming-style=any # Regular expression matching correct class attribute names. Overrides class- # attribute-naming-style. 
#class-attribute-rgx= # Naming style matching correct class names. class-naming-style=PascalCase # Regular expression matching correct class names. Overrides class-naming- # style. #class-rgx= # Naming style matching correct constant names. const-naming-style=UPPER_CASE # Regular expression matching correct constant names. Overrides const-naming- # style. #const-rgx= # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. docstring-min-length=-1 # Naming style matching correct function names. function-naming-style=snake_case # Regular expression matching correct function names. Overrides function- # naming-style. #function-rgx= # Good variable names which should always be accepted, separated by a comma. good-names=i, j, k, x, ex, Run, _ # Include a hint for the correct naming format with invalid-name. include-naming-hint=no # Naming style matching correct inline iteration names. inlinevar-naming-style=any # Regular expression matching correct inline iteration names. Overrides # inlinevar-naming-style. #inlinevar-rgx= # Naming style matching correct method names. method-naming-style=snake_case # Regular expression matching correct method names. Overrides method-naming- # style. #method-rgx= # Naming style matching correct module names. module-naming-style=snake_case # Regular expression matching correct module names. Overrides module-naming- # style. #module-rgx= # Colon-delimited sets of names that determine each other's naming style when # the name regexes allow several styles. name-group= # Regular expression which should only match function or class names that do # not require a docstring. no-docstring-rgx=^_ # List of decorators that produce properties, such as abc.abstractproperty. Add # to this list to register other decorators that produce valid properties. # These decorators are taken in consideration only for invalid-name. property-classes=abc.abstractproperty # Naming style matching correct variable names. 
variable-naming-style=snake_case # Regular expression matching correct variable names. Overrides variable- # naming-style. variable-rgx=[a-z_][a-z0-9_]{0,30}$ [STRING] # This flag controls whether the implicit-str-concat-in-sequence should # generate a warning on implicit string concatenation in sequences defined over # several lines. check-str-concat-over-line-jumps=no [IMPORTS] # Allow wildcard imports from modules that define __all__. allow-wildcard-with-all=no # Analyse import fallback blocks. This can be used to support both Python 2 and # 3 compatible code, which means that the block might have code that exists # only in one or another interpreter, leading to false positives when analysed. analyse-fallback-blocks=no # Deprecated modules which should not be used, separated by a comma. deprecated-modules=optparse,tkinter.tix # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled). ext-import-graph= # Create a graph of every (i.e. internal and external) dependencies in the # given file (report RP0402 must not be disabled). import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled). int-import-graph= # Force import order to recognize a module as part of the standard # compatibility libraries. known-standard-library= # Force import order to recognize a module as part of a third party library. known-third-party=enchant [CLASSES] # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__, __new__, setUp # List of member names, which should be excluded from the protected access # warning. exclude-protected=_asdict, _fields, _replace, _source, _make # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls # List of valid names for the first argument in a metaclass class method. valid-metaclass-classmethod-first-arg=cls [DESIGN] # Maximum number of arguments for function / method. 
max-args=5 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Maximum number of boolean expressions in an if statement. max-bool-expr=5 # Maximum number of branch for function / method body. max-branches=12 # Maximum number of locals for function / method body. max-locals=15 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of public methods for a class (see R0904). max-public-methods=20 # Maximum number of return / yield for function / method body. max-returns=6 # Maximum number of statements in function / method body. max-statements=50 # Minimum number of public methods for a class (see R0903). min-public-methods=2 [EXCEPTIONS] # Exceptions that will emit a warning when being caught. Defaults to # "BaseException, Exception". overgeneral-exceptions=BaseException, Exception ================================================ FILE: .readthedocs.yml ================================================ # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: builder: html configuration: doc/conf.py # Optionally set the version of Python and requirements required to build your docs python: version: 3.7 install: - requirements: ci_scripts/docs-requirements.txt ================================================ FILE: BIBLIOGRAPHY.md ================================================ This file contains a list of papers in chronological order that have been published using DeepSpeech. To appear ========== * Raghuveer Peri, Haoqi Li, Krishna Somandepalli, Arindam Jati, Shrikanth Narayanan (2020) "An empirical analysis of information encoded in disentangled neural speaker representations". * Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M. 
Tyers, and Gregor Weber (2020) "Common Voice: A Massively-Multilingual Speech Corpus". Published ========== 2020 ---------- * Nils Hjortnaes, Niko Partanen, Michael Rießler and Francis M. Tyers (2020) "Towards a Speech Recognizer for Komi, an Endangered and Low-Resource Uralic Language". *Proceedings of the 6th International Workshop on Computational Linguistics of Uralic Languages*. ``` @inproceedings{hjortnaes:2020, author = {Nils Hjortnaes and Niko Partanen and Michael Rießler and Francis M. Tyers}, title = {Towards a Speech Recognizer for Komi, an Endangered and Low-Resource Uralic Language}, booktitle = {Proceedings of the 6th International Workshop on Computational Linguistics of Uralic Languages}, year = 2020 } ``` 2019 ---------- * Aashish Agarwal and Torsten Zesch (2019) "German End-to-end Speech Recognition based on DeepSpeech". *Proceedings of the 15th Conference on Natural Language Processing (KONVENS 2019)* ``` @inproceedings{agarwal:2019, author = {Aashish Agarwal and Torsten Zesch}, title = {German End-to-end Speech Recognition based on DeepSpeech}, booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS 2019)}, year = 2019 } ``` * Yihong Theis (2019) "Learning to detect named entities in bilingual code-mixed open speech corpora". MA Thesis. Kansas State University. ``` @mastersthesis{theis:2019, author = {Yihong Theis}, title = {Learning to detect named entities in bilingual code-mixed open speech corpora}, school = {Kansas State University}, year = 2019 } ``` * Ruswan Efendi (2019) "Automatic Speech Recognition Bahasa Indonesia Menggunakan Bidirectional Long Short-Term Memory dan Connectionist Temporal Classification". MA Thesis. Universitas Sumatera Utara.
``` @mastersthesis{efendi:2019, author = {Ruswan Efendi}, title = {Automatic Speech Recognition Bahasa Indonesia Menggunakan Bidirectional Long Short-Term Memory dan Connectionist Temporal Classification}, school = {Universitas Sumatera Utara}, year = 2019 } ``` 2018 ------------ * Deepthi Karkada and Vikram A. Saletore (2018) "Training Speech Recognition Models on HPC Infrastructure". 2018 IEEE/ACM Machine Learning in HPC Environments (MLHPC), Dallas, TX, USA, pp. 124-132. ``` @inproceedings{karkada:2018, author = {Deepthi Karkada and Vikram A. Saletore}, title = {Training Speech Recognition Models on HPC Infrastructure}, booktitle = {2018 IEEE/ACM Machine Learning in HPC Environments (MLHPC)}, doi = {https://doi.org/10.1109/MLHPC.2018.8638637}, year = 2018 } ``` ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Community Participation Guidelines This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). ## How to Report For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. ================================================ FILE: CODE_OWNERS.rst ================================================ DeepSpeech code owners / governance system ========================================== DeepSpeech is run under a governance system inspired by (and partially copied from) the `Mozilla module ownership system <https://www.mozilla.org/about/governance/policies/module-ownership/>`_. The project is roughly divided into modules, and each module has its own owners, which are responsible for reviewing pull requests and deciding on technical direction for their modules.
Module ownership authority is given to people who have worked extensively on areas of the project. Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their own owners. Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interests of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. (Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the DeepSpeech project; some may be poorly implemented. Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don’t ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn’t go on very often or for very long or escalation and review is likely.
The work of the various module owners and peers is overseen by the global owners, who are responsible for making final decisions in case there's conflict between owners, as well as for setting the direction for the project as a whole. This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping. There's overlap in the areas of expertise of each owner, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review; any code owner will be happy to redirect the request to a more appropriate person. Global owners ---------------- These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision. - Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) Training, feeding ----------------- - Reuben Morais (@reuben) Model exporting --------------- - Alexandre Lissy (@lissyx) Transfer learning ----------------- - Josh Meyer (@JRMeyer) - Reuben Morais (@reuben) Testing & CI ------------ - Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) Native inference client ----------------------- Everything that goes into libdeepspeech.so and is not specifically covered in another area fits here.
- Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) Streaming decoder ----------------- - Reuben Morais (@reuben) - @dabinat Python bindings --------------- - Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) Java Bindings ------------- - Alexandre Lissy (@lissyx) JavaScript/NodeJS/ElectronJS bindings ------------------------------------- - Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) .NET bindings ------------- - Carlos Fonseca (@carlfm01) Swift bindings -------------- - Reuben Morais (@reuben) Android support --------------- - Alexandre Lissy (@lissyx) Raspberry Pi support -------------------- - Alexandre Lissy (@lissyx) Windows support --------------- - Carlos Fonseca (@carlfm01) iOS support ----------- - Reuben Morais (@reuben) Documentation ------------- - Alexandre Lissy (@lissyx) - Reuben Morais (@reuben) Third party bindings -------------------- Hosted externally and owned by the individual authors. See the `list of third-party bindings `_ for more info. ================================================ FILE: CONTRIBUTING.rst ================================================ Contribution guidelines ======================= Welcome to the DeepSpeech project! We are excited to see your interest, and appreciate your support! This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the `Mozilla Community Participation Guidelines `_. How to Make a Good Pull Request ------------------------------- Here's some guidelines on how to make a good PR to DeepSpeech. Bug-fix PR ^^^^^^^^^^ You've found a bug and you were able to squash it! Great job! Please write a short but clear commit message describing the bug, and how you fixed it. This makes review much easier. Also, please name your branch something related to the bug-fix. Documentation PR ^^^^^^^^^^^^^^^^ If you're just making updates or changes to the documentation, there's no need to run all of DeepSpeech's tests for Continuous Integration (i.e. 
Taskcluster tests). In this case, at the end of your short but clear commit message, you should add **X-DeepSpeech: NOBUILD**. This will trigger the CI tests to skip your PR, saving both time and compute. New Feature PR ^^^^^^^^^^^^^^ You've made some core changes to DeepSpeech, and you would like to share them back with the community -- great! First things first: if you're planning to add a feature (not just fix a bug or docs) let the DeepSpeech team know ahead of time and get some feedback early. A quick check-in with the team can save time during code-review, and also ensure that your new feature fits into the project. The DeepSpeech codebase is made of many connected parts. There is Python code for training DeepSpeech, core C++ code for running inference on trained models, and multiple language bindings to the C++ core so you can use DeepSpeech in your favorite language. Whenever you add a new feature to DeepSpeech and want to contribute that feature back to the project, here are some things to keep in mind: 1. You've made changes to the core C++ code. Core changes can have downstream effects on all parts of the DeepSpeech project, so keep that in mind. You should minimally also make necessary changes to the C client (i.e. **args.h** and **client.cc**). The bindings for Python, Java, and Javascript are SWIG generated, and in the best-case scenario you won't have to worry about them. However, if you've added a whole new feature, you may need to make custom tweaks to those bindings, because SWIG may not automagically work with your new feature, especially if you've exposed new arguments. The bindings for .NET and Swift are not generated automatically. It would be best if you also made the necessary manual changes to these bindings as well. It is best to communicate with the core DeepSpeech team and come to an understanding of where you will likely need to work with the bindings.
They can't predict all the bugs you will run into, but they will have a good idea of how to plan for some obvious challenges. 2. You've made changes to the Python code. Make sure you run a linter (described below). 3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the DeepSpeech decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use sample WAV files for testing which are provided in the `DeepSpeech/data/` directory. Requesting review on your PR ---------------------------- Generally, a code owner will be notified of your pull request and will either review it or ask some other code owner for their review. If you'd like to proactively request review as you open the PR, see the CODE_OWNERS.rst file which describes who's an appropriate reviewer depending on which parts of the code you're changing. Python Linter ------------- Before making a Pull Request for Python code changes, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter set up in this repository, so for example, if you've made some changes and would like to run the linter on just the changed code, you can use the following command: ..
code-block:: bash pip install pylint cardboardlint cardboardlinter --refspec master This will compare the code against master and run the linter on all the changes. We plan to introduce more linter checks (e.g. for C++) in the future. To run it automatically as a git pre-commit hook, do the following: .. code-block:: bash cat <<\EOF > .git/hooks/pre-commit #!/bin/bash if [ ! -x "$(command -v cardboardlinter)" ]; then exit 0 fi # First, stash index and work dir, keeping only the # to-be-committed changes in the working directory. echo "Stashing working tree changes..." 1>&2 old_stash=$(git rev-parse -q --verify refs/stash) git stash save -q --keep-index new_stash=$(git rev-parse -q --verify refs/stash) # If there were no changes (e.g., `--amend` or `--allow-empty`) # then nothing was stashed, and we should skip everything, # including the tests themselves. (Presumably the tests passed # on the previous commit, so there is no need to re-run them.) if [ "$old_stash" = "$new_stash" ]; then echo "No changes, skipping lint." 1>&2 exit 0 fi # Run tests cardboardlinter --refspec HEAD -n auto status=$? # Restore changes echo "Restoring working tree changes..." 1>&2 git reset --hard -q && git stash apply --index -q && git stash drop -q # Exit with status from test-run: nonzero prevents commit exit $status EOF chmod +x .git/hooks/pre-commit This will run the linters on just the changes made in your commit. ================================================ FILE: DeepSpeech.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function if __name__ == '__main__': try: from deepspeech_training import train as ds_train except ImportError: print('Training package is not installed. 
See training documentation.') raise ds_train.run_script() ================================================ FILE: Dockerfile.build.tmpl ================================================ # Please refer to the USING documentation, "Dockerfile for building from source" # Need devel version cause we need /usr/include/cudnn.h FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \ DEEPSPEECH_SHA=#DEEPSPEECH_SHA# # >> START Install base software # Get basic packages RUN apt-get update && apt-get install -y --no-install-recommends \ apt-utils \ bash-completion \ build-essential \ ca-certificates \ cmake \ curl \ g++ \ gcc \ git \ libbz2-dev \ libboost-all-dev \ libgsm1-dev \ libltdl-dev \ liblzma-dev \ libmagic-dev \ libpng-dev \ libsox-fmt-mp3 \ libsox-dev \ locales \ openjdk-8-jdk \ pkg-config \ python3 \ python3-dev \ python3-pip \ python3-wheel \ python3-numpy \ sox \ unzip \ wget \ zlib1g-dev; \ update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 1; \ # Install Bazel \ curl -LO "https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel_3.1.0-linux-x86_64.deb" && dpkg -i bazel_*.deb; \ # Try and free some space \ rm -rf /var/lib/apt/lists/* bazel_*.deb # << END Install base software # >> START Configure Tensorflow Build # GPU Environment Setup ENV TF_NEED_ROCM=0 \ TF_NEED_OPENCL_SYCL=0 \ TF_NEED_OPENCL=0 \ TF_NEED_CUDA=1 \ TF_CUDA_PATHS="/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" \ TF_CUDA_VERSION=10.1 \ TF_CUDNN_VERSION=7.6 \ TF_CUDA_COMPUTE_CAPABILITIES=6.0 \ TF_NCCL_VERSION=2.8 \ # Common Environment Setup \ TF_BUILD_CONTAINER_TYPE=GPU \ TF_BUILD_OPTIONS=OPT \ TF_BUILD_DISABLE_GCP=1 \ TF_BUILD_ENABLE_XLA=0 \ TF_BUILD_PYTHON_VERSION=PYTHON3 \ TF_BUILD_IS_OPT=OPT \ TF_BUILD_IS_PIP=PIP \ # Build client.cc and install Python client and decoder bindings \ TFDIR=/DeepSpeech/tensorflow \ # Allow Python printing utf-8 \ 
PYTHONIOENCODING=UTF-8 \ # Other Parameters \ CC_OPT_FLAGS="-mavx -mavx2 -msse4.1 -msse4.2 -mfma" \ TF_NEED_GCP=0 \ TF_NEED_HDFS=0 \ TF_NEED_JEMALLOC=1 \ TF_NEED_OPENCL=0 \ TF_CUDA_CLANG=0 \ TF_NEED_MKL=0 \ TF_ENABLE_XLA=0 \ TF_NEED_AWS=0 \ TF_NEED_KAFKA=0 \ TF_NEED_NGRAPH=0 \ TF_DOWNLOAD_CLANG=0 \ TF_NEED_TENSORRT=0 \ TF_NEED_GDR=0 \ TF_NEED_VERBS=0 \ TF_NEED_OPENCL_SYCL=0 \ PYTHON_BIN_PATH=/usr/bin/python3.6 \ PYTHON_LIB_PATH=/usr/local/lib/python3.6/dist-packages # << END Configure Tensorflow Build # >> START Configure Bazel # Running bazel inside a `docker build` command causes trouble, cf: # https://github.com/bazelbuild/bazel/issues/134 # The easiest solution is to set up a bazelrc file forcing --batch. # Similarly, we need to workaround sandboxing issues: # https://github.com/bazelbuild/bazel/issues/418 RUN echo "startup --batch" >>/etc/bazel.bazelrc && \ echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >> /etc/bazel.bazelrc # << END Configure Bazel WORKDIR / RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech && \ cd /DeepSpeech && \ # Every step is chained with && so a failed fetch/checkout aborts the build instead of silently building the default branch \ git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \ git submodule sync tensorflow/ && git submodule update --init tensorflow/ && \ git submodule sync kenlm/ && git submodule update --init kenlm/ # >> START Build and bind # Fix for not found script https://github.com/tensorflow/tensorflow/issues/471 # Using CPU optimizations: # -mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx. # Adding --config=cuda flag to build using CUDA. 
# passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment # Build DeepSpeech RUN cd /DeepSpeech/tensorflow && ./configure && bazel build \ --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \ --config=monolithic \ --config=cuda \ -c opt \ --copt=-O3 \ --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \ --copt=-mtune=generic \ --copt=-march=x86-64 \ --copt=-msse \ --copt=-msse2 \ --copt=-msse3 \ --copt=-msse4.1 \ --copt=-msse4.2 \ --copt=-mavx \ --copt=-fvisibility=hidden \ //native_client:libdeepspeech.so \ --verbose_failures \ --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} && \ cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ && \ rm -fr /root/.cache/* RUN cd /DeepSpeech/native_client && make NUM_PROCESSES=$(nproc) deepspeech && \ # Chained with && so a failed build or wheel install fails the image build instead of being ignored \ cd /DeepSpeech/native_client/python && make NUM_PROCESSES=$(nproc) bindings && \ pip3 install --upgrade dist/*.whl && \ cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \ pip3 install --upgrade dist/*.whl # << END Build and bind # Build KenLM in /DeepSpeech/kenlm folder WORKDIR /DeepSpeech/kenlm RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \ mkdir -p build && \ cd build && \ EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. 
&& \ make -j $(nproc) # Done WORKDIR /DeepSpeech ================================================ FILE: Dockerfile.train.tmpl ================================================ # Please refer to the TRAINING documentation, "Basic Dockerfile for training" FROM tensorflow/tensorflow:1.15.4-gpu-py3 ENV DEBIAN_FRONTEND=noninteractive \ DEEPSPEECH_REPO=#DEEPSPEECH_REPO# \ DEEPSPEECH_SHA=#DEEPSPEECH_SHA# RUN apt-get update && apt-get install -y --no-install-recommends \ apt-utils \ bash-completion \ build-essential \ cmake \ curl \ git \ libboost-all-dev \ libbz2-dev \ liblzma-dev \ locales \ python3-venv \ unzip \ xz-utils \ wget && \ # We need to remove it because it's breaking deepspeech install later with \ # weird errors about setuptools \ apt-get purge -y python3-xdg && \ # Install dependencies for audio augmentation \ apt-get install -y --no-install-recommends libopus0 libsndfile1 && \ # Try and free some space \ rm -rf /var/lib/apt/lists/* WORKDIR / RUN git clone $DEEPSPEECH_REPO DeepSpeech && \ cd /DeepSpeech && git fetch origin $DEEPSPEECH_SHA && git checkout $DEEPSPEECH_SHA && \ git submodule sync kenlm/ && git submodule update --init kenlm/ # Build CTC decoder first, to avoid clashes on incompatible versions upgrades RUN cd /DeepSpeech/native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings && \ pip3 install --upgrade dist/*.whl # Prepare deps RUN cd /DeepSpeech && pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 && \ # Install DeepSpeech \ # - No need for the decoder since we did it earlier \ # - There is already correct TensorFlow GPU installed on the base image, \ # we don't want to break that \ DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e . 
&& \ # Tool to convert output graph for inference \ curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > convert_graphdef_memmapped_format && \ chmod +x convert_graphdef_memmapped_format # Build KenLM to generate new scorers WORKDIR /DeepSpeech/kenlm RUN wget -O - https://gitlab.com/libeigen/eigen/-/archive/3.3.8/eigen-3.3.8.tar.bz2 | tar xj && \ mkdir -p build && \ cd build && \ EIGEN3_ROOT=/DeepSpeech/kenlm/eigen-3.3.8 cmake .. && \ make -j $(nproc) WORKDIR /DeepSpeech RUN ./bin/run-ldc93s1.sh ================================================ FILE: ISSUE_TEMPLATE.md ================================================ For support and discussions, please use our [Discourse forums](https://discourse.mozilla.org/c/deep-speech). If you've found a bug, or have a feature request, then please create an issue with the following information: - **Have I written custom code (as opposed to running examples on an unmodified clone of the repository)**: - **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: - **TensorFlow installed from (our builds, or upstream TensorFlow)**: - **TensorFlow version (use command below)**: - **Python version**: - **Bazel version (if compiling from source)**: - **GCC/Compiler version (if compiling from source)**: - **CUDA/cuDNN version**: - **GPU model and memory**: - **Exact command to reproduce**: You can obtain the TensorFlow version with ```bash python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)" ``` Please describe the problem clearly. Be sure to convey here why it's a bug or a feature request. Include any logs or source code that would be helpful to diagnose the problem. For larger logs, link to a Gist, not a screenshot. If including tracebacks, please include the full traceback. Try to provide a reproducible test case. 
================================================ FILE: LICENSE ================================================ Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. 
"Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. 
Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. 
Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. 
Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. 
However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. 
* * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. * * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. 
Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. ================================================ FILE: Makefile ================================================ DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git DEEPSPEECH_SHA ?= master Dockerfile%: Dockerfile%.tmpl sed \ -e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \ -e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \ < $< > $@ ================================================ FILE: README.rst ================================================ Status ====== This project is now discontinued. Project DeepSpeech ================== .. image:: https://readthedocs.org/projects/deepspeech/badge/?version=latest :target: https://deepspeech.readthedocs.io/?badge=latest :alt: Documentation .. image:: https://github.com/mozilla/DeepSpeech/actions/workflows/macOS-amd64.yml/badge.svg :target: https://github.com/mozilla/DeepSpeech/actions/workflows/macOS-amd64.yml :alt: macOS builds .. image:: https://github.com/mozilla/DeepSpeech/actions/workflows/lint.yml/badge.svg :target: https://github.com/mozilla/DeepSpeech/actions/workflows/lint.yml :alt: Linters .. image:: https://github.com/mozilla/DeepSpeech/actions/workflows/docker.yml/badge.svg :target: https://github.com/mozilla/DeepSpeech/actions/workflows/docker.yml :alt: Docker Images DeepSpeech is an open-source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper `_. 
Project DeepSpeech uses Google's `TensorFlow `_ to make the implementation easier. Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io `_. For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub `_. For contribution guidelines, see `CONTRIBUTING.rst `_. For contact and support information, see `SUPPORT.rst `_. ================================================ FILE: RELEASE.rst ================================================ Making a (new) release of the codebase ====================================== * Update version in VERSION file, commit * Open PR, ensure all tests are passing properly * Merge the PR * Fetch the new master, tag it with (hopefully) the same version as in VERSION * Push that to GitHub * New build should be triggered and new packages should be made * TaskCluster should schedule a merge build **including** a "DeepSpeech Packages" task ================================================ FILE: SUPPORT.rst ================================================ .. _support: Contact/Getting Help ==================== There are several ways to contact us or to get help: #. `Discourse Forums `_ - The `Deep Speech category on Discourse `_ is the first place to look. Search for keywords related to your question or problem to see if someone else has run into it already. If you can't find anything relevant there, search on our `issue tracker `_ to see if there is an existing issue about your problem. #. `Matrix chat `_ - If your question is not addressed by either the `FAQ `_ or `Discourse Forums `_\ , you can contact us on the ``#machinelearning`` channel on `Mozilla Matrix `_\ ; people there can try to answer/help. #. `Create a new issue `_ - Finally, if you have a bug report or a feature request that isn't already covered by an existing issue, please open an issue in our repo and fill in the appropriate information on your hardware and software setup. 
================================================ FILE: bazel.patch ================================================ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java index c7aa4cb63..e084bc27c 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java @@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.io.PrintWriter; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -73,6 +74,8 @@ public final class FileWriteAction extends AbstractFileWriteAction { */ private final CharSequence fileContents; + private final Artifact output; + /** Minimum length (in chars) for content to be eligible for compression. */ private static final int COMPRESS_CHARS_THRESHOLD = 256; @@ -90,6 +93,7 @@ public final class FileWriteAction extends AbstractFileWriteAction { fileContents = new CompressedString((String) fileContents); } this.fileContents = fileContents; + this.output = output; } /** @@ -230,11 +234,32 @@ public final class FileWriteAction extends AbstractFileWriteAction { */ @Override protected String computeKey() { + // System.err.println("src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java => output: " + output.getExecPath()); + // ".ckd" Compute Key Debug + PrintWriter computeKeyDebugWriter = null; + String computeKeyDebugFile = output.getExecPath() + ".FileWriteAction.ckd"; + try { + computeKeyDebugWriter = new PrintWriter(computeKeyDebugFile, "UTF-8"); + } catch (java.io.FileNotFoundException ex) { + System.err.println("Unable to create " + computeKeyDebugFile); + } catch (java.io.UnsupportedEncodingException ex) { + System.err.println("Unsupported encoding"); 
+ } + Fingerprint f = new Fingerprint(); f.addString(GUID); + if (computeKeyDebugWriter != null) { computeKeyDebugWriter.println("GUID: " + GUID); } + f.addString(String.valueOf(makeExecutable)); + if (computeKeyDebugWriter != null) { computeKeyDebugWriter.println("MAKEEXECUTABLE: " + String.valueOf(makeExecutable)); } + f.addString(getFileContents()); - return f.hexDigestAndReset(); + if (computeKeyDebugWriter != null) { computeKeyDebugWriter.println("FILECONTENTS: " + getFileContents()); } + + String rv = f.hexDigestAndReset(); + if (computeKeyDebugWriter != null) { computeKeyDebugWriter.println("KEY: " + rv); } + if (computeKeyDebugWriter != null) { computeKeyDebugWriter.close(); } + return rv; } /** diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java index 580788160..26883eb92 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java @@ -60,6 +60,7 @@ import com.google.devtools.build.lib.util.ShellEscaper; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.protobuf.GeneratedMessage.GeneratedExtension; import java.nio.charset.Charset; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; @@ -91,6 +92,9 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie private final CommandLine argv; + private final Iterable<Artifact> inputs; + private final Iterable<Artifact> outputs; + private final boolean executeUnconditionally; private final boolean isShellCommand; private final String progressMessage; @@ -197,6 +201,9 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie this.mnemonic = mnemonic; this.executeUnconditionally = executeUnconditionally; this.extraActionInfoSupplier = extraActionInfoSupplier; + + this.inputs = inputs; + this.outputs = outputs; } @Override @@ -312,23 +319,89 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie @Override protected String 
computeKey() { + boolean genruleSetup = !Iterables.isEmpty(inputs) && String.valueOf(Iterables.get(inputs, 0).getExecPath()).contains("genrule/genrule-setup.sh"); + boolean validGenrule = genruleSetup && (Iterables.size(inputs) > 1); + + String genruleScript = null; + if (validGenrule) { + genruleScript = String.valueOf(Iterables.get(inputs, 1).getExecPath()); + } + + // ".ckd" Compute Key Debug + PrintWriter computeKeyDebugWriter = null; + if (validGenrule) { + String computeKeyDebugFile = genruleScript + ".SpawnAction.ckd"; + try { + computeKeyDebugWriter = new PrintWriter(computeKeyDebugFile, "UTF-8"); + } catch (java.io.FileNotFoundException ex) { + System.err.println("Unable to create " + computeKeyDebugFile); + } catch (java.io.UnsupportedEncodingException ex) { + System.err.println("Unsupported encoding"); + } + } + + validGenrule = validGenrule && (computeKeyDebugWriter != null); + Fingerprint f = new Fingerprint(); f.addString(GUID); + if (validGenrule) { computeKeyDebugWriter.println("GUID: " + GUID); } + f.addStrings(argv.arguments()); + if (validGenrule) { + for (String input : argv.arguments()) { + computeKeyDebugWriter.println("ARGUMENTS: " + input); + } + } + f.addString(getMnemonic()); + if (validGenrule) { computeKeyDebugWriter.println("MNEMONIC: " + getMnemonic()); } + // We don't need the toolManifests here, because they are a subset of the inputManifests by // definition and the output of an action shouldn't change whether something is considered a // tool or not. 
f.addPaths(getRunfilesSupplier().getRunfilesDirs()); + if (validGenrule) { + for (PathFragment path : getRunfilesSupplier().getRunfilesDirs()) { + computeKeyDebugWriter.println("RUNFILESDIRS: " + path.getPathString()); + } + } + ImmutableList<Artifact> runfilesManifests = getRunfilesSupplier().getManifests(); f.addInt(runfilesManifests.size()); + if (validGenrule) { computeKeyDebugWriter.println("RUNFILESMANIFESTSSIZE: " + runfilesManifests.size()); } + for (Artifact runfilesManifest : runfilesManifests) { f.addPath(runfilesManifest.getExecPath()); + if (validGenrule) { computeKeyDebugWriter.println("RUNFILESMANIFEST: " + runfilesManifest.getExecPath().getPathString()); } } + f.addStringMap(getEnvironment()); + if (validGenrule) { + for (Map.Entry<String, String> entry : getEnvironment().entrySet()) { + computeKeyDebugWriter.println("ENV: " + entry.getKey() + "=" + entry.getValue()); + } + } + f.addStrings(getClientEnvironmentVariables()); + if (validGenrule) { + for (String input : getClientEnvironmentVariables()) { + computeKeyDebugWriter.println("CLIENTENV: " + input); + } + } + f.addStringMap(getExecutionInfo()); - return f.hexDigestAndReset(); + if (validGenrule) { + for (Map.Entry<String, String> entry : executionInfo.entrySet()) { + computeKeyDebugWriter.println("EXECINFO: " + entry.getKey() + "=" + entry.getValue()); + } + } + + String rv = f.hexDigestAndReset(); + if (validGenrule) { + computeKeyDebugWriter.println("KEY: " + rv); + computeKeyDebugWriter.close(); + } + return rv; } @Override diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java index 3559fffde..3ba39617c 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java @@ -1111,10 +1111,30 @@ public class CppCompileAction extends AbstractAction @Override public String computeKey() { + // ".ckd" Compute Key Debug + PrintWriter 
computeKeyDebugWriter = null; + String computeKeyDebugFile = getInternalOutputFile() + ".CppCompileAction.ckd"; + try { + computeKeyDebugWriter = new PrintWriter(computeKeyDebugFile, "UTF-8"); + } catch (java.io.FileNotFoundException ex) { + System.err.println("Unable to create " + computeKeyDebugFile); + } catch (java.io.UnsupportedEncodingException ex) { + System.err.println("Unsupported encoding"); + } + Fingerprint f = new Fingerprint(); f.addUUID(actionClassId); + computeKeyDebugWriter.println("UUID: " + actionClassId); + f.addStringMap(getEnvironment()); + for (Map.Entry entry : getEnvironment().entrySet()) { + computeKeyDebugWriter.println("ENV: " + entry.getKey() + "=" + entry.getValue()); + } + f.addStringMap(executionInfo); + for (Map.Entry entry : executionInfo.entrySet()) { + computeKeyDebugWriter.println("EXECINFO: " + entry.getKey() + "=" + entry.getValue()); + } // For the argv part of the cache key, ignore all compiler flags that explicitly denote module // file (.pcm) inputs. Depending on input discovery, some of the unused ones are removed from @@ -1124,6 +1144,9 @@ public class CppCompileAction extends AbstractAction // A better long-term solution would be to make the compiler to find them automatically and // never hand in the .pcm files explicitly on the command line in the first place. f.addStrings(compileCommandLine.getArgv(getInternalOutputFile(), null)); + for (String input : compileCommandLine.getArgv(getInternalOutputFile(), null)) { + computeKeyDebugWriter.println("COMMAND: " + input); + } /* * getArgv() above captures all changes which affect the compilation @@ -1133,19 +1156,31 @@ public class CppCompileAction extends AbstractAction * have changed, otherwise we might miss some errors. 
*/ f.addPaths(context.getDeclaredIncludeDirs()); + for (PathFragment path : context.getDeclaredIncludeDirs()) { + computeKeyDebugWriter.println("DECLAREDINCLUDEDIRS: " + path.getPathString()); + } f.addPaths(context.getDeclaredIncludeWarnDirs()); + for (PathFragment path : context.getDeclaredIncludeWarnDirs()) { + computeKeyDebugWriter.println("DECLAREDINCLUDEWARNDIRS: " + path.getPathString()); + } for (Artifact declaredIncludeSrc : context.getDeclaredIncludeSrcs()) { f.addPath(declaredIncludeSrc.getExecPath()); + computeKeyDebugWriter.println("DECLAREDINCLUDESRCS: " + declaredIncludeSrc.getExecPath().getPathString()); } f.addInt(0); // mark the boundary between input types for (Artifact input : getMandatoryInputs()) { f.addPath(input.getExecPath()); + computeKeyDebugWriter.println("MANDATORYINPUTS: " + input.getExecPath().getPathString()); } f.addInt(0); for (Artifact input : prunableInputs) { f.addPath(input.getExecPath()); + computeKeyDebugWriter.println("PRUNABLEINPUTS: " + input.getExecPath().getPathString()); } - return f.hexDigestAndReset(); + String rv = f.hexDigestAndReset(); + computeKeyDebugWriter.println("KEY: " + rv); + computeKeyDebugWriter.close(); + return rv; } @Override ================================================ FILE: bin/README.rst ================================================ Utility scripts =============== This folder contains scripts that can be used to do training on the various included importers from the command line. This is useful to be able to run training without a browser open, or unattended on a remote machine. They should be run from the base directory of the repository. Note that the default settings assume a very well-specified machine. In the situation that out-of-memory errors occur, you may find decreasing the values of ``--train_batch_size``\ , ``--dev_batch_size`` and ``--test_batch_size`` will allow you to continue, at the expense of speed. 
def fail(message):
    """Print ``message`` to stderr and exit the process with status code 1."""
    print(message, file=sys.stderr, flush=True)
    sys.exit(1)


def compare_samples():
    """Compare the two samples named in ``CLI_ARGS`` and report the outcome.

    Exits through ``fail`` when the audio formats differ, when the durations
    differ by more than 0.001, or when the ``mean_dbfs`` value of the sample
    difference is on the wrong side of ``CLI_ARGS.threshold``. With
    ``CLI_ARGS.if_differ`` the last check is inverted: equal samples fail and
    different samples succeed.
    """
    sample1 = load_sample(CLI_ARGS.sample1).unpack()
    sample2 = load_sample(CLI_ARGS.sample2).unpack()
    if sample1.audio_format != sample2.audio_format:
        fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
    if abs(sample1.duration - sample2.duration) > 0.001:
        fail('Samples differ on: duration ({} and {})'.format(sample1.duration, sample2.duration))
    sample1.change_audio_type(AUDIO_TYPE_NP)
    sample2.change_audio_type(AUDIO_TYPE_NP)

    # Cut the longer signal down to the length of the shorter one so that the
    # two arrays can be subtracted element-wise.
    samples = [sample1, sample2]
    largest = np.argmax([sample1.audio.shape[0], sample2.audio.shape[0]])
    smallest = (largest + 1) % 2
    samples[largest].audio = samples[largest].audio[:len(samples[smallest].audio)]
    audio_diff = samples[largest].audio - samples[smallest].audio
    diff_dbfs = mean_dbfs(audio_diff)

    differ_msg = 'Samples differ on: sample data ({:0.2f} dB difference) '.format(diff_dbfs)
    equal_msg = 'Samples are considered equal ({:0.2f} dB difference)'.format(diff_dbfs)
    if CLI_ARGS.if_differ:
        # Inverse check: equal samples are the failure case.
        if diff_dbfs <= CLI_ARGS.threshold:
            fail(equal_msg)
        if not CLI_ARGS.no_success_output:
            print(differ_msg, file=sys.stderr, flush=True)
    else:
        if diff_dbfs > CLI_ARGS.threshold:
            fail(differ_msg)
        if not CLI_ARGS.no_success_output:
            print(equal_msg, file=sys.stderr, flush=True)


def handle_args():
    """Build the command line parser and return the parsed ``sys.argv``.

    Returns:
        argparse.Namespace with ``sample1``, ``sample2``, ``threshold``
        (float, default -60.0), ``if_differ`` and ``no_success_output``.
    """
    parser = argparse.ArgumentParser(
        description="Tool for checking similarity of two samples"
    )
    parser.add_argument("sample1", help="Filename of sample 1 to compare")
    parser.add_argument("sample2", help="Filename of sample 2 to compare")
    parser.add_argument("--threshold", type=float, default=-60.0,
                        help="dB of sample deltas above which they are considered different")
    parser.add_argument(
        "--if-differ",
        action="store_true",
        # The two literals are concatenated, so the first one has to end with
        # a space to keep the sentences apart in the rendered help.
        help="If to succeed and return status code 0 on different signals and fail on equal ones (inverse check). "
             "This will still fail on different formats or durations.",
    )
    parser.add_argument(
        "--no-success-output",
        action="store_true",
        help="Stay silent on success (if samples are equal or - with --if-differ - samples are not equal)",
    )
    return parser.parse_args()


if __name__ == "__main__":
    CLI_ARGS = handle_args()
    compare_samples()
extension == '.sdb': writer = DirectSDBWriter(CLI_ARGS.target, audio_type=audio_type, labeled=labeled) elif extension == '.tar': writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=False, include=CLI_ARGS.include) elif extension == '.tgz' or CLI_ARGS.target.lower().endswith('.tar.gz'): writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=True, include=CLI_ARGS.include) else: print('Unknown extension of target file - has to be either .csv, .sdb, .tar, .tar.gz or .tgz') sys.exit(1) with writer: samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled) num_samples = len(samples) if augmentations: samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations) bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) for sample in bar(change_audio_types( samples, audio_type=audio_type, bitrate=CLI_ARGS.bitrate, processes=CLI_ARGS.workers)): writer.add(sample) def handle_args(): parser = argparse.ArgumentParser( description='Tool for building a combined SDB or CSV sample-set from other sets' ) parser.add_argument( 'sources', nargs='+', help='Source CSV and/or SDB files - ' 'Note: For getting a correctly ordered target set, source SDBs have to have their samples ' 'already ordered from shortest to longest.', ) parser.add_argument( 'target', help='SDB, CSV or TAR(.gz) file to create' ) parser.add_argument( '--audio-type', default='opus', choices=AUDIO_TYPE_LOOKUP.keys(), help='Audio representation inside target SDB', ) parser.add_argument( '--bitrate', type=int, help='Bitrate for lossy compressed SDB samples like in case of --audio-type opus', ) parser.add_argument( '--workers', type=int, default=None, help='Number of encoding SDB workers' ) parser.add_argument( '--unlabeled', action='store_true', help='If to build an data-set with unlabeled (audio only) samples - ' 'typically used for building noise augmentation corpora', ) parser.add_argument( '--absolute-paths', action='store_true', 
def extract(archive_path, target_dir):
    """Extract the tar archive ``archive_path`` into ``target_dir``."""
    print("Extracting {} into {}...".format(archive_path, target_dir))
    with tarfile.open(archive_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are only present on newer Python releases.
            # Where available, "data" rejects members that would be written
            # outside of target_dir (absolute names, "..", escaping links).
            tar.extractall(target_dir, filter="data")
        else:
            tar.extractall(target_dir)


def preprocess_data(tgz_file, target_dir):
    """Extract the aidatatang_200zh archive and write one CSV per subset.

    Args:
        tgz_file: path of the main corpus archive.
        target_dir: folder the archive is extracted into; it also receives
            ``aidatatang_train.csv``, ``aidatatang_dev.csv`` and
            ``aidatatang_test.csv``.
    """
    # First extract main archive and sub-archives
    extract(tgz_file, target_dir)
    main_folder = os.path.join(target_dir, "aidatatang_200zh")
    for targz in glob.glob(os.path.join(main_folder, "corpus", "*", "*.tar.gz")):
        extract(targz, os.path.dirname(targz))

    # Folder structure is now:
    # - aidatatang_200zh/
    #   - transcript/aidatatang_200_zh_transcript.txt
    #   - corpus/train/*.tar.gz
    #   - corpus/train/*/*.{wav,txt,trn,metadata}
    #   - corpus/dev/*.tar.gz
    #   - corpus/dev/*/*.{wav,txt,trn,metadata}
    #   - corpus/test/*.tar.gz
    #   - corpus/test/*/*.{wav,txt,trn,metadata}

    # Transcripts file has one line per WAV file, where each line consists of
    # the WAV file name without extension followed by a single space followed
    # by the transcript.

    # Since the transcripts themselves can contain spaces, we split on space but
    # only once, then build a mapping from file name to transcript
    transcripts_path = os.path.join(
        main_folder, "transcript", "aidatatang_200_zh_transcript.txt"
    )
    transcripts = {}
    # Read explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding.
    with open(transcripts_path, encoding="utf-8") as fin:
        for line in fin:
            fields = line.split(" ", maxsplit=1)
            if len(fields) != 2:
                # Blank or space-less line: there is no transcript to map.
                continue
            transcripts[fields[0]] = fields[1].strip("\n")

    def load_set(glob_path):
        """Return (wav path, file size, transcript) for every WAV with a transcript."""
        set_files = []
        for wav in glob.glob(glob_path):
            transcript_key = os.path.splitext(os.path.basename(wav))[0]
            transcript = transcripts.get(transcript_key)
            if transcript is None:
                print("Warning: Missing transcript for WAV file {}.".format(wav))
                continue
            set_files.append((wav, os.path.getsize(wav), transcript))
        return set_files

    for subset in ("train", "dev", "test"):
        print("Loading {} set samples...".format(subset))
        subset_files = load_set(
            os.path.join(main_folder, "corpus", subset, "*", "*.wav")
        )
        df = pandas.DataFrame(data=subset_files, columns=COLUMN_NAMES)

        # Trim train set to under 10s by removing the last couple hundred samples
        if subset == "train":
            # NOTE(review): the formula presumably assumes a 44-byte WAV header
            # and 16 kHz / 16-bit mono audio - confirm against the corpus.
            durations = (df["wav_filesize"] - 44) / 16000 / 2
            df = df[durations <= 10.0]
            print("Trimming {} samples > 10 seconds".format((durations > 10.0).sum()))

        dest_csv = os.path.join(target_dir, "aidatatang_{}.csv".format(subset))
        print("Saving {} set into {}...".format(subset, dest_csv))
        df.to_csv(dest_csv, index=False)
def extract(archive_path, target_dir):
    """Extract the tar archive ``archive_path`` into ``target_dir``."""
    print("Extracting {} into {}...".format(archive_path, target_dir))
    with tarfile.open(archive_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are only present on newer Python releases.
            # Where available, "data" rejects members that would be written
            # outside of target_dir (absolute names, "..", escaping links).
            tar.extractall(target_dir, filter="data")
        else:
            tar.extractall(target_dir)


def preprocess_data(tgz_file, target_dir):
    """Extract the AISHELL archive and write one CSV per subset.

    Args:
        tgz_file: path of the main corpus archive.
        target_dir: folder the archive is extracted into; it also receives
            ``aishell_train.csv``, ``aishell_dev.csv`` and ``aishell_test.csv``.
    """
    # First extract main archive and sub-archives
    extract(tgz_file, target_dir)
    main_folder = os.path.join(target_dir, "data_aishell")

    wav_archives_folder = os.path.join(main_folder, "wav")
    for targz in glob.glob(os.path.join(wav_archives_folder, "*.tar.gz")):
        extract(targz, main_folder)

    # Folder structure is now:
    # - data_aishell/
    #   - train/S****/*.wav
    #   - dev/S****/*.wav
    #   - test/S****/*.wav
    #   - wav/S****.tar.gz
    #   - transcript/aishell_transcript_v0.8.txt

    # Transcripts file has one line per WAV file, where each line consists of
    # the WAV file name without extension followed by a single space followed
    # by the transcript.

    # Since the transcripts themselves can contain spaces, we split on space but
    # only once, then build a mapping from file name to transcript
    transcripts_path = os.path.join(
        main_folder, "transcript", "aishell_transcript_v0.8.txt"
    )
    transcripts = {}
    # Read explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding.
    with open(transcripts_path, encoding="utf-8") as fin:
        for line in fin:
            fields = line.split(" ", maxsplit=1)
            if len(fields) != 2:
                # Blank or space-less line: there is no transcript to map.
                continue
            transcripts[fields[0]] = fields[1].strip("\n")

    def load_set(glob_path):
        """Return (wav path, file size, transcript) for every WAV with a transcript."""
        set_files = []
        for wav in glob.glob(glob_path):
            transcript_key = os.path.splitext(os.path.basename(wav))[0]
            transcript = transcripts.get(transcript_key)
            if transcript is None:
                print("Warning: Missing transcript for WAV file {}.".format(wav))
                continue
            set_files.append((wav, os.path.getsize(wav), transcript))
        return set_files

    for subset in ("train", "dev", "test"):
        print("Loading {} set samples...".format(subset))
        subset_files = load_set(os.path.join(main_folder, subset, "S*", "*.wav"))
        df = pandas.DataFrame(data=subset_files, columns=COLUMNNAMES)

        # Trim train set to under 10s by removing the last couple hundred samples
        if subset == "train":
            # NOTE(review): the formula presumably assumes a 44-byte WAV header
            # and 16 kHz / 16-bit mono audio - confirm against the corpus.
            durations = (df["wav_filesize"] - 44) / 16000 / 2
            df = df[durations <= 10.0]
            print("Trimming {} samples > 10 seconds".format((durations > 10.0).sum()))

        dest_csv = os.path.join(target_dir, "aishell_{}.csv".format(subset))
        print("Saving {} set into {}...".format(subset, dest_csv))
        df.to_csv(dest_csv, index=False)
Defaults to same folder as the main archive.", ) params = parser.parse_args() if not params.target_dir: params.target_dir = os.path.dirname(params.aishell_tgz_file) preprocess_data(params.aishell_tgz_file, params.target_dir) if __name__ == "__main__": main() ================================================ FILE: bin/import_ccpmf.py ================================================ #!/usr/bin/env python """ Importer for dataset published from Centre de Conférence Pierre Mendès-France Ministère de l'Économie, des Finances et de la Relance """ import csv import sys import os import progressbar import subprocess import zipfile from glob import glob from multiprocessing import Pool import hashlib import decimal import math import unicodedata import re import sox import xml.etree.ElementTree as ET try: from num2words import num2words except ImportError as ex: print("pip install num2words") sys.exit(1) import requests import json from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download from deepspeech_training.util.helpers import secs_to_hours from deepspeech_training.util.importers import ( get_counter, get_importers_parser, get_imported_samples, get_validate_label, print_import_report, ) from ds_ctcdecoder import Alphabet FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"] SAMPLE_RATE = 16000 CHANNELS = 1 BIT_DEPTH = 16 MAX_SECS = 10 MIN_SECS = 0.85 DATASET_RELEASE_CSV = "https://data.economie.gouv.fr/explore/dataset/transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020/download/?format=csv&timezone=Europe/Berlin&lang=fr&use_labels_for_header=true&csv_separator=%3B" DATASET_RELEASE_SHA = [ ("863d39a06a388c6491c6ff2f6450b151f38f1b57", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001"), ("2f3a0305aa04c61220bb00b5a4e553e45dbf12e1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002"), ("5e55e9f1f844097349188ac875947e5a3d7fe9f1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003"), ("8bf54842cf07948ca5915e27a8bd5fa5139c06ae", 
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004"), ("c8963504aadc015ac48f9af80058a0bb3440b94f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005"), ("d95e225e908621d83ce4e9795fd108d9d310e244", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006"), ("de6ed9c2b0ee80ca879aae8ba7923cc93217d811", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007"), ("234283c47dacfcd4450d836c52c25f3e807fc5f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008"), ("4e6b67a688639bb72f8cd81782eaba604a8d32a6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009"), ("4165a51389777c8af8e6253d87bdacb877e8b3b0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010"), ("34322e7009780d97ef5bd02bf2f2c7a31f00baff", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011"), ("48c5be3b2ca9d6108d525da6a03e91d93a95dbac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012"), ("87573172f506a189c2ebc633856fe11a2e9cd213", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013"), ("6ab2c9e508e9278d5129f023e018725c4a7c69e8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014"), ("4f84df831ef46dce5d3ab3e21817687a2d8c12d0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015"), ("e69bfb079885c299cb81080ef88b1b8b57158aa6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016"), ("5f764ba788ee273981cf211b242c29b49ca22c5e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017"), ("b6aa81a959525363223494830c1e7307d4c4bae6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018"), ("91ddcf43c7bf113a6f2528b857c7ec22a50a148a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019"), ("fa1b29273dd77b9a7494983a2f9ae52654b931d7", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020"), ("1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021"), ("ce6420d5d0b6b5135ba559f83e1a82d4d615c470", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022"), 
("d0976ed292ac24fcf1590d1ea195077c74b05471", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023"), ("ec746cd6af066f62d9bf8d3b2f89174783ff4e3c", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024"), ("570d9e1e84178e32fd867171d4b3aaecda1fd4fb", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025"), ("c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026"), ("08406a51146d88e208704ce058c060a1e44efa50", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027"), ("199aedad733a78ea1e7d47def9c71c6fd5795e02", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028"), ("db856a068f92fb4f01f410bba42c7271de0f231a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029"), ("e3c0135f16c6c9d25a09dcb4f99a685438a84740", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030"), ("e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031"), ("be5e80cbc49b59b31ae33c30576ef0e1a162d84e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032"), ("501df58e3ff55fcfd75b93dab57566dc536948b8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033"), ("1a114875811a8cdcb8d85a9f6dbee78be3e05131", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034"), ("465d824e7ee46448369182c0c28646d155a2249b", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035"), ("37f341b1b266d143eb73138c31cfff3201b9d619", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036"), ("9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037"), ("54886755630cb080a53098cb1b6c951c6714a143", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038"), ("4b7cbb0154697be795034f7a49712e882a97197a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039"), ("c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040"), ] def _download_and_preprocess_data(csv_url, target_dir): dataset_sources 
= os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "data.txt") if os.path.exists(dataset_sources): return dataset_sources # Making path absolute target_dir = os.path.abspath(target_dir) csv_ref = requests.get(csv_url).text.split('\r\n')[1:-1] for part in csv_ref: part_filename = requests.head(part).headers.get("Content-Disposition").split(" ")[1].split("=")[1].replace('"', "") if not os.path.exists(os.path.join(target_dir, part_filename)): part_path = maybe_download(part_filename, target_dir, part) def _big_sha1(fname): s = hashlib.sha1() buffer_size = 65536 with open(fname, "rb") as f: while True: data = f.read(buffer_size) if not data: break s.update(data) return s.hexdigest() for (sha1, filename) in DATASET_RELEASE_SHA: print("Checking {} SHA1:".format(filename)) csum = _big_sha1(os.path.join(target_dir, filename)) if csum == sha1: print("\t{}: OK {}".format(filename, sha1)) else: print("\t{}: ERROR: expected {}, computed {}".format(filename, sha1, csum)) assert csum == sha1 # Conditionally extract data _maybe_extract(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip", "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020.zip") # Produce source text for extraction / conversion return _maybe_create_sources(os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020")) def _maybe_extract(target_dir, extracted_data, archive, final): # If target_dir/extracted_data does not exist, extract archive in target_dir extracted_path = os.path.join(target_dir, extracted_data) archive_path = os.path.join(target_dir, archive) final_archive = os.path.join(extracted_path, final) if not os.path.exists(extracted_path): if not os.path.exists(archive_path): print('No archive "%s" - building ...' 
% archive_path) all_zip_parts = glob(archive_path + ".*") all_zip_parts.sort() cmdline = "cat {} > {}".format(" ".join(all_zip_parts), archive_path) print('Building with "%s"' % cmdline) subprocess.check_call(cmdline, shell=True, cwd=target_dir) assert os.path.exists(archive_path) print('No directory "%s" - extracting archive %s ...' % (extracted_path, archive_path)) with zipfile.ZipFile(archive_path) as zip_f: zip_f.extractall(extracted_path) with zipfile.ZipFile(final_archive) as zip_f: zip_f.extractall(target_dir) else: print('Found directory "%s" - not extracting it from archive.' % extracted_path) def _maybe_create_sources(dir): dataset_sources = os.path.join(dir, "data.txt") MP3 = glob(os.path.join(dir, "**", "*.mp3")) XML = glob(os.path.join(dir, "**", "*.xml")) MP3_XML_Scores = [] MP3_XML_Fin = {} for f_mp3 in MP3: for f_xml in XML: b_mp3 = os.path.splitext(os.path.basename(f_mp3))[0] b_xml = os.path.splitext(os.path.basename(f_xml))[0] a_mp3 = b_mp3.split('_') a_xml = b_xml.split('_') score = 0 date_mp3 = a_mp3[0] date_xml = a_xml[0] if date_mp3 != date_xml: continue for i in range(min(len(a_mp3), len(a_xml))): if (a_mp3[i] == a_xml[i]): score += 1 if score >= 1: MP3_XML_Scores.append((f_mp3, f_xml, score)) # sort by score MP3_XML_Scores.sort(key=lambda x: x[2], reverse=True) for s_mp3, s_xml, score in MP3_XML_Scores: #print(s_mp3, s_xml, score) if score not in MP3_XML_Fin: MP3_XML_Fin[score] = {} if s_mp3 not in MP3_XML_Fin[score]: try: MP3.index(s_mp3) MP3.remove(s_mp3) MP3_XML_Fin[score][s_mp3] = s_xml except ValueError as ex: pass else: print("here:", MP3_XML_Fin[score][s_mp3], s_xml, file=sys.stderr) with open(dataset_sources, "w") as ds: for score in MP3_XML_Fin: for mp3 in MP3_XML_Fin[score]: xml = MP3_XML_Fin[score][mp3] if os.path.getsize(mp3) > 0 and os.path.getsize(xml) > 0: mp3 = os.path.relpath(mp3, dir) xml = os.path.relpath(xml, dir) ds.write('{},{},{:0.2e}\n'.format(xml, mp3, 2.5e-4)) else: print("Empty file {} or {}".format(mp3, xml), 
def maybe_normalize_for_digits(label):
    """Spell out the numbers contained in ``label`` as French words.

    Three passes are applied: digit groups separated by a space ("50 000")
    are joined, "hh:mm" / "hh:mm:ss" tokens are expanded to
    "... heures ... minutes ...", and finally every space-separated token
    containing a digit is converted with ``num2words(..., lang="fr")``.
    Tokens that mix letters and digits are converted digit by digit, each
    digit word being followed by "-".

    Raises:
        decimal.InvalidOperation: re-raised when ``num2words`` rejects a token.
    """
    # first, try to identify numbers like "50 000", "260 000"
    if " " in label:
        if any(s.isdigit() for s in label):
            thousands = re.compile(r"(\d{1,3}(?:\s*\d{3})*(?:,\d+)?)")
            maybe_thousands = thousands.findall(label)
            if len(maybe_thousands) > 0:
                while True:
                    (label, r) = re.subn(r"(\d)\s(\d{3})", "\\1\\2", label)
                    if r == 0:
                        break

    # this might be a time or duration in the form "hh:mm" or "hh:mm:ss"
    if ":" in label:
        date_or_time = re.compile(r"(\d{1,2}):(\d{2}):?(\d{2})?")
        for s in label.split(" "):
            if any(i.isdigit() for i in s):
                maybe_date_or_time = date_or_time.findall(s)
                if len(maybe_date_or_time) > 0:
                    maybe_hours = maybe_date_or_time[0][0]
                    maybe_minutes = maybe_date_or_time[0][1]
                    maybe_seconds = maybe_date_or_time[0][2]
                    if len(maybe_seconds) > 0:
                        label = label.replace("{}:{}:{}".format(maybe_hours, maybe_minutes, maybe_seconds), "{} heures {} minutes et {} secondes".format(maybe_hours, maybe_minutes, maybe_seconds))
                    else:
                        label = label.replace("{}:{}".format(maybe_hours, maybe_minutes), "{} heures et {} minutes".format(maybe_hours, maybe_minutes))

    new_label = []
    # pylint: disable=too-many-nested-blocks
    for s in label.split(" "):
        if any(i.isdigit() for i in s):
            s = s.replace(",", ".") # num2words requires "." for floats
            s = s.replace("\"", "")  # clean some data, num2words would choke on 1959"

            last_c = s[-1]
            if not last_c.isdigit(): # num2words will choke on "0.6.", "24 ?"
                s = s[:-1]

            if any(i.isalpha() for i in s): # So we have any(isdigit()) **and** any(isalpha), like "3D"
                ns = []
                for c in s:
                    nc = c
                    if c.isdigit(): # convert "3" to "trois-"
                        try:
                            nc = num2words(c, lang="fr") + "-"
                        except decimal.InvalidOperation as ex:
                            print("decimal.InvalidOperation: '{}'".format(s))
                            raise ex
                    ns.append(nc)
                # Join the converted characters (ns), not the untouched token;
                # a "-" left at the very end separates nothing, so drop it.
                s = "".join(ns).rstrip("-")
            else:
                try:
                    s = num2words(s, lang="fr")
                except decimal.InvalidOperation as ex:
                    print("decimal.InvalidOperation: '{}'".format(s))
                    raise ex
        new_label.append(s)
    return " ".join(new_label)


def maybe_normalize_for_specials_chars(label):
    """Replace symbols and known transcription glitches in ``label`` with speakable text."""
    label = label.replace("%", "pourcents")
    label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
    label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
    label = label.replace("+", " plus ") # clean + and make it speakable
    label = label.replace("€", " euros ") # clean euro symbol and make it speakable
    label = label.replace("., ", ", ") # clean some strange "4.0., " (20181017_Innovation.xml)
    label = label.replace("°", " degré ") # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
    label = label.replace("...", ".") # remove ellipsis
    label = label.replace("..", ".") # remove broken ellipsis
    label = label.replace("m²", "mètre-carrés") # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
    label = label.replace("[end]", "") # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
    label = label.replace(u'\xB8c', " ç") # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
    label = label.replace("C0²", "CO 2") # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
    return label


def maybe_normalize_for_anglicisms(label):
    """Expand the abbreviations "B2B"/"B2C" and the "#"/"@" signs in ``label``."""
    label = label.replace("B2B", "B to B")
    label = label.replace("B2C", "B to C")
    label = label.replace("#", "hashtag ")
    label = label.replace("@", "at ")
    return label
def one_sample(sample):
    """Cut one transcript segment out of its source audio and classify it.

    ``sample`` is indexed as (audio source, target dir, dataset basename,
    start time, duration, raw label, sample id). The segment is written with
    ``ffmpeg`` unless the target file already exists and its frame count is
    read with ``soxi -s``.

    Returns:
        ``(counter, rows)``: a fresh counter from ``get_counter()`` updated
        for this sample, and a list holding the CSV row of the sample when it
        was accepted (empty otherwise).
    """
    # NOTE(review): statement nesting was reconstructed from whitespace-collapsed
    # text; size and frame count are assumed to be read whether or not ffmpeg
    # had to run - confirm against the original file.
    file_size = -1
    frames = 0

    source_path = sample[0]
    out_root = sample[1]
    basename = sample[2]
    segment_start = sample[3]
    segment_length = sample[4]
    label = label_filter_fun(sample[5])
    segment_id = sample[6]

    clip_name = os.path.basename(source_path.replace(".wav", "_{:06}.wav".format(segment_id)))
    clip_path = os.path.join(out_root, basename, clip_name)

    if not os.path.exists(clip_path):
        ffmpeg_cmd = [
            "ffmpeg", "-i", source_path,
            "-ss", str(segment_start),
            "-t", str(segment_length),
            "-c", "copy", clip_path,
        ]
        subprocess.check_output(ffmpeg_cmd, stdin=subprocess.DEVNULL, stderr=subprocess.STDOUT)

    file_size = os.path.getsize(clip_path)
    frames = int(subprocess.check_output(["soxi", "-s", clip_path], stderr=subprocess.STDOUT))

    tally = get_counter()
    accepted = []
    seconds = frames / SAMPLE_RATE
    if file_size == -1:
        # Excluding samples that failed upon conversion
        tally["failed"] += 1
    elif label is None:
        # Excluding samples that failed on label validation
        tally["invalid_label"] += 1
    elif int(seconds * 1000 / 10 / 2) < len(str(label)):
        # Excluding samples that are too short to fit the transcript
        tally["too_short"] += 1
    elif seconds < MIN_SECS:
        # Excluding samples that are too short
        tally["too_short"] += 1
    elif seconds > MAX_SECS:
        # Excluding very long samples to keep a reasonable batch-size
        tally["too_long"] += 1
    else:
        # This one is good - keep it for the target CSV
        accepted.append((os.path.join(basename, clip_name), file_size, label))
        tally["imported_time"] += frames
    tally["all"] += 1
    tally["total_time"] += frames

    return (tally, accepted)
stderr=subprocess.STDOUT)) print("Source audio length: %s" % secs_to_hours(source_frames / SAMPLE_RATE)) # Get audiofile path and transcript for each sentence in tsv samples = [] tree = ET.parse(xml_file) root = tree.getroot() seq_id = 0 this_time = 0.0 this_duration = 0.0 prev_time = 0.0 prev_duration = 0.0 this_text = "" for child in root: if child.tag == "row": cur_time = float(child.attrib["timestamp"]) cur_duration = float(child.attrib["timedur"]) cur_text = child.text if this_time == 0.0: this_time = cur_time delta = cur_time - (prev_time + prev_duration) # rel_tol value is made from trial/error to try and compromise between: # - cutting enough to skip missing words # - not too short, not too long sentences is_close = math.isclose(cur_time, this_time + this_duration, rel_tol=rel_tol) is_short = ((this_duration + cur_duration + delta) < MAX_SECS) # when the previous element is close enough **and** this does not # go over MAX_SECS, we append content if (is_close and is_short): this_duration += cur_duration + delta this_text += cur_text else: samples.append((audio_source, target_dir, dataset_basename, this_time, this_duration, this_text, seq_id)) this_time = cur_time this_duration = cur_duration this_text = cur_text seq_id += 1 prev_time = cur_time prev_duration = cur_duration # Keep track of how many samples are good vs. 
def write_general_csv(target_dir, _rows, _counter):
    """Write the merged train/dev/test CSVs and print the final statistics.

    Rows are distributed round-robin by position: every 10th row (index
    ``i % 10 == 0``) goes to the test set, the following one
    (``i % 10 == 1``) to the dev set and the remaining eight to the train
    set, i.e. an 80/10/10 split.

    ``_rows`` is an iterable of ``(wav_filename, wav_filesize, transcript)``
    tuples; ``_counter`` is the aggregated import counter passed on to
    ``print_import_report``.
    """
    target_csv_template = os.path.join(target_dir, "ccpmf_{}.csv")

    def _open_csv(split):
        # newline="" is what the csv module requires for files it writes to,
        # and an explicit UTF-8 encoding keeps accented transcripts intact
        # regardless of the platform's default locale.
        return open(target_csv_template.format(split), "w", encoding="utf-8", newline="")

    with _open_csv("train") as train_csv_file, \
            _open_csv("dev") as dev_csv_file, \
            _open_csv("test") as test_csv_file:
        train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)  # 80%
        train_writer.writeheader()
        dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)  # 10%
        dev_writer.writeheader()
        test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)  # 10%
        test_writer.writeheader()

        bar = progressbar.ProgressBar(max_value=len(_rows), widgets=SIMPLE_BAR)
        for i, item in enumerate(bar(_rows)):
            i_mod = i % 10
            if i_mod == 0:
                writer = test_writer
            elif i_mod == 1:
                writer = dev_writer
            else:
                writer = train_writer
            writer.writerow({"wav_filename": item[0], "wav_filesize": item[1], "transcript": item[2]})

    print("")
    print("~~~~ FINAL STATISTICS ~~~~")
    print_import_report(_counter, SAMPLE_RATE, MAX_SECS)
    print("~~~~ (FINAL STATISTICS) ~~~~")
    print("")
import tarfile from glob import glob from multiprocessing import Pool import progressbar import sox from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download from deepspeech_training.util.importers import ( get_counter, get_imported_samples, print_import_report, ) from deepspeech_training.util.importers import validate_label_eng as validate_label FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"] SAMPLE_RATE = 16000 MAX_SECS = 10 ARCHIVE_DIR_NAME = "cv_corpus_v1" ARCHIVE_NAME = ARCHIVE_DIR_NAME + ".tar.gz" ARCHIVE_URL = ( "https://s3.us-east-2.amazonaws.com/common-voice-data-download/" + ARCHIVE_NAME ) def _download_and_preprocess_data(target_dir): # Making path absolute target_dir = os.path.abspath(target_dir) # Conditionally download data archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL) # Conditionally extract common voice data _maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path) # Conditionally convert common voice CSV files and mp3 data to DeepSpeech CSVs and wav _maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME) def _maybe_extract(target_dir, extracted_data, archive_path): # If target_dir/extracted_data does not exist, extract archive in target_dir extracted_path = os.join(target_dir, extracted_data) if not os.path.exists(extracted_path): print('No directory "%s" - extracting archive...' % extracted_path) with tarfile.open(archive_path) as tar: tar.extractall(target_dir) else: print('Found directory "%s" - not extracting it from archive.' 
% extracted_path) def _maybe_convert_sets(target_dir, extracted_data): extracted_dir = os.path.join(target_dir, extracted_data) for source_csv in glob(os.path.join(extracted_dir, "*.csv")): _maybe_convert_set( extracted_dir, source_csv, os.path.join(target_dir, os.path.split(source_csv)[-1]), ) def one_sample(sample): mp3_filename = sample[0] # Storing wav files next to the mp3 ones - just with a different suffix wav_filename = path.splitext(mp3_filename)[0] + ".wav" _maybe_convert_wav(mp3_filename, wav_filename) frames = int( subprocess.check_output(["soxi", "-s", wav_filename], stderr=subprocess.STDOUT) ) file_size = -1 if os.path.exists(wav_filename): file_size = path.getsize(wav_filename) frames = int( subprocess.check_output( ["soxi", "-s", wav_filename], stderr=subprocess.STDOUT ) ) label = validate_label(sample[1]) rows = [] counter = get_counter() if file_size == -1: # Excluding samples that failed upon conversion counter["failed"] += 1 elif label is None: # Excluding samples that failed on label validation counter["invalid_label"] += 1 elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)): # Excluding samples that are too short to fit the transcript counter["too_short"] += 1 elif frames / SAMPLE_RATE > MAX_SECS: # Excluding very long samples to keep a reasonable batch-size counter["too_long"] += 1 else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) def _maybe_convert_set(extracted_dir, source_csv, target_csv): print() if os.path.exists(target_csv): print('Found CSV file "%s" - not importing "%s".' % (target_csv, source_csv)) return print('No CSV file "%s" - importing "%s"...' 
% (target_csv, source_csv)) samples = [] with open(source_csv) as source_csv_file: reader = csv.DictReader(source_csv_file) for row in reader: samples.append((os.path.join(extracted_dir, row["filename"]), row["text"])) # Mutable counters for the concurrent embedded routine counter = get_counter() num_samples = len(samples) rows = [] print("Importing mp3 files...") pool = Pool() bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1): counter += processed[0] rows += processed[1] bar.update(i) bar.update(num_samples) pool.close() pool.join() print('Writing "%s"...' % target_csv) with open(target_csv, "w", encoding="utf-8", newline="") as target_csv_file: writer = csv.DictWriter(target_csv_file, fieldnames=FIELDNAMES) writer.writeheader() bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR) for filename, file_size, transcript in bar(rows): writer.writerow( { "wav_filename": filename, "wav_filesize": file_size, "transcript": transcript, } ) imported_samples = get_imported_samples(counter) assert counter["all"] == num_samples assert len(rows) == imported_samples print_import_report(counter, SAMPLE_RATE, MAX_SECS) def _maybe_convert_wav(mp3_filename, wav_filename): if not os.path.exists(wav_filename): transformer = sox.Transformer() transformer.convert(samplerate=SAMPLE_RATE) try: transformer.build(mp3_filename, wav_filename) except sox.core.SoxError: pass if __name__ == "__main__": _download_and_preprocess_data(sys.argv[1]) ================================================ FILE: bin/import_cv2.py ================================================ #!/usr/bin/env python """ Broadly speaking, this script takes the audio downloaded from Common Voice for a certain language, in addition to the *.tsv files output by CorporaCreator, and the script formats the data and transcripts to be in a state usable by DeepSpeech.py Use "python3 import_cv2.py -h" for help """ 
class LabelFilter:
    """Normalizes and validates a transcript before it is imported.

    ``normalize`` toggles ASCII folding, ``alphabet`` is an optional object
    exposing ``CanEncode(label)``, and ``validate_fun`` maps a label to its
    validated form (or ``None`` to reject it).
    """

    def __init__(self, normalize, alphabet, validate_fun):
        self.normalize = normalize
        self.alphabet = alphabet
        self.validate_fun = validate_fun

    def filter(self, label):
        """Return the cleaned label, or ``None`` when it must be dropped."""
        if self.normalize:
            # Decompose accented characters, then drop everything non-ASCII.
            decomposed = unicodedata.normalize("NFKD", label.strip())
            ascii_bytes = decomposed.encode("ascii", "ignore")
            label = ascii_bytes.decode("ascii", "ignore")

        label = self.validate_fun(label)

        # Only consult the alphabet when one was given and validation kept
        # a non-empty label.
        if self.alphabet and label:
            if not self.alphabet.CanEncode(label):
                return None
        return label
label validation counter["invalid_label"] += 1 elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)): # Excluding samples that are too short to fit the transcript counter["too_short"] += 1 elif frames / SAMPLE_RATE > MAX_SECS: # Excluding very long samples to keep a reasonable batch-size counter["too_long"] += 1 else: # This one is good - keep it for the target CSV rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2])) counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) def _maybe_convert_set(dataset, tsv_dir, audio_dir, filter_obj, space_after_every_character=None, rows=None, exclude=None): exclude_transcripts = set() exclude_speakers = set() if exclude is not None: for sample in exclude: exclude_transcripts.add(sample[2]) exclude_speakers.add(sample[3]) if rows is None: rows = [] input_tsv = os.path.join(os.path.abspath(tsv_dir), dataset + ".tsv") if not os.path.isfile(input_tsv): return rows print("Loading TSV file: ", input_tsv) # Get audiofile path and transcript for each sentence in tsv samples = [] with open(input_tsv, encoding="utf-8") as input_tsv_file: reader = csv.DictReader(input_tsv_file, delimiter="\t") for row in reader: samples.append((os.path.join(audio_dir, row["path"]), row["sentence"], row["client_id"])) counter = get_counter() num_samples = len(samples) print("Importing mp3 files...") pool = Pool(initializer=init_worker, initargs=(PARAMS,)) bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1): counter += processed[0] rows += processed[1] bar.update(i) bar.update(num_samples) pool.close() pool.join() imported_samples = get_imported_samples(counter) assert counter["all"] == num_samples assert len(rows) == imported_samples print_import_report(counter, SAMPLE_RATE, MAX_SECS) output_csv = os.path.join(os.path.abspath(audio_dir), dataset + ".csv") print("Saving 
new DeepSpeech-formatted CSV file to: ", output_csv) with open(output_csv, "w", encoding="utf-8", newline="") as output_csv_file: print("Writing CSV file for DeepSpeech.py as: ", output_csv) writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES) writer.writeheader() bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR) for filename, file_size, transcript, speaker in bar(rows): if transcript in exclude_transcripts or speaker in exclude_speakers: continue if space_after_every_character: writer.writerow( { "wav_filename": filename, "wav_filesize": file_size, "transcript": " ".join(transcript), } ) else: writer.writerow( { "wav_filename": filename, "wav_filesize": file_size, "transcript": transcript, } ) return rows def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False): exclude = [] for dataset in ["test", "dev", "train", "validated", "other"]: set_samples = _maybe_convert_set(dataset, tsv_dir, audio_dir, space_after_every_character) if dataset in ["test", "dev"]: exclude += set_samples if dataset == "validated": _maybe_convert_set("train-all", tsv_dir, audio_dir, space_after_every_character, rows=set_samples, exclude=exclude) def _maybe_convert_wav(mp3_filename, wav_filename): if not os.path.exists(wav_filename): transformer = sox.Transformer() transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS) try: transformer.build(mp3_filename, wav_filename) except sox.core.SoxError: pass def parse_args(): parser = get_importers_parser(description="Import CommonVoice v2.0 corpora") parser.add_argument("tsv_dir", help="Directory containing tsv files") parser.add_argument( "--audio_dir", help='Directory containing the audio clips - defaults to "/clips"', ) parser.add_argument( "--filter_alphabet", help="Exclude samples with characters not in provided alphabet", ) parser.add_argument( "--normalize", action="store_true", help="Converts diacritic characters to their base ones", ) parser.add_argument( 
def main():
    """Run the import using the module-level ``PARAMS`` namespace.

    The audio clips are looked up in ``PARAMS.audio_dir`` when given,
    otherwise in the ``clips`` sub-directory of ``PARAMS.tsv_dir``.
    """
    clips_dir = PARAMS.audio_dir or os.path.join(PARAMS.tsv_dir, "clips")
    _preprocess_data(
        PARAMS.tsv_dir,
        clips_dir,
        PARAMS.space_after_every_character,
    )
all_2004.loc[ all_2004["wav_filename"].str.endswith("fe_03_00265-33.53-33.81.wav"), "transcript", ] = "correct" all_2004.loc[ all_2004["wav_filename"].str.endswith("fe_03_00991-527.39-528.3.wav"), "transcript", ] = "that's one of those" all_2005.loc[ all_2005["wav_filename"].str.endswith("fe_03_10282-344.42-344.84.wav"), "transcript", ] = "they don't want" all_2005.loc[ all_2005["wav_filename"].str.endswith("fe_03_10677-101.04-106.41.wav"), "transcript", ] = "uh my mine yeah the german shepherd pitbull mix he snores almost as loud as i do" # The following file is just a short sound and not at all transcribed like provided. # So we just exclude it. all_2004 = all_2004[ ~all_2004["wav_filename"].str.endswith("fe_03_00027-393.8-394.05.wav") ] # The following file is far too long and would ruin our training batch size. # So we just exclude it. all_2005 = all_2005[ ~all_2005["wav_filename"].str.endswith("fe_03_11487-31.09-234.06.wav") ] # The following file is too large for its transcript, so we just exclude it. 
all_2004 = all_2004[ ~all_2004["wav_filename"].str.endswith("fe_03_01326-307.42-307.93.wav") ] # Conditionally split Fisher data into train/validation/test sets train_2004, dev_2004, test_2004 = _split_sets(all_2004) train_2005, dev_2005, test_2005 = _split_sets(all_2005) # Join 2004 and 2005 data train_files = train_2004.append(train_2005) dev_files = dev_2004.append(dev_2005) test_files = test_2004.append(test_2005) # Write sets to disk as CSV files train_files.to_csv(os.path.join(data_dir, "fisher-train.csv"), index=False) dev_files.to_csv(os.path.join(data_dir, "fisher-dev.csv"), index=False) test_files.to_csv(os.path.join(data_dir, "fisher-test.csv"), index=False) def _maybe_convert_wav(data_dir, original_data, converted_data): source_dir = os.path.join(data_dir, original_data) target_dir = os.path.join(data_dir, converted_data) # Conditionally convert sph files to wav files if os.path.exists(target_dir): print("skipping maybe_convert_wav") return # Create target_dir os.makedirs(target_dir) # Loop over sph files in source_dir and convert each to 16-bit PCM wav for root, dirnames, filenames in os.walk(source_dir): for filename in fnmatch.filter(filenames, "*.sph"): sph_file = os.path.join(root, filename) for channel in ["1", "2"]: wav_filename = ( os.path.splitext(os.path.basename(sph_file))[0] + "_c" + channel + ".wav" ) wav_file = os.path.join(target_dir, wav_filename) print("converting {} to {}".format(sph_file, wav_file)) subprocess.check_call( ["sph2pipe", "-c", channel, "-p", "-f", "rif", sph_file, wav_file] ) def _parse_transcriptions(trans_file): segments = [] with codecs.open(trans_file, "r", "utf-8") as fin: for line in fin: if line.startswith("#") or len(line) <= 1: continue tokens = line.split() start_time = float(tokens[0]) stop_time = float(tokens[1]) speaker = tokens[2] transcript = " ".join(tokens[3:]) # We need to do the encode-decode dance here because encode # returns a bytes() object on Python 3, and text_to_char_array # expects a string. 
transcript = ( unicodedata.normalize("NFKD", transcript) .encode("ascii", "ignore") .decode("ascii", "ignore") ) segments.append( { "start_time": start_time, "stop_time": stop_time, "speaker": speaker, "transcript": transcript, } ) return segments def _split_wav_and_sentences(data_dir, trans_data, original_data, converted_data): trans_dir = os.path.join(data_dir, trans_data) source_dir = os.path.join(data_dir, original_data) target_dir = os.path.join(data_dir, converted_data) if not os.path.exists(target_dir): os.makedirs(target_dir) files = [] # Loop over transcription files and split corresponding wav for root, dirnames, filenames in os.walk(trans_dir): for filename in fnmatch.filter(filenames, "*.txt"): trans_file = os.path.join(root, filename) segments = _parse_transcriptions(trans_file) # Open wav corresponding to transcription file wav_filenames = [ os.path.splitext(os.path.basename(trans_file))[0] + "_c" + channel + ".wav" for channel in ["1", "2"] ] wav_files = [ os.path.join(source_dir, wav_filename) for wav_filename in wav_filenames ] print("splitting {} according to {}".format(wav_files, trans_file)) origAudios = [ librosa.load(wav_file, sr=16000, mono=False) for wav_file in wav_files ] # Loop over segments and split wav_file for each segment for segment in segments: # Create wav segment filename start_time = segment["start_time"] stop_time = segment["stop_time"] new_wav_filename = ( os.path.splitext(os.path.basename(trans_file))[0] + "-" + str(start_time) + "-" + str(stop_time) + ".wav" ) new_wav_file = os.path.join(target_dir, new_wav_filename) channel = 0 if segment["speaker"] == "A:" else 1 _split_and_resample_wav( origAudios[channel], start_time, stop_time, new_wav_file ) new_wav_filesize = os.path.getsize(new_wav_file) transcript = validate_label(segment["transcript"]) if transcript != None: files.append( (os.path.abspath(new_wav_file), new_wav_filesize, transcript) ) return pandas.DataFrame( data=files, columns=["wav_filename", "wav_filesize", 
"transcript"] ) def _split_audio(origAudio, start_time, stop_time): audioData, frameRate = origAudio nChannels = len(audioData.shape) startIndex = int(start_time * frameRate) stopIndex = int(stop_time * frameRate) return ( audioData[startIndex:stopIndex] if 1 == nChannels else audioData[:, startIndex:stopIndex] ) def _split_and_resample_wav(origAudio, start_time, stop_time, new_wav_file): frameRate = origAudio[1] chunkData = _split_audio(origAudio, start_time, stop_time) soundfile.write(new_wav_file, chunkData, frameRate, "PCM_16") def _split_sets(filelist): """ randomply split the datasets into train, validation, and test sets where the size of the validation and test sets are determined by the `get_sample_size` function. """ random.shuffle(filelist) sample_size = get_sample_size(len(filelist)) train_beg = 0 train_end = len(filelist) - 2 * sample_size dev_beg = train_end dev_end = train_end + sample_size test_beg = dev_end test_end = len(filelist) return ( filelist[train_beg:train_end], filelist[dev_beg:dev_end], filelist[test_beg:test_end], ) def get_sample_size(population_size): """calculates the sample size for a 99% confidence and 1% margin of error """ margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( margin_of_error ** 2 ) sample_size = 0 for train_size in range(population_size, 0, -1): denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( margin_of_error ** 2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: break return sample_size if __name__ == "__main__": _download_and_preprocess_data(sys.argv[1]) ================================================ FILE: bin/import_freestmandarin.py ================================================ #!/usr/bin/env python import glob import os import tarfile import numpy as np import pandas from 
def preprocess_data(tgz_file, target_dir):
    """Extract the corpus archive and write train/dev/test CSVs.

    The archive is extracted into ``target_dir``; every ``*.wav`` in the
    ``ST-CMDS-20170001_1-OS`` folder is paired with the ``.txt`` file of the
    same name. The resulting table is shuffled with a fixed seed and split
    into ``freestmandarin_{train,dev,test}.csv`` inside ``target_dir``.
    Train samples estimated to be longer than 10 seconds are dropped.
    """
    # First extract main archive and sub-archives
    extract(tgz_file, target_dir)
    main_folder = os.path.join(target_dir, "ST-CMDS-20170001_1-OS")

    # Folder structure is now:
    # - ST-CMDS-20170001_1-OS/
    #   - *.wav
    #   - *.txt
    #   - *.metadata

    def load_set(glob_path):
        set_files = []
        for wav_filename in glob.glob(glob_path):
            wav_filesize = os.path.getsize(wav_filename)
            txt_filename = os.path.splitext(wav_filename)[0] + ".txt"
            # Read the Mandarin transcript as UTF-8 explicitly instead of
            # relying on the platform's default locale encoding.
            # NOTE(review): assumes the corpus .txt files are UTF-8 - confirm.
            with open(txt_filename, "r", encoding="utf-8") as fin:
                transcript = fin.read()
            set_files.append((wav_filename, wav_filesize, transcript))
        return set_files

    # Load all files, then deterministically split into train/dev/test sets
    all_files = load_set(os.path.join(main_folder, "*.wav"))
    df = pandas.DataFrame(data=all_files, columns=COLUMN_NAMES)
    df.sort_values(by="wav_filename", inplace=True)

    indices = np.arange(0, len(df))
    np.random.seed(12345)
    np.random.shuffle(indices)

    # Total corpus size: 102600 samples. 5000 samples gives us 99% confidence
    # level with a margin of error of under 2%.
    test_indices = indices[-5000:]
    dev_indices = indices[-10000:-5000]
    train_indices = indices[:-10000]

    train_files = df.iloc[train_indices]
    # Duration estimate: strip the 44-byte WAV header, then divide by the
    # sample rate (16000) and by 2 bytes per sample.
    durations = (train_files["wav_filesize"] - 44) / 16000 / 2
    train_files = train_files[durations <= 10.0]
    print("Trimming {} samples > 10 seconds".format((durations > 10.0).sum()))
    dest_csv = os.path.join(target_dir, "freestmandarin_train.csv")
    print("Saving train set into {}...".format(dest_csv))
    train_files.to_csv(dest_csv, index=False)

    dev_files = df.iloc[dev_indices]
    dest_csv = os.path.join(target_dir, "freestmandarin_dev.csv")
    print("Saving dev set into {}...".format(dest_csv))
    dev_files.to_csv(dest_csv, index=False)

    test_files = df.iloc[test_indices]
    dest_csv = os.path.join(target_dir, "freestmandarin_test.csv")
    print("Saving test set into {}...".format(dest_csv))
    test_files.to_csv(dest_csv, index=False)
def setup_logging(level):
    """Setup basic logging

    Args:
      level (int): minimum log level for emitting messages
    """
    # Imported locally because this module does not import ``sys`` at the
    # top; without it ``sys.stdout`` below raised NameError.
    import sys

    # Named ``log_format`` so the builtin ``format`` is not shadowed.
    log_format = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s"
    logging.basicConfig(
        level=level, stream=sys.stdout, format=log_format, datefmt="%Y-%m-%d %H:%M:%S"
    )
_download(self, audio_url, transcript, audio_length, mp3_directory): if audio_url == "audio_url": return mp3_filename = os.path.join(mp3_directory, os.path.basename(audio_url)) if not os.path.exists(mp3_filename): _logger.debug("Downloading mp3 file...%s", audio_url) urllib.request.urlretrieve(audio_url, mp3_filename) else: _logger.debug("Already downloaded mp3 file...%s", audio_url) class GramVaaniConverter: """GramVaaniConverter converts the mp3's to wav's for a GramVaani dataset. Args: target_dir (str): The path to download the data from mp3_directory (os.path): The path containing the GramVaani mp3's Attributes: target_dir (str): The target directory passed as a command line argument mp3_directory (os.path): The path containing the GramVaani mp3's """ def __init__(self, target_dir, mp3_directory): self.target_dir = target_dir self.mp3_directory = Path(mp3_directory) def convert(self): """Converts the mp3's associated with this instance to wav's Return: wav_directory (os.path): The directory into which the associated wav's were downloaded """ wav_directory = self._pre_convert() for mp3_filename in self.mp3_directory.glob("**/*.mp3"): wav_filename = os.path.join( wav_directory, os.path.splitext(os.path.basename(mp3_filename))[0] + ".wav", ) if not os.path.exists(wav_filename): _logger.debug( "Converting mp3 file %s to wav file %s" % (mp3_filename, wav_filename) ) transformer = Transformer() transformer.convert( samplerate=SAMPLE_RATE, n_channels=N_CHANNELS, bitdepth=BITDEPTH ) transformer.build(str(mp3_filename), str(wav_filename)) else: _logger.debug( "Already converted mp3 file %s to wav file %s" % (mp3_filename, wav_filename) ) return wav_directory def _pre_convert(self): wav_directory = os.path.join(self.target_dir, "wav") if not os.path.exists(self.target_dir): _logger.info("Creating directory...%s", self.target_dir) os.mkdir(self.target_dir) if not os.path.exists(wav_directory): _logger.info("Creating directory...%s", wav_directory) os.mkdir(wav_directory) 
class GramVaaniDataSets:
    """Builds and saves the train/dev/test CSV data sets of a GramVaani import.

    Args:
        target_dir (str): Directory containing the "wav" folder; the CSVs are written here
        wav_directory (str): Directory holding the converted wav files
        gram_vaani_csv: Object whose ``data`` attribute is the GramVaani data frame
            with the columns "audio_url", "transcript" and "audio_length"

    Attributes:
        raw (pd.DataFrame): One row per csv entry (wav path, wav size, validated transcript)
        valid (pd.DataFrame): Shuffled subset of ``raw`` that passed the validity checks
        train (pd.DataFrame): Training split of ``valid``
        dev (pd.DataFrame): Development split of ``valid``
        test (pd.DataFrame): Test split of ``valid``
    """

    COLUMNS = ["wav_filename", "wav_filesize", "transcript"]

    def __init__(self, target_dir, wav_directory, gram_vaani_csv):
        self.target_dir = target_dir
        self.wav_directory = wav_directory
        self.csv_data = gram_vaani_csv.data
        self.raw = pd.DataFrame(columns=list(self.COLUMNS))
        self.valid = pd.DataFrame(columns=list(self.COLUMNS))
        self.train = pd.DataFrame(columns=list(self.COLUMNS))
        self.dev = pd.DataFrame(columns=list(self.COLUMNS))
        self.test = pd.DataFrame(columns=list(self.COLUMNS))

    def create(self):
        """Fills ``raw``, filters it into ``valid`` and splits it into train/dev/test."""
        self._convert_csv_data_to_raw_data()
        self.raw.index = range(len(self.raw.index))
        self.valid = self.raw[self._is_valid_raw_rows()]
        # Shuffle, then renumber so that positions run from 0 to len - 1.
        self.valid = self.valid.sample(frac=1).reset_index(drop=True)
        train_size, dev_size, test_size = self._calculate_data_set_sizes()
        self._split_valid(train_size, dev_size, test_size)

    def _split_valid(self, train_size, dev_size, test_size):
        """Cuts ``valid`` into three disjoint, consecutive splits of the given sizes.

        Positional ``iloc`` slicing is half-open. The previous label-based
        ``loc[a:b]`` slicing included both endpoints, so the rows at the split
        boundaries ended up in two data sets at once.
        """
        dev_end = train_size + dev_size
        self.train = self.valid.iloc[:train_size]
        self.dev = self.valid.iloc[train_size:dev_end]
        self.test = self.valid.iloc[dev_end : dev_end + test_size]

    def _convert_csv_data_to_raw_data(self):
        """Maps every csv row to a (wav_filename, wav_filesize, transcript) row of ``raw``."""
        self.raw[["wav_filename", "wav_filesize", "transcript"]] = self.csv_data[
            ["audio_url", "transcript", "audio_length"]
        ].swifter.apply(
            func=lambda arg: self._convert_csv_data_to_raw_data_impl(*arg),
            axis=1,
            raw=True,
        )

    def _convert_csv_data_to_raw_data_impl(self, audio_url, transcript, audio_length):
        """Converts one csv row; a row repeating the header yields the column names."""
        if audio_url == "audio_url":
            return pd.Series(["wav_filename", "wav_filesize", "transcript"])
        mp3_filename = os.path.basename(audio_url)
        wav_relative_filename = os.path.join(
            "wav", os.path.splitext(os.path.basename(mp3_filename))[0] + ".wav"
        )
        wav_filesize = os.path.getsize(
            os.path.join(self.target_dir, wav_relative_filename)
        )
        # NOTE(review): validate_label is resolved at module scope here, while
        # main() binds the result of get_validate_label() to a local - confirm
        # that a module-level validate_label exists.
        transcript = validate_label(transcript)
        if transcript is None:
            # Rejected labels become "" so the transcript check filters them out.
            transcript = ""
        return pd.Series([wav_relative_filename, wav_filesize, transcript])

    def _is_valid_raw_rows(self):
        """Returns a boolean Series: True where transcript and wav length are both valid."""
        is_valid_raw_row = [
            (is_valid_raw_transcript & is_valid_raw_wav_frame)
            for is_valid_raw_transcript, is_valid_raw_wav_frame in zip(
                self._is_valid_raw_transcripts(), self._is_valid_raw_wav_frames()
            )
        ]
        return pd.Series(is_valid_raw_row)

    def _is_valid_raw_transcripts(self):
        """Returns a boolean Series: True where the transcript is truthy (non-empty)."""
        return pd.Series([bool(transcript) for transcript in self.raw.transcript])

    def _is_valid_raw_wav_frames(self):
        """Returns a boolean Series: True where the wav length suits its transcript.

        The sample count of every wav file is read by running ``soxi -s``.
        """
        transcripts = [str(transcript) for transcript in self.raw.transcript]
        wav_filepaths = [
            os.path.join(self.target_dir, str(wav_filename))
            for wav_filename in self.raw.wav_filename
        ]
        wav_frames = [
            int(
                subprocess.check_output(
                    ["soxi", "-s", wav_filepath], stderr=subprocess.STDOUT
                )
            )
            for wav_filepath in wav_filepaths
        ]
        is_valid_raw_wav_frames = [
            self._is_wav_frame_valid(wav_frame, transcript)
            for wav_frame, transcript in zip(wav_frames, transcripts)
        ]
        return pd.Series(is_valid_raw_wav_frames)

    def _is_wav_frame_valid(self, wav_frame, transcript):
        """Returns False when the clip is too short for its transcript or too long.

        Too short: ``wav_frame / SAMPLE_RATE * 1000 / 10 / 2`` (truncated) is
        smaller than the transcript length. Too long: ``wav_frame / SAMPLE_RATE``
        exceeds MAX_SECS.
        """
        if int(wav_frame / SAMPLE_RATE * 1000 / 10 / 2) < len(str(transcript)):
            return False
        if wav_frame / SAMPLE_RATE > MAX_SECS:
            return False
        return True

    def _calculate_data_set_sizes(self):
        """Returns (train_size, dev_size, test_size); test receives the remainder."""
        total_size = len(self.valid)
        dev_size = math.floor(total_size * DEV_PERCENTAGE)
        train_size = math.floor(total_size * TRAIN_PERCENTAGE)
        test_size = total_size - (train_size + dev_size)
        return (train_size, dev_size, test_size)

    def save(self):
        """Writes train.csv, dev.csv and test.csv into ``target_dir``."""
        datasets = ["train", "dev", "test"]
        for dataset in datasets:
            self._save(dataset)

    def _save(self, dataset):
        """Writes the data frame stored in attribute ``dataset`` to <target_dir>/<dataset>.csv."""
        dataset_path = os.path.join(self.target_dir, dataset + ".csv")
        dataframe = getattr(self, dataset)
        dataframe.to_csv(
            dataset_path,
            index=False,
            encoding="utf-8",
            escapechar="\\",
            quoting=csv.QUOTE_MINIMAL,
        )
def _download_and_preprocess_data(data_dir):
    """Fetch the LDC93S1 sample (wav + transcript) and write ldc93s1.csv into data_dir.

    The transcript file is lower-cased, its first two space-separated tokens
    are dropped and every "." is removed before it is stored in the CSV.
    """
    base_name = "LDC93S1"
    base_url = "https://catalog.ldc.upenn.edu/desc/addenda/"

    def fetch(extension):
        # Download <base_name><extension> into data_dir unless already present.
        filename = base_name + extension
        return maybe_download(filename, data_dir, base_url + filename)

    wav_path = fetch(".wav")
    txt_path = fetch(".txt")

    with open(txt_path, "r") as handle:
        tokens = handle.read().strip().lower().split(" ")
    transcript = " ".join(tokens[2:]).replace(".", "")

    row = (os.path.abspath(wav_path), os.path.getsize(wav_path), transcript)
    frame = pandas.DataFrame(
        data=[row], columns=["wav_filename", "wav_filesize", "transcript"]
    )
    frame.to_csv(os.path.join(data_dir, "ldc93s1.csv"), index=False)
def _download_and_preprocess_data(data_dir):
    """Download, extract and convert all seven LibriSpeech subsets into data_dir.

    Each subset goes through three passes (download, extract, FLAC->WAV plus
    transcript split) and ends up as data_dir/librivox-<subset>.csv.
    """
    archive_urls = (
        "http://www.openslr.org/resources/12/train-clean-100.tar.gz",
        "http://www.openslr.org/resources/12/train-clean-360.tar.gz",
        "http://www.openslr.org/resources/12/train-other-500.tar.gz",
        "http://www.openslr.org/resources/12/dev-clean.tar.gz",
        "http://www.openslr.org/resources/12/dev-other.tar.gz",
        "http://www.openslr.org/resources/12/test-clean.tar.gz",
        "http://www.openslr.org/resources/12/test-other.tar.gz",
    )
    archive_names = [os.path.split(url)[1] for url in archive_urls]
    # The subset name is the archive file name without its ".tar.gz" suffix.
    subsets = [name[: -len(".tar.gz")] for name in archive_names]

    # Conditionally download data to data_dir
    print(
        "Downloading Librivox data set (55GB) into {} if not already present...".format(
            data_dir
        )
    )
    archives = []
    with progressbar.ProgressBar(max_value=7, widget=progressbar.AdaptiveETA) as bar:
        for step, (name, url) in enumerate(zip(archive_names, archive_urls)):
            archives.append(maybe_download(name, data_dir, url))
            bar.update(step)

    # Conditionally extract LibriSpeech data.
    # Every archive is unpacked into data_dir, but existence is tested in
    # data_dir/LibriSpeech because the archives share that root.
    print("Extracting librivox data if not already extracted...")
    with progressbar.ProgressBar(max_value=7, widget=progressbar.AdaptiveETA) as bar:
        librivox_dir = "LibriSpeech"
        work_dir = os.path.join(data_dir, librivox_dir)
        for step, (subset, archive) in enumerate(zip(subsets, archives)):
            _maybe_extract(data_dir, os.path.join(librivox_dir, subset), archive)
            bar.update(step)

    # Convert FLAC data to wav, from:
    #   data_dir/LibriSpeech/split/1/2/1-2-3.flac
    # to:
    #   data_dir/LibriSpeech/split-wav/1-2-3.wav
    #
    # And split LibriSpeech transcriptions, from:
    #   data_dir/LibriSpeech/split/1/2/1-2.trans.txt
    # to:
    #   data_dir/LibriSpeech/split-wav/1-2-0.txt
    #   data_dir/LibriSpeech/split-wav/1-2-1.txt
    #   ...
    print("Converting FLAC to WAV and splitting transcriptions...")
    frames = []
    with progressbar.ProgressBar(max_value=7, widget=progressbar.AdaptiveETA) as bar:
        for step, subset in enumerate(subsets):
            frames.append(
                _convert_audio_and_split_sentences(work_dir, subset, subset + "-wav")
            )
            bar.update(step)

    # Write sets to disk as CSV files
    for subset, frame in zip(subsets, frames):
        frame.to_csv(
            os.path.join(data_dir, "librivox-" + subset + ".csv"), index=False
        )
os.makedirs(target_dir) # Loop over transcription files and split each one # # The format for each file 1-2.trans.txt is: # 1-2-0 transcription of 1-2-0.flac # 1-2-1 transcription of 1-2-1.flac # ... # # Each file is then split into several files: # 1-2-0.txt (contains transcription of 1-2-0.flac) # 1-2-1.txt (contains transcription of 1-2-1.flac) # ... # # We also convert the corresponding FLACs to WAV in the same pass files = [] for root, dirnames, filenames in os.walk(source_dir): for filename in fnmatch.filter(filenames, "*.trans.txt"): trans_filename = os.path.join(root, filename) with codecs.open(trans_filename, "r", "utf-8") as fin: for line in fin: # Parse each segment line first_space = line.find(" ") seqid, transcript = line[:first_space], line[first_space + 1 :] # We need to do the encode-decode dance here because encode # returns a bytes() object on Python 3, and text_to_char_array # expects a string. transcript = ( unicodedata.normalize("NFKD", transcript) .encode("ascii", "ignore") .decode("ascii", "ignore") ) transcript = transcript.lower().strip() # Convert corresponding FLAC to a WAV flac_file = os.path.join(root, seqid + ".flac") wav_file = os.path.join(target_dir, seqid + ".wav") if not os.path.exists(wav_file): tfm = Transformer() tfm.set_output_format(rate=SAMPLE_RATE) tfm.build(flac_file, wav_file) wav_filesize = os.path.getsize(wav_file) files.append((os.path.abspath(wav_file), wav_filesize, transcript)) return pandas.DataFrame( data=files, columns=["wav_filename", "wav_filesize", "transcript"] ) if __name__ == "__main__": _download_and_preprocess_data(sys.argv[1]) ================================================ FILE: bin/import_lingua_libre.py ================================================ #!/usr/bin/env python3 import argparse import csv import os import re import subprocess import unicodedata import zipfile from glob import glob from multiprocessing import Pool import progressbar import sox from deepspeech_training.util.downloader 
import SIMPLE_BAR, maybe_download from deepspeech_training.util.importers import ( get_counter, get_imported_samples, get_importers_parser, get_validate_label, print_import_report, ) from ds_ctcdecoder import Alphabet FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"] SAMPLE_RATE = 16000 BITDEPTH = 16 N_CHANNELS = 1 MAX_SECS = 10 ARCHIVE_DIR_NAME = "lingua_libre" ARCHIVE_NAME = "Q{qId}-{iso639_3}-{language_English_name}.zip" ARCHIVE_URL = "https://lingualibre.fr/datasets/" + ARCHIVE_NAME def _download_and_preprocess_data(target_dir): # Making path absolute target_dir = os.path.abspath(target_dir) # Conditionally download data archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL) # Conditionally extract data _maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path) # Produce CSV files and convert ogg data to wav _maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME) def _maybe_extract(target_dir, extracted_data, archive_path): # If target_dir/extracted_data does not exist, extract archive in target_dir extracted_path = os.path.join(target_dir, extracted_data) if not os.path.exists(extracted_path): print('No directory "%s" - extracting archive...' % extracted_path) if not os.path.isdir(extracted_path): os.mkdir(extracted_path) with zipfile.ZipFile(archive_path) as zip_f: zip_f.extractall(extracted_path) else: print('Found directory "%s" - not extracting it from archive.' 
% archive_path) def one_sample(sample): """ Take a audio file, and optionally convert it to 16kHz WAV """ ogg_filename = sample[0] # Storing wav files next to the ogg ones - just with a different suffix wav_filename = os.path.splitext(ogg_filename)[0] + ".wav" _maybe_convert_wav(ogg_filename, wav_filename) file_size = -1 frames = 0 if os.path.exists(wav_filename): file_size = os.path.getsize(wav_filename) frames = int( subprocess.check_output( ["soxi", "-s", wav_filename], stderr=subprocess.STDOUT ) ) label = label_filter(sample[1]) rows = [] counter = get_counter() if file_size == -1: # Excluding samples that failed upon conversion counter["failed"] += 1 elif label is None: # Excluding samples that failed on label validation counter["invalid_label"] += 1 elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)): # Excluding samples that are too short to fit the transcript counter["too_short"] += 1 elif frames / SAMPLE_RATE > MAX_SECS: # Excluding very long samples to keep a reasonable batch-size counter["too_long"] += 1 else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) def _maybe_convert_sets(target_dir, extracted_data): extracted_dir = os.path.join(target_dir, extracted_data) # override existing CSV with normalized one target_csv_template = os.path.join( target_dir, ARCHIVE_DIR_NAME + "_" + ARCHIVE_NAME.replace(".zip", "_{}.csv") ) if os.path.isfile(target_csv_template): return ogg_root_dir = os.path.join(extracted_dir, ARCHIVE_NAME.replace(".zip", "")) # Get audiofile path and transcript for each sentence in tsv samples = [] glob_dir = os.path.join(ogg_root_dir, "**/*.ogg") for record in glob(glob_dir, recursive=True): record_file = record.replace(ogg_root_dir + os.path.sep, "") if record_filter(record_file): samples.append( ( os.path.join(ogg_root_dir, record_file), 
def handle_args():
    """Build the LinguaLibre importer command line and return the parsed arguments.

    The options are declared as data and registered in order on the parser
    obtained from get_importers_parser().
    """
    description = "Importer for LinguaLibre dataset. Check https://lingualibre.fr/wiki/Help:Download_from_LinguaLibre for details."
    argument_specs = (
        ((), {"dest": "target_dir"}),
        (
            ("--qId",),
            {"type": int, "required": True, "help": "LinguaLibre language qId"},
        ),
        (
            ("--iso639-3",),
            {"type": str, "required": True, "help": "ISO639-3 language code"},
        ),
        (
            ("--english-name",),
            {"type": str, "required": True, "help": "English name of the language"},
        ),
        (
            ("--filter_alphabet",),
            {"help": "Exclude samples with characters not in provided alphabet"},
        ),
        (
            ("--normalize",),
            {
                "action": "store_true",
                "help": "Converts diacritic characters to their base ones",
            },
        ),
        (
            ("--bogus-records",),
            {
                "type": argparse.FileType("r"),
                "required": False,
                "help": "Text file listing well-known bogus record to skip from importing, from https://lingualibre.fr/wiki/LinguaLibre:Misleading_items",
            },
        ),
    )

    cli_parser = get_importers_parser(description=description)
    for flags, options in argument_specs:
        cli_parser.add_argument(*flags, **options)
    return cli_parser.parse_args()
def _download_and_preprocess_data(target_dir):
    """Run the M-AILABS import: download the archive, extract it, write the CSVs.

    All three steps operate on the absolute form of target_dir.
    """
    absolute_target = os.path.abspath(target_dir)

    # Each helper skips its work when the result is already on disk.
    downloaded_archive = maybe_download(ARCHIVE_NAME, absolute_target, ARCHIVE_URL)
    _maybe_extract(absolute_target, ARCHIVE_DIR_NAME, downloaded_archive)
    _maybe_convert_sets(absolute_target, ARCHIVE_DIR_NAME)
def _maybe_convert_sets(target_dir, extracted_data):
    """Collect all M-AILABS samples, process them in parallel and write the CSVs.

    Every metadata.csv below target_dir/extracted_data is read, except those
    whose path contains an entry of SKIP_LIST. The samples are processed by
    one_sample() in a process pool, and the accepted rows are distributed over
    the test/dev/train CSVs (rows with index % 10 == 0 go to test, == 1 to dev,
    the rest to train).

    Args:
        target_dir (str): Base directory of the import
        extracted_data (str): Name of the directory the archive was extracted into
    """
    extracted_dir = os.path.join(target_dir, extracted_data)
    # override existing CSV with normalized one
    target_csv_template = os.path.join(
        target_dir, ARCHIVE_DIR_NAME, ARCHIVE_NAME.replace(".tgz", "_{}.csv")
    )
    # NOTE(review): the template still contains the "{}" placeholder here, so
    # this only triggers for a file literally named that way - confirm intent.
    if os.path.isfile(target_csv_template):
        return

    wav_root_dir = os.path.join(extracted_dir)

    # SKIP_LIST may be a one-shot iterator (it is built with filter()).
    # Materialize it once so that every record is checked against all entries.
    skip_list = list(SKIP_LIST)

    # Get audiofile path and transcript for each sentence in tsv
    samples = []
    glob_dir = os.path.join(wav_root_dir, "**/metadata.csv")
    for record in glob(glob_dir, recursive=True):
        if any(skipped in record for skipped in skip_list):
            continue
        with open(record, "r") as rec:
            for line in rec:
                line = line.strip()
                if not line:
                    # Blank lines carry no sample and would fail the field lookup.
                    continue
                fields = line.split("|")
                audio = os.path.join(
                    os.path.dirname(record), "wavs", fields[0] + ".wav"
                )
                transcript = fields[2]
                samples.append((audio, transcript))

    counter = get_counter()
    num_samples = len(samples)
    rows = []

    print("Importing WAV files...")
    # The context manager releases the workers even if processing raises.
    with Pool() as pool:
        bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
        for i, processed in enumerate(
            pool.imap_unordered(one_sample, samples), start=1
        ):
            counter += processed[0]
            rows += processed[1]
            bar.update(i)
        bar.update(num_samples)
        pool.close()
        pool.join()

    with open(
        target_csv_template.format("train"), "w", encoding="utf-8", newline=""
    ) as train_csv_file, open(
        target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
    ) as dev_csv_file, open(
        target_csv_template.format("test"), "w", encoding="utf-8", newline=""
    ) as test_csv_file:
        train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)  # 80%
        train_writer.writeheader()
        dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)  # 10%
        dev_writer.writeheader()
        test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)  # 10%
        test_writer.writeheader()

        for i, item in enumerate(rows):
            transcript = validate_label(item[2])
            if not transcript:
                continue
            wav_filename = item[0]
            i_mod = i % 10
            if i_mod == 0:
                writer = test_writer
            elif i_mod == 1:
                writer = dev_writer
            else:
                writer = train_writer
            writer.writerow(
                dict(
                    wav_filename=os.path.relpath(wav_filename, extracted_dir),
                    wav_filesize=os.path.getsize(wav_filename),
                    transcript=transcript,
                )
            )

    imported_samples = get_imported_samples(counter)
    assert counter["all"] == num_samples
    assert len(rows) == imported_samples

    print_import_report(counter, SAMPLE_RATE, MAX_SECS)
if __name__ == "__main__":
    # Script entry point: parse the command line, set up the module-level
    # helpers used by the import functions, then run the import.
    CLI_ARGS = handle_args()
    ALPHABET = Alphabet(CLI_ARGS.filter_alphabet) if CLI_ARGS.filter_alphabet else None
    # A list, not a filter() iterator: the skip list is scanned once per
    # metadata file, and an iterator would be used up after the first scans.
    SKIP_LIST = [entry for entry in CLI_ARGS.skiplist.split(",") if entry]
    validate_label = get_validate_label(CLI_ARGS)

    def label_filter(label):
        """Return the validated label, or None when it must be rejected."""
        if CLI_ARGS.normalize:
            # Decompose accented characters and drop everything non-ASCII.
            label = (
                unicodedata.normalize("NFKD", label.strip())
                .encode("ascii", "ignore")
                .decode("ascii", "ignore")
            )
        label = validate_label(label)
        if ALPHABET and label and not ALPHABET.CanEncode(label):
            label = None
        return label

    # Fill the language into the archive name/URL templates.
    ARCHIVE_DIR_NAME = ARCHIVE_DIR_NAME.format(language=CLI_ARGS.language)
    ARCHIVE_NAME = ARCHIVE_NAME.format(language=CLI_ARGS.language)
    ARCHIVE_URL = ARCHIVE_URL.format(language=CLI_ARGS.language)

    _download_and_preprocess_data(target_dir=CLI_ARGS.target_dir)
def preprocess_data(folder_with_archives, target_dir):
    """Extract the three MAGICDATA subset archives and write one CSV per subset.

    Args:
        folder_with_archives (str): Folder containing magicdata_{train,dev,test}_set.tar.gz
        target_dir (str): Folder the archives are extracted into; also receives
            magicdata_{train,dev,test}.csv
    """
    # First extract subset archives
    for subset in ("train", "dev", "test"):
        extract(
            os.path.join(
                folder_with_archives, "magicdata_{}_set.tar.gz".format(subset)
            ),
            target_dir,
        )

    # Folder structure is now:
    # - magicdata_{train,dev,test}.tar.gz
    # - magicdata/
    #   - train/*.wav
    #   - train/TRANS.txt
    #   - dev/*.wav
    #   - dev/TRANS.txt
    #   - test/*.wav
    #   - test/TRANS.txt

    # The TRANS files are CSVs with three columns, one containing the WAV file
    # name, one containing the speaker ID, and one containing the transcription

    def load_set(set_path):
        """Return (wav_filename, wav_filesize, transcript) tuples for one subset."""
        transcripts = pandas.read_csv(
            os.path.join(set_path, "TRANS.txt"), sep="\t", index_col=0
        )
        glob_path = os.path.join(set_path, "*", "*.wav")
        set_files = []
        for wav in glob.glob(glob_path):
            try:
                wav_filename = wav
                wav_filesize = os.path.getsize(wav)
                transcript_key = os.path.basename(wav)
                transcript = transcripts.loc[transcript_key, "Transcription"]

                # Some files in this dataset are truncated, the header duration
                # doesn't match the file size. This causes errors at training
                # time, so check here if things are fine before including a file
                if is_file_truncated(wav_filename, wav_filesize):
                    print(
                        "Warning: File {} is corrupted, header duration does "
                        "not match file size. Ignoring.".format(wav_filename)
                    )
                    continue

                set_files.append((wav_filename, wav_filesize, transcript))
            except KeyError:
                print("Warning: Missing transcript for WAV file {}.".format(wav))
        return set_files

    for subset in ("train", "dev", "test"):
        print("Loading {} set samples...".format(subset))
        subset_files = load_set(os.path.join(target_dir, subset))
        df = pandas.DataFrame(data=subset_files, columns=COLUMN_NAMES)

        # Trim train set to under 10s
        if subset == "train":
            # Duration derived from the file size: 44 bytes are subtracted for
            # the header, then divided by 16000 and by 2.
            durations = (df["wav_filesize"] - 44) / 16000 / 2
            df = df[durations <= 10.0]
            print("Trimming {} samples > 10 seconds".format((durations > 10.0).sum()))

            # Non-capturing group: a capturing group makes pandas warn about
            # match groups. na=False keeps the mask boolean when a transcript
            # is missing, so it can be negated below.
            with_noise = df["transcript"].str.contains(r"\[(?:FIL|SPK)\]", na=False)
            df = df[~with_noise]
            print(
                "Trimming {} samples with noise ([FIL] or [SPK])".format(
                    sum(with_noise)
                )
            )

        dest_csv = os.path.join(target_dir, "magicdata_{}.csv".format(subset))
        print("Saving {} set into {}...".format(subset, dest_csv))
        df.to_csv(dest_csv, index=False)
def preprocess_data(tgz_file, target_dir):
    """Extract the Primewords archive and write train/dev/test CSVs into target_dir.

    All WAV files with a transcript are sorted by file name, shuffled with a
    fixed seed and split: the last 5000 indices form the test set, the 5000
    before them the dev set, the rest the train set (trimmed to <= 15 s).
    """
    # First extract main archive and sub-archives
    extract(tgz_file, target_dir)
    corpus_root = os.path.join(target_dir, "primewords_md_2018_set1")

    # Folder structure is now:
    # - primewords_md_2018_set1/
    #   - audio_files/
    #     - [0-f]/[00-0f]/*.wav
    #   - set1_transcript.json

    with open(os.path.join(corpus_root, "set1_transcript.json")) as fin:
        entries = json.load(fin)
    text_by_file = {entry["file"]: entry["text"] for entry in entries}

    # Collect (path, size, transcript) for every WAV that has a transcript
    samples = []
    wav_pattern = os.path.join(corpus_root, "audio_files", "*", "*", "*.wav")
    for wav_path in glob.glob(wav_pattern):
        size = os.path.getsize(wav_path)
        key = os.path.basename(wav_path)
        if key not in text_by_file:
            print("Warning: Missing transcript for WAV file {}.".format(wav_path))
            continue
        samples.append((wav_path, size, text_by_file[key]))

    # Load all files, then deterministically split into train/dev/test sets
    df = pandas.DataFrame(data=samples, columns=COLUMN_NAMES)
    df.sort_values(by="wav_filename", inplace=True)

    order = np.arange(0, len(df))
    np.random.seed(12345)
    np.random.shuffle(order)

    # Total corpus size: 50287 samples. 5000 samples gives us 99% confidence
    # level with a margin of error of under 2%.
    splits = {
        "train": order[:-10000],
        "dev": order[-10000:-5000],
        "test": order[-5000:],
    }

    for split_name in ("train", "dev", "test"):
        split_frame = df.iloc[splits[split_name]]
        if split_name == "train":
            # Duration estimated from the file size (44 subtracted for the header)
            seconds = (split_frame["wav_filesize"] - 44) / 16000 / 2
            split_frame = split_frame[seconds <= 15.0]
            print("Trimming {} samples > 15 seconds".format((seconds > 15.0).sum()))
        dest_csv = os.path.join(target_dir, "primewords_{}.csv".format(split_name))
        print("Saving {} set into {}...".format(split_name, dest_csv))
        split_frame.to_csv(dest_csv, index=False)
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
MAX_SECS = 15
ARCHIVE_DIR_NAME = "African_Accented_French"
ARCHIVE_NAME = "African_Accented_French.tar.gz"
ARCHIVE_URL = "http://www.openslr.org/resources/57/" + ARCHIVE_NAME


def _download_and_preprocess_data(target_dir):
    """Download (if needed), extract (if needed) and convert the corpus to CSV."""
    # Making path absolute
    target_dir = os.path.abspath(target_dir)
    # Conditionally download data
    archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL)
    # Conditionally extract data
    _maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
    # Produce CSV files
    _maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME)


def _maybe_extract(target_dir, extracted_data, archive_path):
    """Extract ``archive_path`` into ``target_dir`` unless already extracted.

    Extraction is skipped when ``target_dir/extracted_data`` already exists.
    """
    # If target_dir/extracted_data does not exist, extract archive in target_dir
    extracted_path = os.path.join(target_dir, extracted_data)
    if not os.path.exists(extracted_path):
        print('No directory "%s" - extracting archive...' % extracted_path)
        os.mkdir(extracted_path)
        # Context manager guarantees the archive handle is released even if
        # extraction fails part-way.
        with tarfile.open(archive_path) as tar:
            tar.extractall(target_dir)
    else:
        # Report the directory that was found (the original printed the
        # archive path here, contradicting the message).
        print('Found directory "%s" - not extracting it from archive.' % extracted_path)
def one_sample(sample):
    """Measure one audio file and classify it for import.

    ``sample`` is a ``(wav_filename, transcript)`` pair. The file size is read
    from disk and the frame count from ``soxi -s``; the transcript goes
    through ``label_filter``. No audio conversion happens here.

    Returns ``(counter, rows)``: ``counter`` is a fresh counter from
    ``get_counter()`` updated for this sample, and ``rows`` holds one
    ``(wav_filename, file_size, label)`` tuple when the sample is accepted,
    otherwise it is empty.
    """
    wav_filename = sample[0]
    file_size = -1
    frames = 0
    if os.path.exists(wav_filename):
        file_size = os.path.getsize(wav_filename)
        frames = int(
            subprocess.check_output(
                ["soxi", "-s", wav_filename], stderr=subprocess.STDOUT
            )
        )
    label = label_filter(sample[1])
    counter = get_counter()
    rows = []
    if file_size == -1:
        # Excluding samples whose audio file is missing
        counter["failed"] += 1
    elif label is None:
        # Excluding samples that failed on label validation
        counter["invalid_label"] += 1
    elif int(frames / SAMPLE_RATE * 1000 / 15 / 2) < len(str(label)):
        # Excluding samples that are too short to fit the transcript
        counter["too_short"] += 1
    elif frames / SAMPLE_RATE > MAX_SECS:
        # Excluding very long samples to keep a reasonable batch-size
        counter["too_long"] += 1
    else:
        # This one is good - keep it for the target CSV
        rows.append((wav_filename, file_size, label))
        counter["imported_time"] += frames
    counter["all"] += 1
    counter["total_time"] += frames

    return (counter, rows)


def _maybe_convert_sets(target_dir, extracted_data):
    """Build train/dev/test CSVs from the extracted corpus.

    Transcript files are parsed into a ``{wav basename: transcript}`` map,
    matched against every ``*.wav`` below the extracted directory, filtered in
    parallel through ``one_sample`` and written out with a 80/10/10 split
    (row index modulo 10: 0 -> test, 1 -> dev, otherwise train).
    """
    extracted_dir = os.path.join(target_dir, extracted_data)
    # override existing CSV with normalized one
    target_csv_template = os.path.join(
        target_dir, ARCHIVE_DIR_NAME, ARCHIVE_NAME.replace(".tar.gz", "_{}.csv")
    )
    # NOTE(review): this checks the unformatted template name (still holding
    # "{}"), so it looks like it can never match a generated CSV - confirm
    # whether skipping an existing import was intended.
    if os.path.isfile(target_csv_template):
        return

    wav_root_dir = os.path.join(extracted_dir)

    all_files = [
        "transcripts/train/yaounde/fn_text.txt",
        "transcripts/train/ca16_conv/transcripts.txt",
        "transcripts/train/ca16_read/conditioned.txt",
        "transcripts/dev/niger_west_african_fr/transcripts.txt",
        "speech/dev/niger_west_african_fr/niger_wav_file_name_transcript.tsv",
        "transcripts/devtest/ca16_read/conditioned.txt",
        "transcripts/test/ca16/prompts.txt",
    ]

    transcripts = {}
    for tr in all_files:
        # .tsv files are tab-separated; the other transcript files separate
        # the audio id from the text with a single space. (Both branches used
        # the same separator before, which made the distinction a no-op.)
        sep = "\t" if ".tsv" in tr else " "
        # French text: decode explicitly as UTF-8 rather than relying on the
        # platform default encoding.
        with open(
            os.path.join(target_dir, ARCHIVE_DIR_NAME, tr), "r", encoding="utf-8"
        ) as tr_source:
            for line in tr_source:
                fields = line.strip().split(sep)

                audio = os.path.basename(fields[0])

                if ".wav" not in audio:
                    if ".tdf" in audio:
                        audio = audio.replace(".tdf", ".wav")
                    else:
                        audio += ".wav"

                transcripts[audio] = " ".join(fields[1:])

    # Get audiofile path and transcript for each sentence in tsv
    samples = []
    glob_dir = os.path.join(wav_root_dir, "**/*.wav")
    for record in glob(glob_dir, recursive=True):
        record_file = os.path.basename(record)
        if record_file in transcripts:
            samples.append((record, transcripts[record_file]))

    # Keep track of how many samples are good vs. problematic
    counter = get_counter()
    num_samples = len(samples)
    rows = []

    print("Importing WAV files...")
    pool = Pool()
    bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
    for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1):
        counter += processed[0]
        rows += processed[1]
        bar.update(i)
    bar.update(num_samples)
    pool.close()
    pool.join()

    with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file:  # 80%
        with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file:  # 10%
            with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
                dev_writer.writeheader()
                test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)
                test_writer.writeheader()

                for i, item in enumerate(rows):
                    transcript = validate_label(item[2])
                    if not transcript:
                        continue
                    wav_filename = item[0]
                    i_mod = i % 10
                    if i_mod == 0:
                        writer = test_writer
                    elif i_mod == 1:
                        writer = dev_writer
                    else:
                        writer = train_writer
                    writer.writerow(
                        dict(
                            wav_filename=wav_filename,
                            wav_filesize=os.path.getsize(wav_filename),
                            transcript=transcript,
                        )
                    )

    imported_samples = get_imported_samples(counter)
    assert counter["all"] == num_samples
    assert len(rows) == imported_samples

    print_import_report(counter, SAMPLE_RATE, MAX_SECS)
def handle_args():
    """Build the importer's argument parser and parse the command line.

    Arguments: positional ``target_dir``, optional ``--filter_alphabet`` and
    the ``--normalize`` flag, on top of whatever ``get_importers_parser``
    already defines.
    """
    parser = get_importers_parser(
        description="Importer for African Accented French dataset. More information on http://www.openslr.org/57/."
    )
    parser.add_argument(dest="target_dir")
    parser.add_argument(
        "--filter_alphabet",
        help="Exclude samples with characters not in provided alphabet",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        help="Converts diacritic characters to their base ones",
    )
    return parser.parse_args()


if __name__ == "__main__":
    # These names are module globals that one_sample() and
    # _maybe_convert_sets() read at call time, so they must be set before
    # _download_and_preprocess_data() runs.
    # NOTE(review): they only exist when the file runs as a script; worker
    # processes started with the "spawn" method would presumably not see
    # label_filter - confirm on Windows/macOS.
    CLI_ARGS = handle_args()
    ALPHABET = Alphabet(CLI_ARGS.filter_alphabet) if CLI_ARGS.filter_alphabet else None
    validate_label = get_validate_label(CLI_ARGS)

    def label_filter(label):
        # With --normalize, strip diacritics: decompose (NFKD) and drop
        # everything that is not ASCII.
        if CLI_ARGS.normalize:
            label = (
                unicodedata.normalize("NFKD", label.strip())
                .encode("ascii", "ignore")
                .decode("ascii", "ignore")
            )
        label = validate_label(label)
        # Reject labels the optional alphabet cannot encode.
        if ALPHABET and label and not ALPHABET.CanEncode(label):
            label = None
        return label

    _download_and_preprocess_data(target_dir=CLI_ARGS.target_dir)
# ARCHIVE_NAME refers to ISIP alignments from 01/29/03
ARCHIVE_NAME = "switchboard_word_alignments.tar.gz"
ARCHIVE_URL = "http://www.openslr.org/resources/5/"
ARCHIVE_DIR_NAME = "LDC97S62"
LDC_DATASET = "swb1_LDC97S62.tgz"


def download_file(folder, url):
    """Stream ``url`` into ``folder`` and return the path of the saved file.

    The file name is the last path component of ``url``. Data is written to
    a ``.part`` file first and renamed on success, so an interrupted or failed
    download never leaves something that looks like a complete archive.

    Raises whatever ``requests`` raises for connection problems or a non-2xx
    HTTP status.
    """
    # https://stackoverflow.com/a/16696317/738515
    local_filename = url.split("/")[-1]
    full_filename = os.path.join(folder, local_filename)
    partial_filename = full_filename + ".part"
    response = requests.get(url, stream=True)
    try:
        # Fail loudly on HTTP errors instead of saving the error page.
        response.raise_for_status()
        try:
            with open(partial_filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        except Exception:
            if os.path.exists(partial_filename):
                os.remove(partial_filename)
            raise
    finally:
        # Always give the connection back.
        response.close()
    os.replace(partial_filename, full_filename)
    return full_filename


def maybe_download(archive_url, target_dir, ldc_dataset):
    """Download ``archive_url + ldc_dataset`` into ``target_dir`` if missing.

    ``target_dir`` is created when it does not exist. Returns the path
    ``target_dir/ldc_dataset`` whether or not a download took place.
    """
    # If archive file does not exist, download it...
    archive_path = os.path.join(target_dir, ldc_dataset)
    ldc_path = archive_url + ldc_dataset
    if not os.path.exists(target_dir):
        print('No path "%s" - creating ...' % target_dir)
        os.makedirs(target_dir)

    if not os.path.exists(archive_path):
        print('No archive "%s" - downloading...' % archive_path)
        download_file(target_dir, ldc_path)
    else:
        print('Found archive "%s" - not downloading.' % archive_path)
    return archive_path
def _download_and_preprocess_data(data_dir):
    """Turn the Switchboard LDC archive in ``data_dir`` into train/dev/test CSVs.

    Steps: extract ``LDC_DATASET``, download and extract the word alignments,
    convert the four discs from SPH to WAV, cut the WAVs into per-utterance
    files, split the result into sets and write ``swb-{train,dev,test}.csv``
    into ``data_dir/ARCHIVE_DIR_NAME``.

    Raises AssertionError when the LDC archive or an expected folder is
    missing, and RuntimeError when a split directory already exists (see
    below).
    """
    new_data_dir = os.path.join(data_dir, ARCHIVE_DIR_NAME)
    target_dir = os.path.abspath(new_data_dir)
    archive_path = os.path.abspath(os.path.join(data_dir, LDC_DATASET))

    # Check swb1_LDC97S62.tgz then extract
    assert os.path.isfile(archive_path), "LDC archive not found: {}".format(
        archive_path
    )
    _extract(target_dir, archive_path)

    # Transcripts
    transcripts_path = maybe_download(ARCHIVE_URL, target_dir, ARCHIVE_NAME)
    _extract(target_dir, transcripts_path)

    # Check swb1_d1/2/3/4/swb_ms98_transcriptions
    discs = ["swb1_d1", "swb1_d2", "swb1_d3", "swb1_d4"]
    expected_folders = discs + ["swb_ms98_transcriptions"]
    missing = [
        e for e in expected_folders if not os.path.isdir(os.path.join(target_dir, e))
    ]
    assert not missing, "Missing folders after extraction: {}".format(missing)

    # Conditionally convert swb sph data to wav
    for disc in discs:
        _maybe_convert_wav(target_dir, disc, disc + "-wav")

    # Conditionally split wav data
    frames = []
    for disc in discs:
        split_dir = disc + "-split-wav"
        frame = _maybe_split_wav_and_sentences(
            target_dir, "swb_ms98_transcriptions", disc + "-wav", split_dir
        )
        if frame is None:
            # The splitter returns nothing when its output directory already
            # exists, so there is no file list to build the CSVs from. Fail
            # with an actionable message instead of an AttributeError.
            raise RuntimeError(
                'Directory "{}" already exists; remove it to regenerate the '
                "sample list.".format(os.path.join(target_dir, split_dir))
            )
        frames.append(frame)

    # DataFrame.append() is gone in pandas 2.0; concat is the equivalent.
    swb_files = pandas.concat(frames)

    train_files, dev_files, test_files = _split_sets(swb_files)

    # Write sets to disk as CSV files
    train_files.to_csv(os.path.join(target_dir, "swb-train.csv"), index=False)
    dev_files.to_csv(os.path.join(target_dir, "swb-dev.csv"), index=False)
    test_files.to_csv(os.path.join(target_dir, "swb-test.csv"), index=False)


def _extract(target_dir, archive_path):
    """Unpack the tar archive at ``archive_path`` into ``target_dir``."""
    with tarfile.open(archive_path) as tar:
        tar.extractall(target_dir)


def _maybe_convert_wav(data_dir, original_data, converted_data):
    """Convert every ``*.sph`` under ``data_dir/original_data`` to 16 kHz WAVs.

    Each channel ("1" and "2") is extracted with ``sph2pipe`` into a temporary
    WAV, resampled to 16 kHz with librosa and written as 16-bit PCM to
    ``data_dir/converted_data/<stem>-<channel>.wav``. Nothing is done when the
    target directory already exists.
    """
    source_dir = os.path.join(data_dir, original_data)
    target_dir = os.path.join(data_dir, converted_data)

    # Conditionally convert sph files to wav files
    if os.path.exists(target_dir):
        print("skipping maybe_convert_wav")
        return

    # Create target_dir
    os.makedirs(target_dir)

    # Loop over sph files in source_dir and convert each to 16-bit PCM wav
    for root, _dirnames, filenames in os.walk(source_dir):
        for filename in fnmatch.filter(filenames, "*.sph"):
            sph_file = os.path.join(root, filename)
            stem = os.path.splitext(os.path.basename(sph_file))[0]
            for channel in ["1", "2"]:
                wav_file = os.path.join(target_dir, stem + "-" + channel + ".wav")
                temp_wav_file = os.path.join(
                    target_dir, stem + "-" + channel + "-temp.wav"
                )
                print("converting {} to {}".format(sph_file, temp_wav_file))
                subprocess.check_call(
                    [
                        "sph2pipe",
                        "-c",
                        channel,
                        "-p",
                        "-f",
                        "rif",
                        sph_file,
                        temp_wav_file,
                    ]
                )
                print("upsampling {} to {}".format(temp_wav_file, wav_file))
                audioData, frameRate = librosa.load(temp_wav_file, sr=16000, mono=True)
                soundfile.write(wav_file, audioData, frameRate, "PCM_16")
                os.remove(temp_wav_file)
def _parse_transcriptions(trans_file):
    """Parse one transcription file into a list of segment dicts.

    Lines starting with ``#`` and empty lines are ignored. For every other
    line, tokens 1 and 2 are the start/stop times and the remaining tokens
    form the transcript. Segments whose transcript is rejected by
    ``validate_label`` are dropped.

    Returns a list of ``{"start_time", "stop_time", "transcript"}`` dicts.
    """
    segments = []
    with codecs.open(trans_file, "r", "utf-8") as fin:
        for line in fin:
            if line.startswith("#") or len(line) <= 1:
                continue

            tokens = line.split()
            start_time = float(tokens[1])
            stop_time = float(tokens[2])
            transcript = validate_label(" ".join(tokens[3:]))

            if transcript is None:
                continue

            # We need to do the encode-decode dance here because encode
            # returns a bytes() object on Python 3, and text_to_char_array
            # expects a string.
            transcript = (
                unicodedata.normalize("NFKD", transcript)
                .encode("ascii", "ignore")
                .decode("ascii", "ignore")
            )

            segments.append(
                {
                    "start_time": start_time,
                    "stop_time": stop_time,
                    "transcript": transcript,
                }
            )
    return segments


def _maybe_split_wav_and_sentences(data_dir, trans_data, original_data, converted_data):
    """Cut channel WAVs into one file per transcribed segment.

    Walks ``data_dir/trans_data`` for ``*.text`` files whose name contains
    "trans", finds the matching channel WAV in ``data_dir/original_data`` and
    writes one WAV per segment into ``data_dir/converted_data``.

    Returns a DataFrame with columns ``wav_filename``, ``wav_filesize`` and
    ``transcript``. Returns ``None`` without doing anything when the target
    directory already exists.
    """
    trans_dir = os.path.join(data_dir, trans_data)
    source_dir = os.path.join(data_dir, original_data)
    target_dir = os.path.join(data_dir, converted_data)
    if os.path.exists(target_dir):
        print("skipping maybe_split_wav")
        return

    os.makedirs(target_dir)

    files = []

    # Loop over transcription files and split corresponding wav
    for root, _dirnames, filenames in os.walk(trans_dir):
        for filename in fnmatch.filter(filenames, "*.text"):
            if "trans" not in filename:
                continue
            trans_file = os.path.join(root, filename)
            segments = _parse_transcriptions(trans_file)
            trans_stem = os.path.splitext(os.path.basename(trans_file))[0]

            # Open wav corresponding to transcription file: character 6 of
            # the transcription name selects channel 1 ("A") or 2.
            channel = ("2", "1")[trans_stem[6] == "A"]
            wav_filename = "sw0" + trans_stem[2:6] + "-" + channel + ".wav"
            wav_file = os.path.join(source_dir, wav_filename)

            print("splitting {} according to {}".format(wav_file, trans_file))

            if not os.path.exists(wav_file):
                print("skipping. does not exist:" + wav_file)
                continue

            # The context manager closes the source audio even when splitting
            # a segment raises.
            with wave.open(wav_file, "r") as origAudio:
                # Loop over segments and split wav_file for each segment
                for segment in segments:
                    # Create wav segment filename
                    start_time = segment["start_time"]
                    stop_time = segment["stop_time"]
                    new_wav_filename = (
                        trans_stem
                        + "-"
                        + str(start_time)
                        + "-"
                        + str(stop_time)
                        + ".wav"
                    )
                    if _is_wav_too_short(new_wav_filename):
                        continue
                    new_wav_file = os.path.join(target_dir, new_wav_filename)

                    _split_wav(origAudio, start_time, stop_time, new_wav_file)

                    new_wav_filesize = os.path.getsize(new_wav_file)
                    transcript = segment["transcript"]
                    files.append(
                        (os.path.abspath(new_wav_file), new_wav_filesize, transcript)
                    )

    return pandas.DataFrame(
        data=files, columns=["wav_filename", "wav_filesize", "transcript"]
    )
def _is_wav_too_short(wav_filename):
    """Return True for the two segment file names that are known to be too short."""
    short_wav_filenames = [
        "sw2986A-ms98-a-trans-80.6385-83.358875.wav",
        "sw2663A-ms98-a-trans-161.12025-164.213375.wav",
    ]
    return wav_filename in short_wav_filenames


def _split_wav(origAudio, start_time, stop_time, new_wav_file):
    """Write the ``[start_time, stop_time)`` seconds of ``origAudio`` to a new WAV.

    ``origAudio`` is an open ``wave`` reader; the output keeps its channel
    count, sample width and frame rate.
    """
    frameRate = origAudio.getframerate()
    origAudio.setpos(int(start_time * frameRate))
    chunkData = origAudio.readframes(int((stop_time - start_time) * frameRate))
    # Context manager: the output file is closed even if a write fails.
    with wave.open(new_wav_file, "w") as chunkAudio:
        chunkAudio.setnchannels(origAudio.getnchannels())
        chunkAudio.setsampwidth(origAudio.getsampwidth())
        chunkAudio.setframerate(frameRate)
        chunkAudio.writeframes(chunkData)


def _split_sets(filelist):
    """
    randomly split the dataset into train, validation, and test sets where the
    size of the validation and test sets is determined by the
    `get_sample_size` function.

    ``filelist`` may be a list (shuffled in place, as before) or a pandas
    DataFrame (rows are reordered on a copy). Returns a
    ``(train, dev, test)`` tuple of slices of the shuffled data.
    """
    if hasattr(filelist, "iloc"):
        # random.shuffle() cannot shuffle a DataFrame: it swaps items with
        # integer subscripts, which a DataFrame interprets as column labels.
        # Shuffle the row positions instead and reorder through iloc.
        order = list(range(len(filelist)))
        random.shuffle(order)
        filelist = filelist.iloc[order]
    else:
        random.shuffle(filelist)
    sample_size = get_sample_size(len(filelist))

    train_beg = 0
    train_end = len(filelist) - 2 * sample_size

    dev_beg = train_end
    dev_end = train_end + sample_size

    test_beg = dev_end
    test_end = len(filelist)

    return (
        filelist[train_beg:train_end],
        filelist[dev_beg:dev_end],
        filelist[test_beg:test_end],
    )


def get_sample_size(population_size):
    """calculates the sample size for a 99% confidence and 1% margin of error

    Candidate training-set sizes are tried from ``population_size`` downwards;
    the first one for which two samples plus the training set fit into the
    population determines the result. Returns 0 for an empty population.
    """
    margin_of_error = 0.01
    fraction_picking = 0.50
    z_score = 2.58  # Corresponds to confidence level 99%
    numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
        margin_of_error ** 2
    )
    sample_size = 0
    for train_size in range(population_size, 0, -1):
        denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
            margin_of_error ** 2 * train_size
        )
        sample_size = int(numerator / denominator)
        if 2 * sample_size + train_size <= population_size:
            break
    return sample_size


def _read_data_set(
    filelist,
    thread_count,
    batch_size,
    numcep,
    numcontext,
    stride=1,
    offset=0,
    next_index=lambda i: i + 1,
    limit=0,
):
    # NOTE(review): neither `DataSet` nor `txt_files` is defined or imported
    # in the visible part of this file, so calling this looks like it raises
    # NameError; nothing visible here calls it - confirm before relying on it.

    # Optionally apply dataset size limit
    if limit > 0:
        filelist = filelist.iloc[:limit]

    filelist = filelist[offset::stride]

    # Return DataSet
    return DataSet(
        txt_files, thread_count, batch_size, numcep, numcontext, next_index=next_index
    )


if __name__ == "__main__":
    _download_and_preprocess_data(sys.argv[1])
SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
SWC_ARCHIVE = "SWC_{language}.tar"
LANGUAGES = ["dutch", "english", "german"]
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
FIELDNAMES_EXT = FIELDNAMES + ["article", "speaker"]
CHANNELS = 1
SAMPLE_RATE = 16000
UNKNOWN = ""
AUDIO_PATTERN = "audio*.ogg"
WAV_NAME = "audio.wav"
ALIGNED_NAME = "aligned.swc"

# Per-language rewrite rules applied to labels in order. A replacement of
# None marks a pattern that causes the whole label to be rejected.
SUBSTITUTIONS = {
    "german": [
        (re.compile(r"\$"), "dollar"),
        (re.compile(r"€"), "euro"),
        (re.compile(r"£"), "pfund"),
        (
            re.compile(r"ein tausend ([^\s]+) hundert ([^\s]+) er( |$)"),
            r"\1zehnhundert \2er ",
        ),
        (re.compile(r"ein tausend (acht|neun) hundert"), r"\1zehnhundert"),
        (
            re.compile(
                r"eins punkt null null null punkt null null null punkt null null null"
            ),
            "eine milliarde",
        ),
        (
            re.compile(
                r"punkt null null null punkt null null null punkt null null null"
            ),
            "milliarden",
        ),
        (re.compile(r"eins punkt null null null punkt null null null"), "eine million"),
        (re.compile(r"punkt null null null punkt null null null"), "millionen"),
        (re.compile(r"eins punkt null null null"), "ein tausend"),
        (re.compile(r"punkt null null null"), "tausend"),
        (re.compile(r"punkt null"), None),
    ]
}

# Characters that must survive diacritic normalization, per language.
DONT_NORMALIZE = {"german": "ÄÖÜäöüß"}

# Translation table that deletes bracket-like characters, "/" and ":".
PRE_FILTER = str.maketrans(dict.fromkeys("/()[]{}<>:"))


class Sample:
    """One utterance: a time span of a source WAV plus its text and metadata."""

    def __init__(self, wav_path, start, end, text, article, speaker, sub_set=None):
        self.wav_path, self.start, self.end = wav_path, start, end
        self.text = text
        self.article, self.speaker = article, speaker
        self.sub_set = sub_set


def fail(message):
    """Print ``message`` and terminate the process with exit status 1."""
    print(message)
    sys.exit(1)


def group(lst, get_key):
    """Bucket the items of ``lst`` by ``get_key(item)``.

    Returns a dict mapping each key to the list of its items; keys appear in
    first-seen order and items keep their original order.
    """
    buckets = {}
    for item in lst:
        buckets.setdefault(get_key(item), []).append(item)
    return buckets


def get_sample_size(population_size):
    """Sample size for a 99% confidence level and a 1% margin of error.

    Tries candidate training-set sizes from ``population_size`` downwards and
    stops at the first one where two samples plus the training set fit into
    the population.
    """
    margin_of_error = 0.01
    fraction_picking = 0.50
    z_score = 2.58  # Corresponds to confidence level 99%
    numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
        margin_of_error ** 2
    )
    result = 0
    candidate = population_size
    while candidate > 0:
        denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
            margin_of_error ** 2 * candidate
        )
        result = int(numerator / denominator)
        if 2 * result + candidate <= population_size:
            break
        candidate -= 1
    return result
def maybe_download_language(language):
    """Download the SWC archive for ``language`` into the base dir if missing."""
    lang_upper = language[0].upper() + language[1:]
    return maybe_download(
        SWC_ARCHIVE.format(language=lang_upper),
        CLI_ARGS.base_dir,
        SWC_URL.format(language=lang_upper),
    )


def maybe_extract(data_dir, extracted_data, archive):
    """Extract ``archive`` into ``data_dir/extracted_data`` unless it exists.

    Returns the path of the extraction directory in both cases.
    """
    extracted = os.path.join(data_dir, extracted_data)
    if os.path.isdir(extracted):
        print('Found directory "{}" - not extracting.'.format(extracted))
    else:
        print('Extracting "{}"...'.format(archive))
        with tarfile.open(archive) as tar:
            members = tar.getmembers()
            bar = progressbar.ProgressBar(max_value=len(members), widgets=SIMPLE_BAR)
            for member in bar(members):
                tar.extract(member=member, path=extracted)
    return extracted


def ignored(node, parent_map=None):
    """Return True if ``node`` or one of its ancestors is an ``ignored`` element.

    ``parent_map`` is an optional ``{child: parent}`` dict for the document,
    e.g. ``{c: p for p in tree.iter() for c in p}``. It is needed for
    ``xml.etree.ElementTree`` elements, where ``find("..")`` returns None for
    the element it is called on, so ancestors cannot be reached from the node
    alone. Without a map the previous lookup is used unchanged.
    """
    while node is not None:
        if node.tag == "ignored":
            return True
        if parent_map is not None:
            node = parent_map.get(node)
        else:
            node = node.find("..")
    return False


def read_token(token):
    """Extract ``(start, end, text)`` from a token element.

    With ``n`` children, ``start`` is the first ``start`` attribute found,
    ``end`` the largest ``end`` attribute, and the text joins their
    ``pronunciation`` attributes with spaces. Without ``n`` children the
    token's own ``text`` attribute is used and both times are None.
    """
    texts, start, end = [], None, None
    notes = token.findall("n")
    if len(notes) > 0:
        for note in notes:
            attributes = note.attrib
            if start is None and "start" in attributes:
                start = int(attributes["start"])
            if "end" in attributes:
                token_end = int(attributes["end"])
                if end is None or token_end > end:
                    end = token_end
            if "pronunciation" in attributes:
                t = attributes["pronunciation"]
                texts.append(t)
    elif "text" in token.attrib:
        texts.append(token.attrib["text"])
    return start, end, " ".join(texts)


def in_alphabet(alphabet, c):
    """True if ``alphabet`` can encode ``c``; always True without an alphabet."""
    return alphabet.CanEncode(c) if alphabet else True


# Cache of loaded alphabets, keyed by language (value may be None).
ALPHABETS = {}


def get_alphabet(language):
    """Return the (cached) alphabet configured for ``language``, or None."""
    if language in ALPHABETS:
        return ALPHABETS[language]
    alphabet_path = getattr(CLI_ARGS, language + "_alphabet")
    alphabet = Alphabet(alphabet_path) if alphabet_path else None
    ALPHABETS[language] = alphabet
    return alphabet
def label_filter(label, language):
    """Clean and validate a transcript.

    Returns ``(label, None)`` on success, or ``(None, reason)`` where reason
    is "validation", "substitution rule" or "illegal character".
    """
    label = label.translate(PRE_FILTER)
    label = validate_label(label)
    if label is None:
        return None, "validation"
    substitutions = SUBSTITUTIONS[language] if language in SUBSTITUTIONS else []
    for pattern, replacement in substitutions:
        if replacement is None:
            # A pattern without replacement rejects the label. Use search():
            # match() is anchored at position 0 and would only catch labels
            # that *start* with the pattern.
            if pattern.search(label):
                return None, "substitution rule"
        else:
            label = pattern.sub(replacement, label)
    chars = []
    dont_normalize = DONT_NORMALIZE[language] if language in DONT_NORMALIZE else ""
    alphabet = get_alphabet(language)
    for c in label:
        # Only characters the alphabet cannot encode are reduced to their
        # ASCII base form (which may yield zero or several characters).
        if CLI_ARGS.normalize and c not in dont_normalize and not in_alphabet(alphabet, c):
            c = unicodedata.normalize("NFKD", c).encode("ascii", "ignore").decode("ascii", "ignore")
        for sc in c:
            if not in_alphabet(alphabet, sc):
                return None, "illegal character"
            chars.append(sc)
    label = "".join(chars)
    label = validate_label(label)
    return label, "validation" if label is None else None


def collect_samples(base_dir, language):
    """Collect ``Sample`` objects from every aligned article below ``base_dir``.

    A directory counts as an article when it contains both ``ALIGNED_NAME``
    and ``WAV_NAME``. Sentences are cut into samples, splitting whenever the
    running duration would exceed ``CLI_ARGS.max_duration``. Rejected
    candidates are tallied by reason and the tally is printed at the end.

    Returns the list of accepted samples.
    """
    roots = []
    for root, _, files in os.walk(base_dir):
        if ALIGNED_NAME in files and WAV_NAME in files:
            roots.append(root)
    samples = []
    reasons = Counter()

    def add_sample(
        p_wav_path, p_article, p_speaker, p_start, p_end, p_text, p_reason="complete"
    ):
        # Accept the candidate or count why it was rejected.
        if p_start is not None and p_end is not None and p_text is not None:
            duration = p_end - p_start
            text, filter_reason = label_filter(p_text, language)
            skip = False
            if filter_reason is not None:
                skip = True
                p_reason = filter_reason
            elif CLI_ARGS.exclude_unknown_speakers and p_speaker == UNKNOWN:
                skip = True
                p_reason = "unknown speaker"
            elif CLI_ARGS.exclude_unknown_articles and p_article == UNKNOWN:
                skip = True
                p_reason = "unknown article"
            elif duration > CLI_ARGS.max_duration > 0 and CLI_ARGS.ignore_too_long:
                skip = True
                p_reason = "exceeded duration"
            elif int(duration / 30) < len(text):
                skip = True
                p_reason = "too short to decode"
            elif duration / len(text) < 10:
                skip = True
                p_reason = "length duration ratio"
            if skip:
                reasons[p_reason] += 1
            else:
                samples.append(
                    Sample(p_wav_path, p_start, p_end, text, p_article, p_speaker)
                )
        elif p_start is None or p_end is None:
            reasons["missing timestamps"] += 1
        else:
            reasons["missing text"] += 1

    print("Collecting samples...")
    bar = progressbar.ProgressBar(max_value=len(roots), widgets=SIMPLE_BAR)
    for root in bar(roots):
        wav_path = os.path.join(root, WAV_NAME)
        aligned = ET.parse(os.path.join(root, ALIGNED_NAME))
        article = UNKNOWN
        speaker = UNKNOWN
        for prop in aligned.iter("prop"):
            attributes = prop.attrib
            if "key" in attributes and "value" in attributes:
                if attributes["key"] == "DC.identifier":
                    article = attributes["value"]
                elif attributes["key"] == "reader.name":
                    speaker = attributes["value"]
        # ElementTree cannot walk upwards from an element, so the ancestor
        # check in ignored() never sees an enclosing <ignored>. Collect the
        # sentences nested in <ignored> elements up front instead.
        nested_in_ignored = {
            nested for section in aligned.iter("ignored") for nested in section.iter("s")
        }
        for sentence in aligned.iter("s"):
            if sentence in nested_in_ignored or ignored(sentence):
                continue
            split = False
            tokens = list(map(read_token, sentence.findall("t")))
            sample_start, sample_end, token_texts, sample_texts = None, None, [], []
            for token_start, token_end, token_text in tokens:
                if CLI_ARGS.exclude_numbers and any(c.isdigit() for c in token_text):
                    # Flush what was gathered so far and restart after the
                    # token that contains digits.
                    add_sample(
                        wav_path,
                        article,
                        speaker,
                        sample_start,
                        sample_end,
                        " ".join(sample_texts),
                        p_reason="has numbers",
                    )
                    sample_start, sample_end, token_texts, sample_texts = (
                        None,
                        None,
                        [],
                        [],
                    )
                    continue
                if sample_start is None:
                    sample_start = token_start
                if sample_start is None:
                    continue
                token_texts.append(token_text)
                if token_end is not None:
                    if (
                        token_start != sample_start
                        and token_end - sample_start > CLI_ARGS.max_duration > 0
                    ):
                        # Adding this token would exceed max_duration: emit
                        # the current sample and continue from its end.
                        add_sample(
                            wav_path,
                            article,
                            speaker,
                            sample_start,
                            sample_end,
                            " ".join(sample_texts),
                            p_reason="split",
                        )
                        sample_start = sample_end
                        sample_texts = []
                        split = True
                    sample_end = token_end
                    sample_texts.extend(token_texts)
                    token_texts = []
            add_sample(
                wav_path,
                article,
                speaker,
                sample_start,
                sample_end,
                " ".join(sample_texts),
                p_reason="split" if split else "complete",
            )
    print("Skipped samples:")
    for reason, n in reasons.most_common():
        print(" - {}: {}".format(reason, n))
    return samples
def maybe_convert_one_to_wav(entry):
    """Create ``WAV_NAME`` in one directory from its ``audio*.ogg`` files.

    ``entry`` is one ``(root, dirs, files)`` tuple from ``os.walk``. A single
    source file is converted directly; several are converted one by one and
    concatenated. Nothing happens when the output already exists or no source
    file matches. Conversion errors are reported and the directory is skipped.
    """
    root, _, files = entry
    output_wav = os.path.join(root, WAV_NAME)
    if os.path.isfile(output_wav):
        return
    transformer = sox.Transformer()
    transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
    combiner = sox.Combiner()
    combiner.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS)
    files = sorted(glob(os.path.join(root, AUDIO_PATTERN)))
    try:
        if len(files) == 1:
            transformer.build(files[0], output_wav)
        elif len(files) > 1:
            wav_files = []
            for i, file in enumerate(files):
                wav_path = os.path.join(root, "audio{}.wav".format(i))
                transformer.build(file, wav_path)
                wav_files.append(wav_path)
            combiner.set_input_format(file_type=["wav"] * len(wav_files))
            combiner.build(wav_files, output_wav, "concatenate")
    except sox.core.SoxError as error:
        # Still best-effort, but no longer silent: say which article is lost.
        print('WARNING: Unable to convert audio in "{}": {}'.format(root, error))
        return


def maybe_convert_to_wav(base_dir):
    """Run ``maybe_convert_one_to_wav`` over every directory below ``base_dir``."""
    roots = list(os.walk(base_dir))
    print("Converting and joining source audio files...")
    bar = progressbar.ProgressBar(max_value=len(roots), widgets=SIMPLE_BAR)
    tp = ThreadPool()
    try:
        for _ in bar(tp.imap_unordered(maybe_convert_one_to_wav, roots)):
            pass
    finally:
        # Release the worker threads even if a conversion raised.
        tp.close()
        tp.join()


def assign_sub_sets(samples):
    """Set ``sub_set`` ("train", "dev" or "test") on every sample.

    Whole speakers, smallest first, are dealt alternately to dev and test
    until each holds at least ``get_sample_size(len(samples))`` samples; the
    remaining speakers form the training set. If no speaker is left for
    training, ``samples`` is shuffled in place with a fixed seed and split by
    position instead. A per-set summary is printed.
    """
    sample_size = get_sample_size(len(samples))
    speakers = group(samples, lambda sample: sample.speaker).values()
    speakers = list(sorted(speakers, key=len))
    sample_sets = [[], []]
    while any(map(lambda s: len(s) < sample_size, sample_sets)) and len(speakers) > 0:
        for sample_set in sample_sets:
            if len(sample_set) < sample_size and len(speakers) > 0:
                sample_set.extend(speakers.pop(0))
    # Flatten in one pass; sum(lists, []) re-copies the accumulated list for
    # every speaker.
    train_set = [sample for speaker_samples in speakers for sample in speaker_samples]
    if len(train_set) == 0:
        print(
            "WARNING: Unable to build dev and test sets without speaker bias as there is no speaker meta data"
        )
        random.seed(42)  # same source data == same output
        random.shuffle(samples)
        for index, sample in enumerate(samples):
            if index < sample_size:
                sample.sub_set = "dev"
            elif index < 2 * sample_size:
                sample.sub_set = "test"
            else:
                sample.sub_set = "train"
    else:
        for sub_set, sub_set_samples in [
            ("train", train_set),
            ("dev", sample_sets[0]),
            ("test", sample_sets[1]),
        ]:
            for sample in sub_set_samples:
                sample.sub_set = sub_set
    for sub_set, sub_set_samples in group(samples, lambda s: s.sub_set).items():
        # start/end are divided by 1000 * 60 * 60 to report hours.
        t = sum(map(lambda s: s.end - s.start, sub_set_samples)) / (1000 * 60 * 60)
        print(
            'Sub-set "{}" with {} samples (duration: {:.2f} h)'.format(
                sub_set, len(sub_set_samples), t
            )
        )
def create_sample_dirs(language):
    """Create ``<base_dir>/<language>-{train,dev,test}`` if they do not exist."""
    print("Creating sample directories...")
    for set_name in ["train", "dev", "test"]:
        dir_path = os.path.join(CLI_ARGS.base_dir, language + "-" + set_name)
        os.makedirs(dir_path, exist_ok=True)


def split_audio_files(samples, language):
    """Cut every sample out of its source WAV into its sub-set directory.

    Each sample is written to
    ``<base_dir>/<language>-<sub_set>/sample-NNNNNN.wav`` (numbered per
    sub-set) and ``sample.wav_path`` is updated to point at the new file.
    """
    print("Splitting audio files...")
    sub_sets = Counter()
    src_wav_files = group(samples, lambda s: s.wav_path).items()
    bar = progressbar.ProgressBar(max_value=len(src_wav_files), widgets=SIMPLE_BAR)
    for wav_path, file_samples in bar(src_wav_files):
        file_samples = sorted(file_samples, key=lambda s: s.start)
        with wave.open(wav_path, "r") as src_wav_file:
            rate = src_wav_file.getframerate()
            for sample in file_samples:
                # Running number of this sample within its sub-set.
                index = sub_sets[sample.sub_set]
                sample_wav_path = os.path.join(
                    CLI_ARGS.base_dir,
                    language + "-" + sample.sub_set,
                    "sample-{0:06d}.wav".format(index),
                )
                sample.wav_path = sample_wav_path
                sub_sets[sample.sub_set] += 1
                # start/end are divided by 1000.0 before scaling by the frame
                # rate to obtain frame positions.
                src_wav_file.setpos(int(sample.start * rate / 1000.0))
                data = src_wav_file.readframes(
                    int((sample.end - sample.start) * rate / 1000.0)
                )
                with wave.open(sample_wav_path, "w") as sample_wav_file:
                    sample_wav_file.setnchannels(src_wav_file.getnchannels())
                    sample_wav_file.setsampwidth(src_wav_file.getsampwidth())
                    sample_wav_file.setframerate(rate)
                    sample_wav_file.writeframes(data)


def write_csvs(samples, language):
    """Write one ``<language>-<sub_set>.csv`` per sub-set into the base dir.

    File names in the CSV are relative to the base dir. The ``article`` and
    ``speaker`` columns are added when ``CLI_ARGS.add_meta`` is set.
    """
    for sub_set, set_samples in group(samples, lambda s: s.sub_set).items():
        set_samples = sorted(set_samples, key=lambda s: s.wav_path)
        base_dir = os.path.abspath(CLI_ARGS.base_dir)
        csv_path = os.path.join(base_dir, language + "-" + sub_set + ".csv")
        print('Writing "{}"...'.format(csv_path))
        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            writer = csv.DictWriter(
                csv_file, fieldnames=FIELDNAMES_EXT if CLI_ARGS.add_meta else FIELDNAMES
            )
            writer.writeheader()
            bar = progressbar.ProgressBar(
                max_value=len(set_samples), widgets=SIMPLE_BAR
            )
            for sample in bar(set_samples):
                row = {
                    "wav_filename": os.path.relpath(sample.wav_path, base_dir),
                    "wav_filesize": os.path.getsize(sample.wav_path),
                    "transcript": sample.text,
                }
                if CLI_ARGS.add_meta:
                    row["article"] = sample.article
                    row["speaker"] = sample.speaker
                writer.writerow(row)


def cleanup(archive, language):
    """Remove the archive and/or the extracted files, depending on CLI flags."""
    if not CLI_ARGS.keep_archive:
        print('Removing archive "{}"...'.format(archive))
        os.remove(archive)
    language_dir = os.path.join(CLI_ARGS.base_dir, language)
    if not CLI_ARGS.keep_intermediate and os.path.isdir(language_dir):
        print('Removing intermediate files in "{}"...'.format(language_dir))
        shutil.rmtree(language_dir)


def prepare_language(language):
    """Run the full import pipeline for one language."""
    archive = maybe_download_language(language)
    extracted = maybe_extract(CLI_ARGS.base_dir, language, archive)
    maybe_convert_to_wav(extracted)
    samples = collect_samples(extracted, language)
    assign_sub_sets(samples)
    create_sample_dirs(language)
    split_audio_files(samples, language)
    write_csvs(samples, language)
    cleanup(archive, language)


def _str2bool(value):
    """argparse converter for boolean option values.

    ``type=bool`` converts every non-empty string - including "False" - to
    True. This converter understands the usual spellings and rejects
    anything else.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value)
    )


def handle_args():
    """Define and parse the command line of the SWC importer.

    Boolean options (``--exclude_numbers``, ``--ignore_too_long``,
    ``--keep_archive``, ``--keep_intermediate``) take an explicit value such
    as ``True``/``False``; their defaults are unchanged.
    """
    parser = argparse.ArgumentParser(description="Import Spoken Wikipedia Corpora")
    parser.add_argument("base_dir", help="Directory containing all data")
    parser.add_argument(
        "--language", default="all", help="One of (all|{})".format("|".join(LANGUAGES))
    )
    parser.add_argument(
        "--exclude_numbers",
        type=_str2bool,
        default=True,
        help="If sequences with non-transliterated numbers should be excluded",
    )
    parser.add_argument(
        "--max_duration",
        type=int,
        default=10000,
        help="Maximum sample duration in milliseconds",
    )
    parser.add_argument(
        "--ignore_too_long",
        type=_str2bool,
        default=False,
        help="If samples exceeding max_duration should be removed",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        help="Converts diacritic characters to their base ones",
    )
    for language in LANGUAGES:
        parser.add_argument(
            "--{}_alphabet".format(language),
            help="Exclude {} samples with characters not in provided alphabet file".format(
                language
            ),
        )
    parser.add_argument(
        "--add_meta", action="store_true", help="Adds article and speaker CSV columns"
    )
    parser.add_argument(
        "--exclude_unknown_speakers",
        action="store_true",
        help="Exclude unknown speakers",
    )
    parser.add_argument(
        "--exclude_unknown_articles",
        action="store_true",
        help="Exclude unknown articles",
    )
    parser.add_argument(
        "--keep_archive",
        type=_str2bool,
        default=True,
        help="If downloaded archives should be kept",
    )
    parser.add_argument(
        "--keep_intermediate",
        type=_str2bool,
        default=False,
        help="If intermediate files should be kept",
    )
    return parser.parse_args()


if __name__ == "__main__":
    CLI_ARGS = handle_args()
    if CLI_ARGS.language == "all":
        for lang in LANGUAGES:
            prepare_language(lang)
    elif CLI_ARGS.language in LANGUAGES:
        prepare_language(CLI_ARGS.language)
    else:
        fail("Wrong language id")
def _maybe_extract(data_dir, extracted_data, archive):
    """Extract the archive into data_dir unless it has been extracted before.

    :param data_dir: directory the archive gets extracted into
    :param extracted_data: name of the directory the archive is expected to
                           create below data_dir; if it exists nothing is done
    :param archive: path of the tar archive to extract
    """
    # If data_dir/extracted_data does not exist, extract archive in data_dir
    if gfile.Exists(path.join(data_dir, extracted_data)):
        return
    # The context manager closes the archive even if extraction fails half way
    # NOTE(review): extractall() trusts the member paths stored in the archive
    with tarfile.open(archive) as tar:
        tar.extractall(data_dir)
_maybe_split_dataset(extracted_dir, "test") return train_files, dev_files, test_files def _maybe_split_dataset(extracted_dir, data_set): # Create stm dir stm_dir = path.join(extracted_dir, data_set, "stm") # Create wav dir wav_dir = path.join(extracted_dir, data_set, "wav") files = [] # Loop over stm files and split corresponding wav for stm_file in glob(path.join(stm_dir, "*.stm")): # Parse stm file stm_segments = parse_stm_file(stm_file) # Open wav corresponding to stm_file wav_filename = path.splitext(path.basename(stm_file))[0] + ".wav" wav_file = path.join(wav_dir, wav_filename) origAudio = wave.open(wav_file, "r") # Loop over stm_segments and split wav_file for each segment for stm_segment in stm_segments: # Create wav segment filename start_time = stm_segment.start_time stop_time = stm_segment.stop_time new_wav_filename = ( path.splitext(path.basename(stm_file))[0] + "-" + str(start_time) + "-" + str(stop_time) + ".wav" ) new_wav_file = path.join(wav_dir, new_wav_filename) # If the wav segment filename does not exist create it if not gfile.Exists(new_wav_file): _split_wav(origAudio, start_time, stop_time, new_wav_file) new_wav_filesize = path.getsize(new_wav_file) files.append( (path.abspath(new_wav_file), new_wav_filesize, stm_segment.transcript) ) # Close origAudio origAudio.close() return pandas.DataFrame( data=files, columns=["wav_filename", "wav_filesize", "transcript"] ) def _split_wav(origAudio, start_time, stop_time, new_wav_file): frameRate = origAudio.getframerate() origAudio.setpos(int(start_time * frameRate)) chunkData = origAudio.readframes(int((stop_time - start_time) * frameRate)) chunkAudio = wave.open(new_wav_file, "w") chunkAudio.setnchannels(origAudio.getnchannels()) chunkAudio.setsampwidth(origAudio.getsampwidth()) chunkAudio.setframerate(frameRate) chunkAudio.writeframes(chunkData) chunkAudio.close() if __name__ == "__main__": _download_and_preprocess_data(sys.argv[1]) ================================================ FILE: 
def _preprocess_data(args):
    """Convert an LDC TIMIT download into RIFF wav files plus DeepSpeech CSVs.

    Steps:
      1. If "<args>/TIMIT" is missing, extract "<args>/TIMIT-LDC93S1.tgz" into it.
      2. Convert every "*.WAV" (NIST sphere) file below it to "<name>_rif.wav"
         by calling the external "sox" command.
      3. Read the matching ".TXT" transcript of every converted file and write
         timit_all.csv, timit_train.csv and timit_test.csv into the TIMIT dir.

    :param args: directory holding the extracted TIMIT folder or the archive
    :raises IOError: if neither extracted data nor the archive can be found, or
                     if a wav file cannot be attributed to the train or test set
    :raises subprocess.CalledProcessError: if a sox conversion fails
    """
    # Assume data is downloaded from LDC - https://catalog.ldc.upenn.edu/ldc93s1

    # SA sentences are repeated throughout by each speaker therefore can be removed for ASR as they will affect WER
    ignoreSASentences = True

    if ignoreSASentences:
        print("Using recommended ignore SA sentences")
        print(
            "Ignoring SA sentences (2 x sentences which are repeated by all speakers)"
        )
    else:
        print("Using unrecommended setting to include SA sentences")

    datapath = args
    target = path.join(datapath, "TIMIT")
    # print() does no %-interpolation on its own, so format explicitly
    print(
        "Checking to see if data has already been extracted in given argument: %s"
        % target
    )

    if not path.isdir(target):
        print(
            "Could not find extracted data, trying to find: TIMIT-LDC93S1.tgz in: ",
            datapath,
        )
        filepath = path.join(datapath, "TIMIT-LDC93S1.tgz")
        if path.isfile(filepath):
            print("File found, extracting")
            # Closes the archive even if extraction fails
            with tarfile.open(filepath) as tar:
                tar.extractall(target)
        else:
            print("File should be downloaded from LDC and placed at:", filepath)
            # Pass the ENOENT error code (not the errno module itself)
            raise IOError(errno.ENOENT, "File not found", filepath)
    else:
        # is path therefore continue
        print("Found extracted data in: ", target)

    print("Preprocessing data")
    # We convert the .WAV (NIST sphere format) into MSOFT .wav
    # creates _rif.wav as the new .wav file
    for root, _, filenames in os.walk(target):
        for filename in fnmatch.filter(filenames, "*.WAV"):
            sph_file = os.path.join(root, filename)
            wav_file = sph_file[:-4] + "_rif.wav"
            print("converting {} to {}".format(sph_file, wav_file))
            subprocess.check_call(["sox", sph_file, wav_file])

    print("Preprocessing Complete")
    print("Building CSVs")

    # Lists to build CSV files
    train_list_wavs, train_list_trans, train_list_size = [], [], []
    test_list_wavs, test_list_trans, test_list_size = [], [], []

    for root, _, filenames in os.walk(target):
        for filename in fnmatch.filter(filenames, "*_rif.wav"):
            full_wav = os.path.join(root, filename)

            # With ignoreSASentences only files without SA in their name are
            # kept; without it every file is added
            if ignoreSASentences and "SA" in os.path.basename(full_wav):
                continue

            wav_filesize = path.getsize(full_wav)

            # need to remove _rif.wav (8chars) then add .TXT
            trans_file = full_wav[:-8] + ".TXT"
            # Start from an empty transcript so an empty .TXT file can neither
            # fail nor inherit the transcript of the previous file
            trans = ""
            with open(trans_file, "r") as f:
                for line in f:
                    # The first two columns are skipped; the words follow them.
                    # Every cleaned word is prefixed by a single space.
                    trans = "".join(" " + clean(t) for t in line.split()[2:])

            lowered_wav = full_wav.lower()
            if "train" in lowered_wav:
                train_list_wavs.append(full_wav)
                train_list_trans.append(trans)
                train_list_size.append(wav_filesize)
            elif "test" in lowered_wav:
                test_list_wavs.append(full_wav)
                test_list_trans.append(trans)
                test_list_size.append(wav_filesize)
            else:
                raise IOError(
                    "Cannot attribute file to train or test set: " + full_wav
                )

    columns = ["wav_filename", "wav_filesize", "transcript"]
    # No dtype is forced here: the frames mix string and integer columns and
    # the file sizes returned by getsize() are integers already
    df_train = pd.DataFrame(
        {
            "wav_filename": train_list_wavs,
            "wav_filesize": train_list_size,
            "transcript": train_list_trans,
        },
        columns=columns,
    )
    df_test = pd.DataFrame(
        {
            "wav_filename": test_list_wavs,
            "wav_filesize": test_list_size,
            "transcript": test_list_trans,
        },
        columns=columns,
    )
    df_all = pd.DataFrame(
        {
            "wav_filename": train_list_wavs + test_list_wavs,
            "wav_filesize": train_list_size + test_list_size,
            "transcript": train_list_trans + test_list_trans,
        },
        columns=columns,
    )

    df_all.to_csv(
        target + "/timit_all.csv", sep=",", header=True, index=False, encoding="ascii"
    )
    df_train.to_csv(
        target + "/timit_train.csv", sep=",", header=True, index=False, encoding="ascii"
    )
    df_test.to_csv(
        target + "/timit_test.csv", sep=",", header=True, index=False, encoding="ascii"
    )
def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
    """Convert the extracted TrainingSpeech data into train/dev/test CSV files.

    Nothing is done if all three target CSVs already exist. Otherwise every
    entry of the archive's data.csv not longer than MAX_SECS is passed to
    one_sample() in a process pool, and the accepted samples are distributed
    to the CSVs by their index: every 10th sample to test, the following one
    to dev and the remaining ones to train.

    :param target_dir: directory receiving the "ts_<archive>_<set>.csv" files
    :param extracted_data: name of the extracted archive dir below target_dir
    :param english_compatible: forwarded to cleanup_transcript()
    """
    extracted_dir = os.path.join(target_dir, extracted_data)
    target_csv_template = os.path.join(target_dir, "ts_" + ARCHIVE_NAME + "_{}.csv")
    # The template itself (with its literal "{}") never exists as a file, so
    # the check has to look at the three concrete CSV paths
    if all(
        os.path.isfile(target_csv_template.format(set_name))
        for set_name in ("train", "dev", "test")
    ):
        return

    path_to_original_csv = os.path.join(extracted_dir, "data.csv")
    with open(path_to_original_csv) as csv_f:
        data = [
            d
            for d in csv.DictReader(csv_f, delimiter=",")
            if float(d["duration"]) <= MAX_SECS
        ]

    # Paths in data.csv are relative to the extracted archive directory
    for line in data:
        line["path"] = os.path.join(extracted_dir, line["path"])

    num_samples = len(data)
    rows = []
    counter = get_counter()

    print("Importing {} wav files...".format(num_samples))
    bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
    # Leaving the with-block terminates the workers if the import fails
    with Pool() as pool:
        for i, processed in enumerate(pool.imap_unordered(one_sample, data), start=1):
            counter += processed[0]
            rows += processed[1]
            bar.update(i)
        bar.update(num_samples)
        pool.close()
        pool.join()

    with open(
        target_csv_template.format("train"), "w", encoding="utf-8", newline=""
    ) as train_csv_file, open(
        target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
    ) as dev_csv_file, open(
        target_csv_template.format("test"), "w", encoding="utf-8", newline=""
    ) as test_csv_file:
        train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)  # 80%
        train_writer.writeheader()
        dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)  # 10%
        dev_writer.writeheader()
        test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)  # 10%
        test_writer.writeheader()

        for i, item in enumerate(rows):
            transcript = validate_label(
                cleanup_transcript(item[2], english_compatible=english_compatible)
            )
            if not transcript:
                continue
            # If item[0] is already absolute, join() returns it unchanged
            wav_filename = os.path.join(extracted_dir, item[0])
            i_mod = i % 10
            if i_mod == 0:
                writer = test_writer
            elif i_mod == 1:
                writer = dev_writer
            else:
                writer = train_writer
            writer.writerow(
                dict(
                    wav_filename=wav_filename,
                    wav_filesize=os.path.getsize(wav_filename),
                    transcript=transcript,
                )
            )

    imported_samples = get_imported_samples(counter)
    assert counter["all"] == num_samples
    assert len(rows) == imported_samples

    print_import_report(counter, SAMPLE_RATE, MAX_SECS)
PUNCTUATIONS_REG = re.compile(r"[°\-,;!?.()\[\]*…—]")
MULTIPLE_SPACES_REG = re.compile(r"\s{2,}")


def cleanup_transcript(text, english_compatible=False):
    """Normalize a raw transcript for training.

    Typographic apostrophes become plain ones and non-breaking spaces become
    regular spaces. Punctuation is replaced by spaces, runs of whitespace are
    collapsed into one space and the result is stripped and lower-cased.

    :param text: the raw transcript
    :param english_compatible: if set, the text is additionally passed through
                               unidecode.unidecode()
    :return: the normalized transcript
    """
    cleaned = text.replace("’", "'").replace("\u00A0", " ")
    # Punctuation has to go first: removing it may create new runs of spaces
    for pattern in (PUNCTUATIONS_REG, MULTIPLE_SPACES_REG):
        cleaned = pattern.sub(" ", cleaned)
    if english_compatible:
        cleaned = unidecode.unidecode(cleaned)
    return cleaned.strip().lower()
def check_and_prepare_sentence(sentence):
    """Lower-case, optionally normalize and validate a sentence.

    With CLI_ARGS.normalize set, every character that is neither a German
    umlaut/sharp s nor accepted by in_alphabet() is replaced by the ASCII
    part of its NFKD decomposition, which may be empty or several characters.

    :param sentence: raw sentence text; None is treated as not usable
    :return: the result of validate_label() on the prepared sentence, or None
             if the sentence is None or contains a character rejected by
             in_alphabet()
    """
    if sentence is None:
        # An XML element without text yields None instead of a string
        return None
    sentence = sentence.lower().replace("co2", "c o zwei")
    chars = []
    for c in sentence:
        if CLI_ARGS.normalize and c not in "äöüß" and not in_alphabet(c):
            c = (
                unicodedata.normalize("NFKD", c)
                .encode("ascii", "ignore")
                .decode("ascii", "ignore")
            )
        for sc in c:
            # Check the individual sub-character that is about to be appended
            if not in_alphabet(sc):
                return None
            chars.append(sc)
    return validate_label("".join(chars))
def handle_args():
    """Build the command line parser of the TUDA importer and parse sys.argv.

    --keep_archive takes an explicit boolean value. It used to be declared
    with ``type=bool``, which turns every non-empty string - including
    "False" - into True. It is now parsed by ``str_to_bool`` so that
    "false"/"no"/"0" really switch the option off.

    :return: the argparse.Namespace holding all parsed options
    """

    def str_to_bool(value):
        """Translate a command line string such as "true" or "no" into a bool."""
        normalized = value.strip().lower()
        if normalized in ("true", "t", "yes", "y", "1", "on"):
            return True
        # The empty string was the only falsy input of the former type=bool
        if normalized in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value (true/false), got {!r}".format(value)
        )

    parser = argparse.ArgumentParser(description="Import German Distant Speech (TUDA)")
    parser.add_argument("base_dir", help="Directory containing all data")
    parser.add_argument(
        "--max_duration",
        type=int,
        default=10000,
        help="Maximum sample duration in milliseconds",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        help="Converts diacritic characters to their base ones",
    )
    parser.add_argument(
        "--alphabet",
        help="Exclude samples with characters not in provided alphabet file",
    )
    parser.add_argument(
        "--keep_archive",
        type=str_to_bool,
        default=True,
        help="If downloaded archives should be kept",
    )
    return parser.parse_args()
def _maybe_convert_sets(target_dir, extracted_data):
    """Process all VCTK wav files in place and write the CSV files.

    Collects the files of every sub-directory of "<extracted_data>/wav48",
    passes each of them to one_sample() in a process pool and finally calls
    _write_csv().

    :param target_dir: directory the archive was extracted into; also the
                       directory handed to _write_csv() as CSV target
    :param extracted_data: name of the extracted archive dir below target_dir
    """
    extracted_dir = os.path.join(target_dir, extracted_data, "wav48")
    txt_dir = os.path.join(target_dir, extracted_data, "txt")

    directory = os.path.expanduser(extracted_dir)
    all_samples = []
    for target in sorted(os.listdir(directory)):
        # "path" is not imported by this script - os.path has to be used
        speaker_dir = os.path.join(extracted_dir, target)
        if not os.path.isdir(speaker_dir):
            # Stray files next to the sub-directories cannot be listed
            continue
        all_samples += _maybe_prepare_set(speaker_dir)

    num_samples = len(all_samples)
    print(f"Converting wav files to {SAMPLE_RATE}hz...")
    bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
    # Leaving the with-block terminates the workers if the conversion fails
    with Pool() as pool:
        for i, _ in enumerate(pool.imap_unordered(one_sample, all_samples), start=1):
            bar.update(i)
        bar.update(num_samples)
        pool.close()
        pool.join()

    _write_csv(extracted_dir, txt_dir, target_dir)
def make_manifest(directory):
    """List all files found below the sub-directories of a directory.

    Only sub-directories of ``directory`` are searched (recursively); files
    located directly in ``directory`` are ignored. Sub-directories are
    visited in sorted order.

    :param directory: root directory; a leading "~" is expanded
    :return: list of file paths
    """
    base = os.path.expanduser(directory)
    candidates = (os.path.join(base, entry) for entry in sorted(os.listdir(base)))
    return [
        os.path.join(folder, file_name)
        for candidate in candidates
        if os.path.isdir(candidate)
        for folder, _, file_names in sorted(os.walk(candidate))
        for file_name in file_names
    ]
if __name__ == "__main__":
    # This script does not import sys at its top, so it is imported here to
    # make sys.argv available. The first argument is the target directory.
    import sys

    _download_and_preprocess_data(sys.argv[1])
def _parallel_extracter(data_dir, number_of_test, number_of_dev, total, counter):
    """Generate a function to extract a tar file based on given parameters
    This works by currying the above given arguments into a closure
    in the form of the following function.

    :param data_dir:       the target directory to extract into
    :param number_of_test: the number of files to keep as the test set
    :param number_of_dev:  the number of files to keep as the dev set
    :param total:          the total number of files to extract
    :param counter:        an atomic counter to keep track of # of extracted files
    :return:               a function that actually extracts a tar file given these params
    """

    def extract(d):
        """Binds data_dir, number_of_test, number_of_dev, total, and counter into this scope
        Extracts the given file
        :param d: a tuple consisting of (index, file) where index is the index
                  of the file to extract and file is the name of the file to extract
        """
        (i, archive) = d
        # The archive's index decides which data set it is extracted into
        if i < number_of_test:
            dataset_dir = path.join(data_dir, "test")
        elif i < number_of_test + number_of_dev:
            dataset_dir = path.join(data_dir, "dev")
        else:
            dataset_dir = path.join(data_dir, "train")
        # The archive name without its last extension is the expected
        # directory name - if that one exists, the archive is skipped
        extracted_name = ".".join(filename_of(archive).split(".")[:-1])
        if not gfile.Exists(os.path.join(dataset_dir, extracted_name)):
            c = counter.increment()
            print("Extracting file {} ({}/{})...".format(i + 1, c, total))
            # The context manager closes the archive even if extraction fails
            # NOTE(review): extractall() trusts the member paths stored in
            # the downloaded archive
            with tarfile.open(archive) as tar:
                tar.extractall(dataset_dir)

    return extract
data to dataset_dir if not path.isdir(os.path.join(data_dir, "test")): makedirs(os.path.join(data_dir, "test")) if not path.isdir(os.path.join(data_dir, "dev")): makedirs(os.path.join(data_dir, "dev")) if not path.isdir(os.path.join(data_dir, "train")): makedirs(os.path.join(data_dir, "train")) tarfiles = glob(os.path.join(archive_dir, "*.tgz")) number_of_files = len(tarfiles) number_of_test = number_of_files // 100 number_of_dev = number_of_files // 100 # extract tars in parallel print( "Extracting Voxforge data set into {} if not already present...".format( data_dir ) ) extracter = _parallel_extracter( data_dir, number_of_test, number_of_dev, len(tarfiles), AtomicCounter() ) p.map(extracter, enumerate(tarfiles)) # Generate data set print("Generating Voxforge data set into {}".format(data_dir)) test_files = _generate_dataset(data_dir, "test") dev_files = _generate_dataset(data_dir, "dev") train_files = _generate_dataset(data_dir, "train") # Write sets to disk as CSV files train_files.to_csv(os.path.join(data_dir, "voxforge-train.csv"), index=False) dev_files.to_csv(os.path.join(data_dir, "voxforge-dev.csv"), index=False) test_files.to_csv(os.path.join(data_dir, "voxforge-test.csv"), index=False) def _generate_dataset(data_dir, data_set): extracted_dir = path.join(data_dir, data_set) files = [] for promts_file in glob(os.path.join(extracted_dir + "/*/etc/", "PROMPTS")): if path.isdir(os.path.join(promts_file[:-11], "wav")): with codecs.open(promts_file, "r", "utf-8") as f: for line in f: id = line.split(" ")[0].split("/")[-1] sentence = " ".join(line.split(" ")[1:]) sentence = re.sub("[^a-z']", " ", sentence.strip().lower()) transcript = "" for token in sentence.split(" "): word = token.strip() if word != "" and word != " ": transcript += word + " " transcript = ( unicodedata.normalize("NFKD", transcript.strip()) .encode("ascii", "ignore") .decode("ascii", "ignore") ) wav_file = path.join(promts_file[:-11], "wav/" + id + ".wav") if gfile.Exists(wav_file): 
def main():
    """Print the sorted set of op types used in a binary GraphDef file.

    The path of the serialized ``GraphDef`` (``.pb``) is taken from the first
    command line argument; one op name is printed per line.
    """
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError traceback.
        print("Usage: {} <graph.pb>".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    # GFile replaces the deprecated FastGFile alias and has the same interface.
    with tfv1.gfile.GFile(sys.argv[1], "rb") as fin:
        graph_def = tfv1.GraphDef()
        graph_def.ParseFromString(fin.read())

    print("\n".join(sorted({node.op for node in graph_def.node})))
def handle_args():
    """Build the command line parser of play.py and parse ``sys.argv``.

    :return: the ``argparse.Namespace`` with the attributes ``source``,
             ``start``, ``number``, ``random``, ``augment``, ``clock``,
             ``pipe`` and ``quiet``
    """
    parser = argparse.ArgumentParser(
        description="Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) "
        "and DeepSpeech CSV files"
    )
    parser.add_argument("source", help="Sample DB, CSV or WAV file to play samples from")
    parser.add_argument(
        "--start",
        type=int,
        default=0,
        help="Sample index to start at (negative numbers are relative to the end of the collection)",
    )
    parser.add_argument(
        "--number",
        type=int,
        default=-1,
        help="Number of samples to play (-1 for endless)",
    )
    parser.add_argument(
        "--random",
        action="store_true",
        help="If samples should be played in random order",
    )
    parser.add_argument(
        "--augment",
        action='append',
        help="Add an augmentation operation",
    )
    parser.add_argument(
        "--clock",
        type=float,
        default=0.5,
        # Adjacent literals are concatenated verbatim, so each fragment needs
        # its own trailing space to keep the sentences apart.
        help="Simulates clock value used for augmentations during training. "
        "Ranges from 0.0 (representing parameter start values) to "
        "1.0 (representing parameter end values)",
    )
    parser.add_argument(
        "--pipe",
        action="store_true",
        help="Pipe first sample as wav file to stdout. Forces --number to 1.",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="No info logging to console",
    )
    return parser.parse_args()
#!/bin/sh
# CI smoke test: train one epoch in bytes output mode on the single-sample
# LDC93S1 data set using the checkpoints in /tmp/ckpt_bytes, then verify from
# the captured log that training resumed from an existing checkpoint.

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
resume_log="/tmp/resume.log"
train_status="/tmp/resume.status"

if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

# POSIX sh has no `pipefail`: the status of `python ... | tee` is the status of
# tee, so a crashing training run would go unnoticed by `set -e`. Record a
# non-zero exit status in a side file and check it after the pipeline.
rm -f "${train_status}"
{
  python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
    --train_files "${ldc93s1_csv}" --train_batch_size 1 \
    --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
    --test_files "${ldc93s1_csv}" --test_batch_size 1 \
    --n_hidden 100 --epochs 1 \
    --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' --bytes_output_mode \
    --learning_rate 0.001 --dropout_rate 0.05 \
    --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
    || echo "$?" > "${train_status}"
} | tee "${resume_log}"

if [ -f "${train_status}" ]; then
  echo "DeepSpeech.py exited with status $(cat "${train_status}")"
  exit 1
fi

if ! grep "Loading best validating checkpoint from" "${resume_log}"; then
  echo "Did not resume training from checkpoint"
  exit 1
fi

exit 0
#!/bin/sh
# CI smoke test: train a fresh bytes-output-mode model on the single-sample
# LDC93S1 data set and export it to /tmp/train_bytes.
#
# Usage: run-ci-ldc93s1_new_bytes.sh <epoch_count> <audio_sample_rate>

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"

# Both positional arguments are required. Fail right away with a usage message
# instead of handing DeepSpeech.py a flag that has no value.
epoch_count="${1:?usage: $0 <epoch_count> <audio_sample_rate>}"
audio_sample_rate="${2:?usage: $0 <epoch_count> <audio_sample_rate>}"

if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
  --feature_cache '/tmp/ldc93s1_cache' \
  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \
  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
  --audio_sample_rate "${audio_sample_rate}" \
  --bytes_output_mode
#!/bin/sh
# CI smoke test: convert the single-sample LDC93S1 CSV into a Sample Database
# (SDB) file, train a fresh model from that SDB and export it to /tmp/train_sdb.
#
# Usage: run-ci-ldc93s1_new_sdb.sh <epoch_count> <audio_sample_rate>

set -xe

ldc93s1_dir="./data/smoke_test"
ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
ldc93s1_sdb="${ldc93s1_dir}/ldc93s1.sdb"

# Both positional arguments are required. Fail right away with a usage message
# instead of handing DeepSpeech.py a flag that has no value.
epoch_count="${1:?usage: $0 <epoch_count> <audio_sample_rate>}"
audio_sample_rate="${2:?usage: $0 <epoch_count> <audio_sample_rate>}"

if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

if [ ! -f "${ldc93s1_sdb}" ]; then
    echo "Converting LDC93S1 example data, saving to ${ldc93s1_sdb}."
    python -u bin/data_set_tool.py "${ldc93s1_csv}" "${ldc93s1_sdb}"
fi

# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0

python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files "${ldc93s1_sdb}" --train_batch_size 1 \
  --dev_files "${ldc93s1_sdb}" --dev_batch_size 1 \
  --test_files "${ldc93s1_sdb}" --test_batch_size 1 \
  --n_hidden 100 --epochs "${epoch_count}" \
  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_sdb' \
  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_sdb' \
  --scorer_path 'data/smoke_test/pruned_lm.scorer' \
  --audio_sample_rate "${audio_sample_rate}"
python -u bin/data_set_tool.py ${ldc93s1_csv} ${ldc93s1_sdb} fi; # Force only one visible device because we have a single-sample dataset # and when trying to run on multiple devices (like GPUs), this will break export CUDA_VISIBLE_DEVICES=0 python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --train_files ${ldc93s1_sdb},${ldc93s1_csv} --train_batch_size 1 \ --feature_cache '/tmp/ldc93s1_cache_sdb_csv' \ --dev_files ${ldc93s1_sdb},${ldc93s1_csv} --dev_batch_size 1 \ --test_files ${ldc93s1_sdb},${ldc93s1_csv} --test_batch_size 1 \ --n_hidden 100 --epochs $epoch_count \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_sdb_csv' \ --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_sdb_csv' \ --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --audio_sample_rate ${audio_sample_rate} ================================================ FILE: bin/run-ci-ldc93s1_singleshotinference.sh ================================================ #!/bin/sh set -xe ldc93s1_dir="./data/smoke_test" ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv" if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}." 
python -u bin/import_ldc93s1.py ${ldc93s1_dir} fi; # Force only one visible device because we have a single-sample dataset # and when trying to run on multiple devices (like GPUs), this will break export CUDA_VISIBLE_DEVICES=0 python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --train_files ${ldc93s1_csv} --train_batch_size 1 \ --dev_files ${ldc93s1_csv} --dev_batch_size 1 \ --test_files ${ldc93s1_csv} --test_batch_size 1 \ --n_hidden 100 --epochs 1 \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' --checkpoint_secs 0 \ --learning_rate 0.001 --dropout_rate 0.05 \ --scorer_path 'data/smoke_test/pruned_lm.scorer' python -u DeepSpeech.py \ --n_hidden 100 \ --checkpoint_dir '/tmp/ckpt' \ --scorer_path 'data/smoke_test/pruned_lm.scorer' \ --one_shot_infer 'data/smoke_test/LDC93S1.wav' ================================================ FILE: bin/run-ci-ldc93s1_tflite.sh ================================================ #!/bin/sh set -xe ldc93s1_dir="./data/smoke_test" ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv" audio_sample_rate=$1 if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}." 
#!/bin/sh
# CI test for sample-domain augmentations: pipes the LDC93S1 sample through
# bin/play.py with one augmentation at a time and uses bin/compare_samples.py
# to check that each augmented file differs from the original.

set -xe

# `&&` (instead of `;`) makes a failing cd abort the script rather than
# silently using the current directory.
ldc93s1_dir=$(cd data/smoke_test && pwd)
# NOTE(review): the other CI scripts test for "ldc93s1.csv" (lower case) after
# running the same importer - confirm which spelling is intended here.
ldc93s1_csv="${ldc93s1_dir}/LDC93S1.csv"
ldc93s1_wav="${ldc93s1_dir}/LDC93S1.wav"
ldc93s1_overlay_csv="${ldc93s1_dir}/LDC93S1_overlay.csv"
ldc93s1_overlay_wav="${ldc93s1_dir}/LDC93S1_reversed.wav"

# Command prefixes; expanded unquoted on purpose so they split into words.
play="python bin/play.py --number 1 --quiet"
compare="python bin/compare_samples.py --no-success-output"

# Plays the original sample with a single augmentation into
# /tmp/<stem>-test.wav and fails the script if the result does not differ.
#   $1: capitalised augmentation name used in the error message
#   $2: file name stem of the output file
#   $3: augmentation specification passed to --augment
check_augmentation()
{
    _label="$1"
    _stem="$2"
    _spec="$3"
    _out="/tmp/${_stem}-test.wav"

    # The specification is quoted: it may contain [...] which the shell would
    # otherwise treat as a glob pattern.
    $play "${ldc93s1_wav}" --augment "${_spec}" --pipe >"${_out}"

    if ! $compare --if-differ "${ldc93s1_wav}" "${_out}"; then
        echo "${_label} augmentation had no effect or changed basic sample properties"
        exit 1
    fi
}

if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

if [ ! -f "${ldc93s1_overlay_csv}" ]; then
    echo "Reversing ${ldc93s1_wav} to ${ldc93s1_overlay_wav}."
    sox "${ldc93s1_wav}" "${ldc93s1_overlay_wav}" reverse

    echo "Creating ${ldc93s1_overlay_csv}."
    # Pass the path as an argument, never as part of the format string, so
    # that '%' or '\' in the path cannot be interpreted by printf.
    printf 'wav_filename\n%s' "${ldc93s1_overlay_wav}" > "${ldc93s1_overlay_csv}"
fi

# Sanity check of the comparison tool itself: original vs. reversed must differ.
if ! $compare --if-differ "${ldc93s1_wav}" "${ldc93s1_overlay_wav}"; then
    echo "Sample comparison tool not working correctly"
    exit 1
fi

check_augmentation "Overlay" overlay "overlay[source=${ldc93s1_overlay_csv},snr=20]"
check_augmentation "Reverb" reverb 'reverb[delay=50.0,decay=2.0]'
check_augmentation "Resample" resample 'resample[rate=4000]'
check_augmentation "Codec" codec 'codec[bitrate=4000]'
check_augmentation "Volume" volume 'volume'
python -u bin/import_ldc93s1.py ${ldc93s1_dir} fi; # Force only one visible device because we have a single-sample dataset # and when trying to run on multiple devices (like GPUs), this will break export CUDA_VISIBLE_DEVICES=0 # Force UTF-8 output export PYTHONIOENCODING=utf-8 echo "##### Train ENGLISH model and transfer to RUSSIAN #####" echo "##### while iterating over loading logic #####" for LOAD in 'init' 'last' 'auto'; do echo "########################################################" echo "#### Train ENGLISH model with just --checkpoint_dir ####" echo "########################################################" python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --alphabet_config_path "./data/alphabet.txt" \ --load_train "$LOAD" \ --train_files "${ldc93s1_csv}" --train_batch_size 1 \ --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \ --test_files "${ldc93s1_csv}" --test_batch_size 1 \ --scorer_path '' \ --checkpoint_dir '/tmp/ckpt/transfer/eng' \ --n_hidden 100 \ --epochs 10 echo "##############################################################################" echo "#### Train ENGLISH model with --save_checkpoint_dir --load_checkpoint_dir ####" echo "##############################################################################" python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --alphabet_config_path "./data/alphabet.txt" \ --load_train "$LOAD" \ --train_files "${ldc93s1_csv}" --train_batch_size 1 \ --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \ --test_files "${ldc93s1_csv}" --test_batch_size 1 \ --save_checkpoint_dir '/tmp/ckpt/transfer/eng' \ --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \ --scorer_path '' \ --n_hidden 100 \ --epochs 10 echo "####################################################################################" echo "#### Transfer to RUSSIAN model with --save_checkpoint_dir --load_checkpoint_dir ####" echo "####################################################################################" python -u DeepSpeech.py 
--noshow_progressbar --noearly_stop \ --drop_source_layers 1 \ --alphabet_config_path "${ru_dir}/alphabet.ru" \ --load_train 'last' \ --train_files "${ru_csv}" --train_batch_size 1 \ --dev_files "${ru_csv}" --dev_batch_size 1 \ --save_checkpoint_dir '/tmp/ckpt/transfer/ru' \ --load_checkpoint_dir '/tmp/ckpt/transfer/eng' \ --scorer_path '' \ --n_hidden 100 \ --epochs 10 # Test transfer learning checkpoint python -u evaluate.py --noshow_progressbar \ --test_files "${ru_csv}" --test_batch_size 1 \ --alphabet_config_path "${ru_dir}/alphabet.ru" \ --load_checkpoint_dir '/tmp/ckpt/transfer/ru' \ --scorer_path '' \ --n_hidden 100 done ================================================ FILE: bin/run-ldc93s1.sh ================================================ #!/bin/sh set -xe if [ ! -f DeepSpeech.py ]; then echo "Please make sure you run this from DeepSpeech's top level directory." exit 1 fi; if [ ! -f "data/ldc93s1/ldc93s1.csv" ]; then echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/ldc93s1." 
# Selects the LDC93S1 sample file matching an audio bitrate.
#   $1: bitrate, either "8k" or "16k"
# Sets the global variable ldc93s1_sample_filename.
# Exits with status 1 when the bitrate is empty or not supported.
set_ldc_sample_filename()
{
  local _bitrate=$1

  if [ -z "${_bitrate}" ]; then
    echo "Bitrate should not be empty"
    exit 1
  fi;

  case "${_bitrate}" in
    8k)
      ldc93s1_sample_filename='LDC93S1_pcms16le_1_8000.wav'
    ;;
    16k)
      ldc93s1_sample_filename='LDC93S1_pcms16le_1_16000.wav'
    ;;
    *)
      # Without this branch an unknown bitrate silently keeps whatever value
      # ldc93s1_sample_filename had before the call.
      echo "Unsupported bitrate: ${_bitrate} (expected 8k or 16k)"
      exit 1
    ;;
  esac
}
# Fails the task when a Bazel "explain" log shows compile actions that were
# re-executed although they should have been served from the cache.
#   $1: path to the file written by `bazel build --explain=<file>`
# Copies the Bazel logs to ${CI_ARTIFACTS_DIR}. When spurious rebuilds are
# found and a patched Bazel is in use (see is_patched_bazel), a diff of the
# CKD dumps is stored as ${CI_ARTIFACTS_DIR}/ckd.diff. Exits with status 1 if
# the explain file is missing or any spurious rebuild was detected.
verify_bazel_rebuild()
{
  bazel_explain_file="$1"

  if [ ! -f "${bazel_explain_file}" ]; then
    echo "No such explain file: ${bazel_explain_file}"
    exit 1
  fi;

  mkdir -p "${CI_ARTIFACTS_DIR}" || true

  # The glob has to stay outside of the quotes to be expanded.
  cp "${DS_DSDIR}"/tensorflow/bazel*.log "${CI_ARTIFACTS_DIR}/"

  # "[for host]" has to be escaped: unescaped it is a bracket expression that
  # matches any single one of the characters "for host", so `grep -v` dropped
  # every line and this check could never detect anything.
  # `| wc -l` is kept on purpose: `grep -c` exits with 1 on zero matches, which
  # would abort callers running under `set -e`.
  spurious_rebuilds=$(grep 'Executing action' "${bazel_explain_file}" | grep 'Compiling' | grep -v -E 'no entry in the cache|\[for host\]|unconditional execution is requested|Executing genrule //native_client:workspace_status|Compiling native_client/workspace_status.cc|Linking native_client/libdeepspeech.so' | wc -l)
  if [ "${spurious_rebuilds}" -ne 0 ]; then
    echo "Bazel rebuilds some file it should not, please check."

    if is_patched_bazel; then
      # Create exactly the directories the archives are extracted into;
      # `tar -C` fails when its target directory does not exist.
      mkdir -p "${DS_ROOT_TASK}/ckd/ds" "${DS_DSDIR}/ckd/tensorflow"
      tar xf "${DS_ROOT_TASK}/bazel-ckd-tf.tar" --strip-components=4 -C "${DS_ROOT_TASK}/ckd/ds/"
      tar xf "${DS_ROOT_TASK}/bazel-ckd-ds.tar" --strip-components=4 -C "${DS_DSDIR}/ckd/tensorflow/"

      echo "Making a diff between CKD files"
      mkdir -p "${CI_ARTIFACTS_DIR}"
      diff -urNw "${DS_DSDIR}/ckd/tensorflow/" "${DS_ROOT_TASK}/ckd/ds/" | tee "${CI_ARTIFACTS_DIR}/ckd.diff"

      rm -fr "${DS_DSDIR}/ckd/tensorflow/" "${DS_ROOT_TASK}/ckd/ds/"
    else
      echo "Cannot get CKD information from release, please use patched Bazel"
    fi;

    exit 1
  fi;
}
DS_CPU_COUNT=$(nproc) fi; if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then export CI_TASK_DIR="$(cygpath ${CI_TASK_DIR})" export DS_ROOT_TASK=${CI_TASK_DIR} export PYENV_ROOT="${CI_TASK_DIR}/pyenv-root" export PLATFORM_EXE_SUFFIX=.exe export DS_CPU_COUNT=$(nproc) # Those are the versions available on NuGet.org export SUPPORTED_PYTHON_VERSIONS="3.5.4:ucs2 3.6.8:ucs2 3.7.6:ucs2 3.8.1:ucs2 3.9.0:ucs2" fi; if [ "${OS}" = "Darwin" ]; then export DS_ROOT_TASK=${CI_TASK_DIR} export DS_CPU_COUNT=$(sysctl hw.ncpu |cut -d' ' -f2) export PYENV_ROOT="${DS_ROOT_TASK}/pyenv-root" export HOMEBREW_NO_AUTO_UPDATE=1 export BREW_URL=https://github.com/Homebrew/brew/tarball/2.2.17 export BUILDS_BREW="${CI_TASK_DIR}/homebrew-builds" export TESTS_BREW="${CI_TASK_DIR}/homebrew-tests" export NVM_DIR=$TESTS_BREW/.nvm/ && mkdir -p $NVM_DIR export PKG_CONFIG_PATH="${BUILDS_BREW}/lib/pkgconfig" if [ -f "${BUILDS_BREW}/bin/brew" ]; then export PATH=${BUILDS_BREW}/bin/:${BUILDS_BREW}/opt/node@12/bin:$PATH fi; if [ -f "${TESTS_BREW}/bin/brew" ]; then export PATH=${TESTS_BREW}/bin/:$PATH fi; fi; export CI_ARTIFACTS_DIR=${CI_ARTIFACTS_DIR:-/tmp/artifacts} export CI_TMP_DIR=${CI_TMP_DIR:-/tmp} export ANDROID_TMP_DIR=/data/local/tmp mkdir -p ${CI_TMP_DIR} || true export DS_TFDIR=${DS_ROOT_TASK}/tensorflow export DS_DSDIR=${DS_ROOT_TASK}/ export DS_EXAMPLEDIR=${DS_ROOT_TASK}/examples export DS_VERSION="$(cat ${DS_DSDIR}/training/deepspeech_training/VERSION)" export GRADLE_USER_HOME=${DS_ROOT_TASK}/gradle-cache export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/ export ANDROID_NDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/android-ndk-r18b/ WGET=${WGET:-"wget"} TAR=${TAR:-"tar"} XZ=${XZ:-"xz -9 -T0"} ZIP=${ZIP:-"zip"} UNXZ=${UNXZ:-"xz -T0 -d"} UNGZ=${UNGZ:-"gunzip"} if [ "${OS}" = "Darwin" ]; then TAR="gtar" fi if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then WGET=/usr/bin/wget.exe TAR=/usr/bin/tar.exe XZ="xz -9 -T0 -c -" UNXZ="xz -9 -T0 -d" fi model_source="${DEEPSPEECH_TEST_MODEL}" 
model_name="$(basename "${model_source}")" model_name_mmap="$(basename -s ".pb" "${model_source}").pbmm" model_source_mmap="$(dirname "${model_source}")/${model_name_mmap}" ldc93s1_sample_filename='' ================================================ FILE: ci_scripts/armv7-build.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/build-utils.sh source $(dirname "$0")/tf-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so //native_client:generate_scorer_package " BAZEL_BUILD_FLAGS="${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" maybe_install_xldd do_bazel_build do_deepspeech_binary_build ================================================ FILE: ci_scripts/asserts.sh ================================================ #!/bin/bash set -xe strip() { # We strip leading carriage return due to ElectronJS on Windows producing stray # characters before its output intermittently. # Then we strip leading and trailing whitespace. echo "$(echo $1 | tr -d $'\r' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" } # This verify exact inference result assert_correct_inference() { phrase=$(strip "$1") expected=$(strip "$2") status=$3 if [ "$status" -ne "0" ]; then case "$(cat ${CI_TMP_DIR}/stderr)" in *"incompatible with minimum version"*) echo "Prod model too old for client, skipping test." return 0 ;; *) echo "Client failed to run:" cat ${CI_TMP_DIR}/stderr return 1 ;; esac fi if [ -z "${phrase}" -o -z "${expected}" ]; then echo "One or more empty strings:" echo "phrase: <${phrase}>" echo "expected: <${expected}>" return 1 fi; if [ "${phrase}" = "${expected}" ]; then echo "Proper output has been produced:" echo "${phrase}" return 0 else echo "!! Non matching output !!" 
echo "got: <${phrase}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${phrase}" | xxd fi echo "-------------------" echo "expected: <${expected}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${expected}" | xxd fi return 1 fi; } # This verify that ${expected} is contained within ${phrase} assert_working_inference() { phrase=$1 expected=$2 status=$3 if [ -z "${phrase}" -o -z "${expected}" ]; then echo "One or more empty strings:" echo "phrase: <${phrase}>" echo "expected: <${expected}>" return 1 fi; if [ "$status" -ne "0" ]; then case "$(cat ${CI_TMP_DIR}/stderr)" in *"incompatible with minimum version"*) echo "Prod model too old for client, skipping test." return 0 ;; *) echo "Client failed to run:" cat ${CI_TMP_DIR}/stderr return 1 ;; esac fi case "${phrase}" in *${expected}*) echo "Proper output has been produced:" echo "${phrase}" return 0 ;; *) echo "!! Non matching output !!" echo "got: <${phrase}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${phrase}" | xxd fi echo "-------------------" echo "expected: <${expected}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${expected}" | xxd fi return 1 ;; esac } assert_shows_something() { stderr=$1 expected=$2 if [ -z "${stderr}" -o -z "${expected}" ]; then echo "One or more empty strings:" echo "stderr: <${stderr}>" echo "expected: <${expected}>" return 1 fi; case "${stderr}" in *"incompatible with minimum version"*) echo "Prod model too old for client, skipping test." return 0 ;; *${expected}*) echo "Proper output has been produced:" echo "${stderr}" return 0 ;; *) echo "!! Non matching output !!" 
echo "got: <${stderr}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${stderr}" | xxd fi echo "-------------------" echo "expected: <${expected}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${expected}" | xxd fi return 1 ;; esac } assert_not_present() { stderr=$1 not_expected=$2 if [ -z "${stderr}" -o -z "${not_expected}" ]; then echo "One or more empty strings:" echo "stderr: <${stderr}>" echo "not_expected: <${not_expected}>" return 1 fi; case "${stderr}" in *${not_expected}*) echo "!! Not expected was present !!" echo "got: <${stderr}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${stderr}" | xxd fi echo "-------------------" echo "not_expected: <${not_expected}>" if [ -x "$(command -v xxd)" ]; then echo "xxd:"; echo "${not_expected}" | xxd fi return 1 ;; *) echo "Proper not expected output has not been produced:" echo "${stderr}" return 0 ;; esac } assert_correct_ldc93s1() { assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_working_ldc93s1() { assert_working_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_correct_ldc93s1_lm() { assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_working_ldc93s1_lm() { assert_working_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_correct_multi_ldc93s1() { assert_shows_something "$1" "/${ldc93s1_sample_filename}%she had your dark suit in greasy wash water all year%" "$?" assert_shows_something "$1" "/LDC93S1_pcms16le_2_44100.wav%she had your dark suit in greasy wash water all year%" "$?" ## 8k will output garbage anyway ... 
# assert_shows_something "$1" "/LDC93S1_pcms16le_1_8000.wav%she hayorasryrtl lyreasy asr watal w water all year%" } assert_correct_ldc93s1_prodmodel() { if [ -z "$3" -o "$3" = "16k" ]; then assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" "$2" fi; if [ "$3" = "8k" ]; then assert_correct_inference "$1" "she had to do suit in greasy wash water all year" "$2" fi; } assert_working_ldc93s1_prodmodel() { if [ -z "$3" -o "$3" = "16k" ]; then assert_working_inference "$1" "she had your dark suit in greasy wash water all year" "$2" fi if [ "$3" = "8k" ]; then assert_working_inference "$1" "she had to do suit in greasy wash water all year" "$2" fi } assert_correct_ldc93s1_prodtflitemodel() { if [ -z "$3" -o "$3" = "16k" ]; then assert_correct_inference "$1" "she had her dark suit in greasy wash water all year" "$2" fi; if [ "$3" = "8k" ]; then assert_correct_inference "$1" "she had to do so and greasy wash water all year" "$2" fi; } assert_working_ldc93s1_prodtflitemodel() { if [ -z "$3" -o "$3" = "16k" ]; then assert_working_inference "$1" "she had her dark suit in greasy wash water all year" "$2" fi; if [ "$3" = "8k" ]; then assert_working_inference "$1" "she had to do so and greasy wash water all year" "$2" fi; } assert_correct_ldc93s1_prodmodel_stereo_44k() { assert_correct_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_working_ldc93s1_prodmodel_stereo_44k() { assert_working_inference "$1" "she had your dark suit in greasy wash water all year" "$2" } assert_correct_ldc93s1_prodtflitemodel_stereo_44k() { assert_correct_inference "$1" "she had her dark suit in greasy wash water all year" "$2" } assert_working_ldc93s1_prodtflitemodel_stereo_44k() { assert_working_inference "$1" "she had her dark suit in greasy wash water all year" "$2" } assert_correct_warning_upsampling() { assert_shows_something "$1" "erratic speech recognition" } assert_tensorflow_version() { assert_shows_something "$1" 
"${EXPECTED_TENSORFLOW_VERSION}" } assert_deepspeech_version() { assert_not_present "$1" "DeepSpeech: unknown" } # We need to ensure that running on inference really leverages GPU because # it might default back to CPU ensure_cuda_usage() { local _maybe_cuda=$1 DS_BINARY_FILE=${DS_BINARY_FILE:-"deepspeech"} if [ "${_maybe_cuda}" = "cuda" ]; then set +e export TF_CPP_MIN_VLOG_LEVEL=1 ds_cuda=$(${DS_BINARY_PREFIX}${DS_BINARY_FILE} --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>&1 1>/dev/null) export TF_CPP_MIN_VLOG_LEVEL= set -e assert_shows_something "${ds_cuda}" "Successfully opened dynamic library nvcuda.dll" assert_not_present "${ds_cuda}" "Skipping registering GPU devices" fi; } check_versions() { set +e ds_help=$(${DS_BINARY_PREFIX}deepspeech --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>&1 1>/dev/null) set -e assert_tensorflow_version "${ds_help}" assert_deepspeech_version "${ds_help}" } assert_deepspeech_runtime() { local expected_runtime=$1 set +e local ds_version=$(${DS_BINARY_PREFIX}deepspeech --version 2>&1) set -e assert_shows_something "${ds_version}" "${expected_runtime}" } check_runtime_nodejs() { assert_deepspeech_runtime "Runtime: Node" } check_runtime_electronjs() { assert_deepspeech_runtime "Runtime: Electron" } run_tflite_basic_inference_tests() { set +e phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?" set +e phrase_pbmodel_nolm=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${CI_TMP_DIR}/stderr) set -e assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "$?" 
}

# Inference tests for the .NET Framework console client. Each run captures
# stdout, redirects stderr to ${CI_TMP_DIR}/stderr and passes the client's
# exit status to the assertion.
run_netframework_inference_tests()
{
  set +e
  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  # Capture the status before "set -e", which would otherwise reset $? to 0.
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --extended yes 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_nolm=$(DeepSpeechConsole.exe --model ${CI_TMP_DIR}/${model_name_mmap} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_withlm=$(DeepSpeechConsole.exe --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
}

# Inference tests for the ElectronJS client; same four runs as above, using
# the "working" (substring) assertions.
run_electronjs_inference_tests()
{
  set +e
  phrase_pbmodel_nolm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  # Capture the status before "set -e", which would otherwise reset $? to 0.
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_nolm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --extended 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_nolm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1 "${phrase_pbmodel_nolm}" "$status"

  set +e
  phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_working_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
}

# Core inference checks: an empty model path must be rejected, then the test
# model (protobuf and mmap, without and with scorer) must decode LDC93S1
# exactly.
run_basic_inference_tests()
{
  # An empty --model must make the client complain on stderr.
  set +e
  deepspeech \
    --model "" \
    --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} \
    2>${CI_TMP_DIR}/stderr
  set -e
  grep "Missing model information" ${CI_TMP_DIR}/stderr

  # Protobuf model, no scorer.
  set +e
  phrase_pbmodel_nolm=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name} \
    --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} \
    2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "${status}"

  # Protobuf model, no scorer, extended output.
  set +e
  phrase_pbmodel_nolm=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name} \
    --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} \
    --extended \
    2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "${status}"

  # Memory-mapped model, no scorer.
  set +e
  phrase_pbmodel_nolm=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name_mmap} \
    --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} \
    2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm}" "${status}"

  # Memory-mapped model with the KenLM scorer.
  set +e
  phrase_pbmodel_withlm=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name_mmap} \
    --scorer ${CI_TMP_DIR}/kenlm.scorer \
    --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} \
    2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "${status}"
}

# Basic checks plus the stereo 44.1 kHz sample and the 8 kHz warning checks.
run_all_inference_tests()
{
  run_basic_inference_tests

  # Stereo 44.1 kHz sample, no scorer.
  set +e
  phrase_pbmodel_nolm_stereo_44k=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name_mmap} \
    --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav \
    2>${CI_TMP_DIR}/stderr)
  status=$?
  set -e
  assert_correct_ldc93s1 "${phrase_pbmodel_nolm_stereo_44k}" "${status}"

  # Stereo 44.1 kHz sample with the KenLM scorer.
  set +e
  phrase_pbmodel_withlm_stereo_44k=$(deepspeech \
    --model ${CI_TMP_DIR}/${model_name_mmap} \
    --scorer ${CI_TMP_DIR}/kenlm.scorer \
    --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav \
    2>${CI_TMP_DIR}/stderr)
  status=$?
set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_stereo_44k}" "$status" # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e phrase_pbmodel_nolm_mono_8k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_nolm_mono_8k}" set +e phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; } run_prod_concurrent_stream_tests() { local _bitrate=$1 set +e output=$(python3 ${CI_TMP_DIR}/test_sources/concurrent_streams.py \ --model ${CI_TMP_DIR}/${model_name_mmap} \ --scorer ${CI_TMP_DIR}/kenlm.scorer \ --audio1 ${CI_TMP_DIR}/LDC93S1_pcms16le_1_16000.wav \ --audio2 ${CI_TMP_DIR}/new-home-in-the-stars-16k.wav 2>${CI_TMP_DIR}/stderr) status=$? set -e output1=$(echo "${output}" | head -n 1) output2=$(echo "${output}" | tail -n 1) assert_correct_ldc93s1_prodmodel "${output1}" "${status}" "16k" assert_correct_inference "${output2}" "we must find a new home in the stars" "${status}" } run_prod_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; } # Equivalent to run_prod_inference_tests but we use assert_working* instead of assert_correct # ElectronJS mixes stdout and stderr and exact matching is broken run_electronjs_prod_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_working_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${CI_TMP_DIR}/stderr) status=$? 
set -e assert_working_ldc93s1_prodmodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" } run_prodtflite_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_correct_ldc93s1_prodtflitemodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" # Run down-sampling warning test only when we actually perform downsampling if [ "${ldc93s1_sample_filename}" != "LDC93S1_pcms16le_1_8000.wav" ]; then set +e phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) set -e assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" fi; } # Equivalent to run_prodtflite_inference_tests but we use assert_working* instead of assert_correct # ElectronJS mixes stdout and stderr and exact matching is broken run_electronjs_prodtflite_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? 
set -e assert_working_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_working_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" set +e phrase_pbmodel_withlm_stereo_44k=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_working_ldc93s1_prodtflitemodel_stereo_44k "${phrase_pbmodel_withlm_stereo_44k}" "$status" } run_multi_inference_tests() { set +e -o pipefail multi_phrase_pbmodel_nolm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --audio ${CI_TMP_DIR}/ 2>${CI_TMP_DIR}/stderr | tr '\n' '%') status=$? set -e +o pipefail assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_nolm}" "$status" set +e -o pipefail multi_phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/ 2>${CI_TMP_DIR}/stderr | tr '\n' '%') status=$? set -e +o pipefail assert_correct_multi_ldc93s1 "${multi_phrase_pbmodel_withlm}" "$status" } run_hotword_tests() { DS_BINARY_FILE=${DS_BINARY_FILE:-"deepspeech"} set +e hotwords_decode=$(${DS_BINARY_PREFIX}${DS_BINARY_FILE} --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${CI_TMP_DIR}/stderr) status=$? set -e assert_working_ldc93s1_lm "${hotwords_decode}" "$status" } run_android_hotword_tests() { set +e hotwords_decode=$(${DS_BINARY_PREFIX}deepspeech --model ${DATA_TMP_DIR}/${model_name} --scorer ${DATA_TMP_DIR}/kenlm.scorer --audio ${DATA_TMP_DIR}/${ldc93s1_sample_filename} --hot_words "foo:0.0,bar:-0.1" 2>${CI_TMP_DIR}/stderr) status=$? 
set -e assert_correct_ldc93s1_lm "${hotwords_decode}" "$status" } run_cpp_only_inference_tests() { set +e phrase_pbmodel_withlm_intermediate_decode=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream 1280 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status" } run_js_streaming_inference_tests() { set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status" } run_js_streaming_prod_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? 
set -e assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" } run_js_streaming_prodtflite_inference_tests() { local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" local _bitrate=$1 set +e phrase_pbmodel_withlm=$(deepspeech --model ${CI_TMP_DIR}/${model_name_mmap} --scorer ${CI_TMP_DIR}/kenlm.scorer --audio ${CI_TMP_DIR}/${ldc93s1_sample_filename} --stream --extended 2>${CI_TMP_DIR}/stderr | tail -n 1) status=$? set -e assert_correct_ldc93s1_prodtflitemodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}" } ================================================ FILE: ci_scripts/build-utils.sh ================================================ #!/bin/bash set -xe do_bazel_build() { local _opt_or_dbg=${1:-"opt"} cd ${DS_TFDIR} eval "export ${BAZEL_ENV_FLAGS}" if [ "${_opt_or_dbg}" = "opt" ]; then if is_patched_bazel; then find ${DS_ROOT_TASK}/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/bazel-ckd-tf.tar -T - fi; fi; bazel ${BAZEL_OUTPUT_USER_ROOT} build \ -s --explain bazel_monolithic.log --verbose_explanations --experimental_strict_action_env --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c ${_opt_or_dbg} ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS} if [ "${_opt_or_dbg}" = "opt" ]; then if is_patched_bazel; then find ${DS_ROOT_TASK}/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/bazel-ckd-ds.tar -T - fi; verify_bazel_rebuild "${DS_ROOT_TASK}/tensorflow/bazel_monolithic.log" fi; } shutdown_bazel() { cd ${DS_TFDIR} bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown } do_deepspeech_binary_build() { cd ${DS_DSDIR} make -C native_client/ \ TARGET=${SYSTEM_TARGET} \ TFDIR=${DS_TFDIR} \ 
RASPBIAN=${SYSTEM_RASPBIAN} \ EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" \ EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" \ EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \ deepspeech${PLATFORM_EXE_SUFFIX} } ================================================ FILE: ci_scripts/cpp-bytes-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" download_material "${CI_TMP_DIR}/ds" export PATH=${CI_TMP_DIR}/ds/:$PATH # Bytes output mode with LDC93S1 takes too long to converge so we simply test # that loading the model won't crash check_versions ================================================ FILE: ci_scripts/cpp-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL} model_name=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP} model_name_mmap=$(basename "${model_source_mmap}") download_model_prod download_material export PATH=${CI_TMP_DIR}/ds/:$PATH check_versions run_prod_inference_tests "${bitrate}" ================================================ FILE: ci_scripts/cpp-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" download_data export PATH=${CI_TMP_DIR}/ds/:$PATH check_versions run_all_inference_tests run_multi_inference_tests run_cpp_only_inference_tests run_hotword_tests ================================================ FILE: ci_scripts/cpp_tflite-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh 
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

# TFLite has a single model file, so both model names point to it.
model_source=${DEEPSPEECH_PROD_MODEL//.pb/.tflite}
model_name=$(basename "${model_source}")
model_name_mmap=$(basename "${model_source}")
model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite}
export DATA_TMP_DIR=${CI_TMP_DIR}

download_model_prod

download_material "${CI_TMP_DIR}/ds"

export PATH=${CI_TMP_DIR}/ds/:$PATH

check_versions

run_prodtflite_inference_tests "${bitrate}"

================================================
FILE: ci_scripts/cpp_tflite-tests.sh
================================================
#!/bin/bash

# C++ client against the TFLite test model: full inference test suite.
#   $1: bitrate used to select the LDC93S1 sample file

set -xe

# Quote the script directory so a checkout path with spaces is not word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

# TFLite has a single model file, so both model names point to it.
model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite}
model_name=$(basename "${model_source}")
model_name_mmap=$(basename "${model_source}")
export DATA_TMP_DIR=${CI_TMP_DIR}

download_material "${CI_TMP_DIR}/ds"

export PATH=${CI_TMP_DIR}/ds/:$PATH

check_versions

run_all_inference_tests

run_multi_inference_tests

run_cpp_only_inference_tests

run_hotword_tests

================================================
FILE: ci_scripts/cpp_tflite_basic-tests.sh
================================================
#!/bin/bash

# C++ client against the TFLite test model: basic inference tests only.
#   $1: bitrate used to select the LDC93S1 sample file

set -xe

source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite}
model_name=$(basename "${model_source}")
export DATA_TMP_DIR=${CI_TMP_DIR}

download_material "${CI_TMP_DIR}/ds"

export PATH=${CI_TMP_DIR}/ds/:$PATH

check_versions

run_tflite_basic_inference_tests

================================================
FILE: ci_scripts/cppwin-tests.sh
================================================
#!/bin/bash

# Windows C++ client against the test model.
#   $1: bitrate used to select the LDC93S1 sample file
#   $2: "cuda" to additionally verify that the GPU is used

set -xe

source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"
download_material "${CI_TMP_DIR}/ds" export PATH=${CI_TMP_DIR}/ds/:$PATH check_versions ensure_cuda_usage "$2" run_basic_inference_tests ================================================ FILE: ci_scripts/cppwin_tflite-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") export DATA_TMP_DIR=${CI_TMP_DIR} download_material "${CI_TMP_DIR}/ds" export PATH=${CI_TMP_DIR}/ds/:$PATH check_versions run_basic_inference_tests ================================================ FILE: ci_scripts/docs-requirements.txt ================================================ breathe==4.14.2 semver==2.8.1 sphinx==2.4.4 #FIXME: switch back to upstream sphinx-js when https://github.com/mozilla/sphinx-js/pull/135 is merged or the issue is fixed otherwise git+git://github.com/reuben/sphinx-js.git@a24775935443d21028ee4a7025a407c78030c4e7#egg=sphinx-js sphinx-rtd-theme==0.4.3 pygments==2.7.4 ================================================ FILE: ci_scripts/electronjs-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL} model_name=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP} model_name_mmap=$(basename "${model_source_mmap}") download_model_prod download_data node --version npm --version symlink_electron export_node_bin_path which electron which node if [ "${OS}" = "Linux" ]; then export DISPLAY=':99.0' sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 & xvfb_process=$! 
fi

node --version

deepspeech --version

check_runtime_electronjs

run_electronjs_prod_inference_tests "${bitrate}"

# Stop the virtual X display started earlier.
if [ "${OS}" = "Linux" ]; then
  sleep 1
  sudo kill -9 ${xvfb_process} || true
fi

================================================
FILE: ci_scripts/electronjs-tests.sh
================================================
#!/bin/bash

# ElectronJS client against the test model.
#   $1: bitrate used to select the LDC93S1 sample file

set -xe

# Quote the script directory so a checkout path with spaces is not word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

download_data

node --version
npm --version

symlink_electron

export_node_bin_path

which electron
which node

# On Linux, start a virtual X display in the background for Electron.
if [ "${OS}" = "Linux" ]; then
  export DISPLAY=':99.0'
  sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 &
  xvfb_process=$!
fi

node --version

deepspeech --version

check_runtime_electronjs

run_electronjs_inference_tests

# Stop the virtual X display started above.
if [ "${OS}" = "Linux" ]; then
  sleep 1
  sudo kill -9 ${xvfb_process} || true
fi

================================================
FILE: ci_scripts/electronjs_tflite-tests-prod.sh
================================================
#!/bin/bash

# ElectronJS client against the production TFLite model.
#   $1: bitrate used to select the sample file and the expected transcription

set -xe

source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/asserts.sh"

bitrate=$1
set_ldc_sample_filename "${bitrate}"

# TFLite has a single model file, so both model names point to it.
model_source=${DEEPSPEECH_PROD_MODEL//.pb/.tflite}
model_name=$(basename "${model_source}")
model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite}
model_name_mmap=$(basename "${model_source}")

download_model_prod

download_data

node --version
npm --version

symlink_electron

export_node_bin_path

which electron
which node

# On Linux, start a virtual X display in the background for Electron.
if [ "${OS}" = "Linux" ]; then
  export DISPLAY=':99.0'
  sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 &
  xvfb_process=$!
fi node --version deepspeech --version check_runtime_electronjs run_electronjs_prodtflite_inference_tests "${bitrate}" if [ "${OS}" = "Linux" ]; then sleep 1 sudo kill -9 ${xvfb_process} || true fi ================================================ FILE: ci_scripts/electronjs_tflite-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") download_data node --version npm --version symlink_electron export_node_bin_path which electron which node if [ "${OS}" = "Linux" ]; then export DISPLAY=':99.0' sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 & xvfb_process=$! fi node --version deepspeech --version check_runtime_electronjs run_electronjs_inference_tests if [ "${OS}" = "Linux" ]; then sleep 1 sudo kill -9 ${xvfb_process} || true fi ================================================ FILE: ci_scripts/host-build.sh ================================================ #!/bin/bash set -xe runtime=$1 source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/build-utils.sh source $(dirname "$0")/tf-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so //native_client:generate_scorer_package " if [ "${runtime}" = "tflite" ]; then BAZEL_BUILD_TFLITE="--define=runtime=tflite" fi; BAZEL_BUILD_FLAGS="${BAZEL_BUILD_TFLITE} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" SYSTEM_TARGET=host do_bazel_build do_deepspeech_binary_build ================================================ FILE: ci_scripts/node-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 
set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL} model_name=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP} model_name_mmap=$(basename "${model_source_mmap}") download_model_prod download_data node --version npm --version export_node_bin_path check_runtime_nodejs run_prod_inference_tests "${bitrate}" run_js_streaming_prod_inference_tests "${bitrate}" ================================================ FILE: ci_scripts/node-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" download_data node --version npm --version export_node_bin_path check_runtime_nodejs run_all_inference_tests run_js_streaming_inference_tests run_hotword_tests ================================================ FILE: ci_scripts/node_tflite-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite} model_name_mmap=$(basename "${model_source}") download_model_prod download_data node --version npm --version export_node_bin_path check_runtime_nodejs run_prodtflite_inference_tests "${bitrate}" run_js_streaming_prodtflite_inference_tests "${bitrate}" ================================================ FILE: ci_scripts/node_tflite-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") 
model_name_mmap=$(basename "${model_source}")

download_data

node --version
npm --version

export_node_bin_path

check_runtime_nodejs

run_all_inference_tests

run_js_streaming_inference_tests

run_hotword_tests

================================================ FILE: ci_scripts/package-utils.sh ================================================
#!/bin/bash

set -xe

# Package the native client build outputs into an xz-compressed tarball.
#
# $1: file name of the artifact, created inside ${CI_ARTIFACTS_DIR}.
#
# Reads DS_TFDIR, DS_DSDIR, CI_ARTIFACTS_DIR, TAR, XZ and PLATFORM_EXE_SUFFIX
# from the environment. Exits the calling script with status 1 when a required
# directory is missing or when no artifact name is given.
package_native_client()
{
  tensorflow_dir=${DS_TFDIR}
  deepspeech_dir=${DS_DSDIR}
  artifacts_dir=${CI_ARTIFACTS_DIR}
  artifact_name=$1

  # Quoted, separate tests: an empty variable must be reported as a missing
  # directory instead of turning the test expression into a syntax error.
  if [ ! -d "${tensorflow_dir}" ] || [ ! -d "${deepspeech_dir}" ] || [ ! -d "${artifacts_dir}" ]; then
    echo "Missing directory. Please check:"
    echo "tensorflow_dir=${tensorflow_dir}"
    echo "deepspeech_dir=${deepspeech_dir}"
    echo "artifacts_dir=${artifacts_dir}"
    exit 1
  fi;

  if [ -z "${artifact_name}" ]; then
    echo "Please specify artifact name."
    # Without a name the archive would be redirected onto the directory itself.
    exit 1
  fi;

  # The import library only exists for Windows builds; add it when present.
  # Left unquoted below on purpose so it expands to separate tar arguments.
  win_lib=""
  if [ -f "${tensorflow_dir}/bazel-bin/native_client/libdeepspeech.so.if.lib" ]; then
    win_lib="-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so.if.lib"
  fi;

  ${TAR} --verbose -cf - \
    -C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
    ${win_lib} \
    -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
    -C ${deepspeech_dir}/ LICENSE \
    -C ${deepspeech_dir}/native_client/ deepspeech${PLATFORM_EXE_SUFFIX} \
    -C ${deepspeech_dir}/native_client/ deepspeech.h \
    -C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
    | ${XZ} > "${artifacts_dir}/${artifact_name}"
}

# Package the NDK build outputs for one ABI into an xz-compressed tarball.
#
# $1: file name of the artifact, created inside ${CI_ARTIFACTS_DIR}.
# $2: ABI sub-directory of native_client/libs/ to package.
#
# Reads DS_DSDIR, DS_TFDIR, CI_ARTIFACTS_DIR, TAR and XZ from the environment.
# Exits the calling script with status 1 when a required directory is missing
# or when one of the two arguments is empty.
package_native_client_ndk()
{
  deepspeech_dir=${DS_DSDIR}
  tensorflow_dir=${DS_TFDIR}
  artifacts_dir=${CI_ARTIFACTS_DIR}
  artifact_name=$1
  arch_abi=$2

  if [ ! -d "${deepspeech_dir}" ] || [ ! -d "${artifacts_dir}" ]; then
    echo "Missing directory. Please check:"
    echo "deepspeech_dir=${deepspeech_dir}"
    echo "artifacts_dir=${artifacts_dir}"
    exit 1
  fi;

  if [ -z "${artifact_name}" ]; then
    echo "Please specify artifact name."
    exit 1
  fi;

  if [ -z "${arch_abi}" ]; then
    echo "Please specify arch abi."
    # An empty ABI would make tar look directly inside native_client/libs/.
    exit 1
  fi;

  ${TAR} --verbose -cf - \
    -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ deepspeech \
    -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libdeepspeech.so \
    -C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
    -C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \
    -C ${deepspeech_dir}/native_client/ deepspeech.h \
    -C ${deepspeech_dir}/ LICENSE \
    -C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
    | ${XZ} > "${artifacts_dir}/${artifact_name}"
}

# Package libdeepspeech.so alone into a zip archive with directory paths
# stripped (--junk-paths).
#
# $1: file name of the artifact, created inside ${CI_ARTIFACTS_DIR}.
#
# Reads DS_TFDIR, CI_ARTIFACTS_DIR and ZIP from the environment. Exits the
# calling script with status 1 when a required directory is missing or when
# no artifact name is given.
package_libdeepspeech_as_zip()
{
  tensorflow_dir=${DS_TFDIR}
  artifacts_dir=${CI_ARTIFACTS_DIR}
  artifact_name=$1

  if [ ! -d "${tensorflow_dir}" ] || [ ! -d "${artifacts_dir}" ]; then
    echo "Missing directory. Please check:"
    echo "tensorflow_dir=${tensorflow_dir}"
    echo "artifacts_dir=${artifacts_dir}"
    exit 1
  fi;

  if [ -z "${artifact_name}" ]; then
    echo "Please specify artifact name."
    exit 1
  fi;

  ${ZIP} -r9 --junk-paths "${artifacts_dir}/${artifact_name}" ${tensorflow_dir}/bazel-bin/native_client/libdeepspeech.so
}

================================================ FILE: ci_scripts/package.sh ================================================
#!/bin/bash

set -xe

source $(dirname "$0")/all-vars.sh
source $(dirname "$0")/package-utils.sh

mkdir -p ${CI_ARTIFACTS_DIR} || true

cp ${DS_DSDIR}/tensorflow/bazel*.log ${CI_ARTIFACTS_DIR}/

package_native_client "native_client.tar.xz"

package_libdeepspeech_as_zip "libdeepspeech.zip"

if [ -d ${DS_DSDIR}/wheels ]; then
    cp ${DS_DSDIR}/wheels/* ${CI_ARTIFACTS_DIR}/
    cp ${DS_DSDIR}/native_client/javascript/deepspeech-*.tgz ${CI_ARTIFACTS_DIR}/
fi;

if [ -f ${DS_DSDIR}/native_client/javascript/wrapper.tar.gz ]; then
    cp ${DS_DSDIR}/native_client/javascript/wrapper.tar.gz ${CI_ARTIFACTS_DIR}/
fi;

================================================ FILE: ci_scripts/python-tests-prod.sh ================================================
#!/bin/bash

set -xe

source $(dirname "$0")/all-vars.sh
source $(dirname "$0")/all-utils.sh
source $(dirname "$0")/asserts.sh

bitrate=$1
set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL} model_name=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP} model_name_mmap=$(basename "${model_source_mmap}") download_model_prod download_material export_py_bin_path deepspeech --version run_prod_inference_tests "${bitrate}" run_prod_concurrent_stream_tests "${bitrate}" ================================================ FILE: ci_scripts/python-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" download_data export_py_bin_path deepspeech --version run_all_inference_tests run_hotword_tests ================================================ FILE: ci_scripts/python_tflite-tests-prod.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_PROD_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite} download_model_prod download_material export_py_bin_path deepspeech --version run_prodtflite_inference_tests "${bitrate}" ================================================ FILE: ci_scripts/python_tflite-tests.sh ================================================ #!/bin/bash set -xe source $(dirname "$0")/all-vars.sh source $(dirname "$0")/all-utils.sh source $(dirname "$0")/asserts.sh bitrate=$1 set_ldc_sample_filename "${bitrate}" model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite} model_name=$(basename "${model_source}") model_name_mmap=$(basename "${model_source}") download_data export_py_bin_path deepspeech --version run_all_inference_tests run_hotword_tests 
================================================ FILE: ci_scripts/tf-build.sh ================================================ #!/bin/bash set -ex set -o pipefail source $(dirname $0)/tf-vars.sh pushd ${DS_ROOT_TASK}/tensorflow/ BAZEL_BUILD="bazel ${BAZEL_OUTPUT_USER_ROOT} build -s --explain bazel_monolithic_tf.log --verbose_explanations --experimental_strict_action_env --config=monolithic" # Start a bazel process to ensure reliability on Windows and avoid: # FATAL: corrupt installation: file 'c:\builds\tc-workdir\.bazel_cache/install/6b1660721930e9d5f231f7d2a626209b/_embedded_binaries/build-runfiles.exe' missing. bazel ${BAZEL_OUTPUT_USER_ROOT} info # Force toolchain sync (useful on macOS ?) bazel ${BAZEL_OUTPUT_USER_ROOT} sync --configure MAYBE_DEBUG=$2 OPT_OR_DBG="-c opt" if [ "${MAYBE_DEBUG}" = "dbg" ]; then OPT_OR_DBG="-c dbg" fi; case "$1" in "--windows-cpu") echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIBDEEPSPEECH} ${BUILD_TARGET_LITE_LIB} --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" ;; "--linux-cpu"|"--darwin-cpu") echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} ;; "--linux-cuda"|"--windows-cuda") eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ;; "--linux-armv7") echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; "--linux-aarch64") echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; "--android-armv7") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_ANDROID_ARM_FLAGS} 
${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; "--android-arm64") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; "--ios-arm64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_IOS_ARM64_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; "--ios-x86_64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB} ;; esac bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown popd ================================================ FILE: ci_scripts/tf-package.sh ================================================ #!/bin/bash set -xe source $(dirname $0)/tf-vars.sh mkdir -p ${CI_ARTIFACTS_DIR} || true cp ${DS_ROOT_TASK}/tensorflow/bazel_*.log ${CI_ARTIFACTS_DIR} || true OUTPUT_ROOT="${DS_ROOT_TASK}/tensorflow/bazel-bin" for output_bin in \ tensorflow/lite/experimental/c/libtensorflowlite_c.so \ tensorflow/tools/graph_transforms/transform_graph \ tensorflow/tools/graph_transforms/summarize_graph \ tensorflow/tools/benchmark/benchmark_model \ tensorflow/contrib/util/convert_graphdef_memmapped_format \ tensorflow/lite/toco/toco; do if [ -f "${OUTPUT_ROOT}/${output_bin}" ]; then cp ${OUTPUT_ROOT}/${output_bin} ${CI_ARTIFACTS_DIR}/ fi; done; if [ -f "${OUTPUT_ROOT}/tensorflow/lite/tools/benchmark/benchmark_model" ]; then cp ${OUTPUT_ROOT}/tensorflow/lite/tools/benchmark/benchmark_model ${CI_ARTIFACTS_DIR}/lite_benchmark_model fi # It seems that bsdtar and gnutar are behaving a bit differently on the way # they deal with --exclude="./public/*" ; this caused ./DeepSpeech/tensorflow/core/public/ # to be ditched when we just wanted to get rid of ./public/ on OSX. # Switching to gnutar (already needed for the --transform on DeepSpeech tasks) # does the trick. 
TAR_EXCLUDE="--exclude=./dls/*" if [ "${OS}" = "Darwin" ]; then TAR_EXCLUDE="--exclude=./dls/* --exclude=./public/* --exclude=./generic-worker/* --exclude=./homebrew/* --exclude=./homebrew.cache/* --exclude=./homebrew.logs/*" fi; # Make a tar of # - /home/build-user/ (linux # - /Users/build-user/TaskCluster/HeavyTasks/X/ (OSX) # - C:\builds\tc-workdir\ (windows) if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then export PATH=$PATH:'/c/Program Files/7-Zip/' pushd ${DS_ROOT_TASK} 7z a '-xr!.\dls\' '-xr!.\tmp\' '-xr!.\msys64\' -snl -snh -so home.tar . | 7z a -si ${CI_ARTIFACTS_DIR}/home.tar.xz popd else ${TAR} -C ${DS_ROOT_TASK} ${TAR_EXCLUDE} -cf - . | ${XZ} > ${CI_ARTIFACTS_DIR}/home.tar.xz fi if [ "${OS}" = "Linux" ]; then SHA_SUM_GEN="sha256sum" elif [ "${OS}" = "${CI_MSYS_VERSION}" ]; then SHA_SUM_GEN="sha256sum" elif [ "${OS}" = "Darwin" ]; then SHA_SUM_GEN="shasum -a 256" fi; ${SHA_SUM_GEN} ${CI_ARTIFACTS_DIR}/* > ${CI_ARTIFACTS_DIR}/checksums.txt ================================================ FILE: ci_scripts/tf-setup.sh ================================================ #!/bin/bash set -ex source $(dirname $0)/tf-vars.sh install_android= install_cuda= case "$1" in "--linux-cuda"|"--windows-cuda") install_cuda=yes ;; "--android-armv7"|"--android-arm64") install_android=yes ;; esac # $1 url # $2 sha256 download() { fname=`basename $1` ${WGET} $1 -O ${DS_ROOT_TASK}/dls/$fname && echo "$2 ${DS_ROOT_TASK}/dls/$fname" | ${SHA_SUM} - } # Download stuff mkdir -p ${DS_ROOT_TASK}/dls || true download $BAZEL_URL $BAZEL_SHA256 if [ ! -z "${install_cuda}" ]; then download $CUDA_URL $CUDA_SHA256 download $CUDNN_URL $CUDNN_SHA256 fi; if [ ! 
-z "${install_android}" ]; then download $ANDROID_NDK_URL $ANDROID_NDK_SHA256 download $ANDROID_SDK_URL $ANDROID_SDK_SHA256 fi; # For debug ls -hal ${DS_ROOT_TASK}/dls/ # Install Bazel in ${DS_ROOT_TASK}/bin BAZEL_INSTALL_FILENAME=$(basename "${BAZEL_URL}") if [ "${OS}" = "Linux" ]; then BAZEL_INSTALL_FLAGS="--user" elif [ "${OS}" = "Darwin" ]; then BAZEL_INSTALL_FLAGS="--bin=${DS_ROOT_TASK}/bin --base=${DS_ROOT_TASK}/.bazel" fi; mkdir -p ${DS_ROOT_TASK}/bin || true pushd ${DS_ROOT_TASK}/bin if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then cp ${DS_ROOT_TASK}/dls/${BAZEL_INSTALL_FILENAME} ${DS_ROOT_TASK}/bin/bazel.exe else /bin/bash ${DS_ROOT_TASK}/dls/${BAZEL_INSTALL_FILENAME} ${BAZEL_INSTALL_FLAGS} fi popd # For debug bazel version bazel shutdown if [ ! -z "${install_cuda}" ]; then # Install CUDA and CuDNN mkdir -p ${DS_ROOT_TASK}/DeepSpeech/CUDA/ || true pushd ${DS_ROOT_TASK} CUDA_FILE=`basename ${CUDA_URL}` PERL5LIB=. sh ${DS_ROOT_TASK}/dls/${CUDA_FILE} --silent --override --toolkit --toolkitpath=${DS_ROOT_TASK}/DeepSpeech/CUDA/ --defaultroot=${DS_ROOT_TASK}/DeepSpeech/CUDA/ CUDNN_FILE=`basename ${CUDNN_URL}` tar xvf ${DS_ROOT_TASK}/dls/${CUDNN_FILE} --strip-components=1 -C ${DS_ROOT_TASK}/DeepSpeech/CUDA/ popd LD_LIBRARY_PATH=${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/:${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/stubs/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH # We might lack libcuda.so.1 symlink, let's fix as upstream does: # https://github.com/tensorflow/tensorflow/pull/13811/files?diff=split#diff-2352449eb75e66016e97a591d3f0f43dR96 if [ ! -h "${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/stubs/libcuda.so.1" ]; then ln -s "${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/stubs/libcuda.so" "${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/stubs/libcuda.so.1" fi; else echo "No CUDA/CuDNN to install" fi if [ ! 
-z "${install_android}" ]; then mkdir -p ${DS_ROOT_TASK}/DeepSpeech/Android/SDK || true ANDROID_NDK_FILE=`basename ${ANDROID_NDK_URL}` ANDROID_SDK_FILE=`basename ${ANDROID_SDK_URL}` pushd ${DS_ROOT_TASK}/DeepSpeech/Android unzip ${DS_ROOT_TASK}/dls/${ANDROID_NDK_FILE} popd pushd ${DS_ROOT_TASK}/DeepSpeech/Android/SDK unzip ${DS_ROOT_TASK}/dls/${ANDROID_SDK_FILE} yes | ./tools/bin/sdkmanager --licenses ./tools/bin/sdkmanager --update ./tools/bin/sdkmanager --install "platforms;android-16" "build-tools;28.0.3" popd fi mkdir -p ${CI_ARTIFACTS_DIR} || true # Taken from https://www.tensorflow.org/install/source # Only future is needed for our builds, as we don't build the Python package python -m pip install -U --user future==0.17.1 || true ================================================ FILE: ci_scripts/tf-vars.sh ================================================ #!/bin/bash set -ex export OS=$(uname) if [ "${OS}" = "Linux" ]; then export DS_ROOT_TASK=${CI_TASK_DIR} BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-linux-x86_64.sh BAZEL_SHA256=7ba815cbac712d061fe728fef958651512ff394b2708e89f79586ec93d1185ed CUDA_URL=http://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run CUDA_SHA256=e7c22dc21278eb1b82f34a60ad7640b41ad3943d929bebda3008b72536855d31 # From https://gitlab.com/nvidia/cuda/blob/centos7/10.1/devel/cudnn7/Dockerfile CUDNN_URL=http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-10.1-linux-x64-v7.6.0.64.tgz CUDNN_SHA256=e956c6f9222fcb867a10449cfc76dee5cfd7c7531021d95fe9586d7e043b57d7 ANDROID_NDK_URL=https://dl.google.com/android/repository/android-ndk-r18b-linux-x86_64.zip ANDROID_NDK_SHA256=4f61cbe4bbf6406aa5ef2ae871def78010eed6271af72de83f8bd0b07a9fd3fd ANDROID_SDK_URL=https://dl.google.com/android/repository/sdk-tools-linux-4333796.zip ANDROID_SDK_SHA256=92ffee5a1d98d856634e8b71132e8a95d96c83a63fde1099be3d86df3106def9 WGET=/usr/bin/wget elif [ 
"${OS}" = "${CI_MSYS_VERSION}" ]; then if [ -z "${CI_TASK_DIR}" -o -z "${CI_ARTIFACTS_DIR}" ]; then echo "Inconsistent Windows setup: missing some vars." echo "CI_TASK_DIR=${CI_TASK_DIR}" echo "CI_ARTIFACTS_DIR=${CI_ARTIFACTS_DIR}" exit 1 fi; # Re-export with cygpath to make sure it is sane, otherwise it might trigger # unobvious failures with cp etc. export CI_TASK_DIR="$(cygpath ${CI_TASK_DIR})" export CI_ARTIFACTS_DIR="$(cygpath ${CI_ARTIFACTS_DIR})" export DS_ROOT_TASK=${CI_TASK_DIR} export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" export BAZEL_VC_FULL_VERSION="14.28.29910" export MSYS2_ARG_CONV_EXCL='//' mkdir -p ${CI_TASK_DIR}/tmp/ export TEMP=${CI_TASK_DIR}/tmp/ export TMP=${CI_TASK_DIR}/tmp/ BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-windows-x86_64.exe BAZEL_SHA256=776db1f4986dacc3eda143932f00f7529f9ee65c7c1c004414c44aaa6419d0e9 CUDA_INSTALL_DIRECTORY=$(cygpath 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1') TAR=/usr/bin/tar.exe elif [ "${OS}" = "Darwin" ]; then if [ -z "${CI_TASK_DIR}" -o -z "${CI_ARTIFACTS_DIR}" ]; then echo "Inconsistent OSX setup: missing some vars." 
echo "CI_TASK_DIR=${CI_TASK_DIR}" echo "CI_ARTIFACTS_DIR=${CI_ARTIFACTS_DIR}" exit 1 fi; export DS_ROOT_TASK=${CI_TASK_DIR} BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-darwin-x86_64.sh BAZEL_SHA256=5cfa97031b43432b3c742c80e2e01c41c0acdca7ba1052fc8cf1e291271bc9cd SHA_SUM="shasum -a 256 -c" TAR=gtar fi; WGET=${WGET:-"wget"} TAR=${TAR:-"tar"} XZ=${XZ:-"xz -9 -T0"} ZIP=${ZIP:-"zip"} UNXZ=${UNXZ:-"xz -T0 -d"} UNGZ=${UNGZ:-"gunzip"} SHA_SUM=${SHA_SUM:-"sha256sum -c --strict"} # /tmp/artifacts for docker-worker on linux, # and task subdir for generic-worker on osx export CI_ARTIFACTS_DIR=${CI_ARTIFACTS_DIR:-/tmp/artifacts} ### Define variables that needs to be exported to other processes PATH=${DS_ROOT_TASK}/bin:$PATH if [ "${OS}" = "Darwin" ]; then PATH=${DS_ROOT_TASK}/homebrew/bin/:${DS_ROOT_TASK}/homebrew/opt/node@10/bin:$PATH fi; export PATH if [ "${OS}" = "Linux" ]; then export LD_LIBRARY_PATH=${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/:${DS_ROOT_TASK}/DeepSpeech/CUDA/lib64/stubs/:$LD_LIBRARY_PATH export ANDROID_SDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/SDK/ export ANDROID_NDK_HOME=${DS_ROOT_TASK}/DeepSpeech/Android/android-ndk-r18b/ fi; export TF_ENABLE_XLA=0 if [ "${OS}" = "Linux" ]; then TF_NEED_JEMALLOC=1 elif [ "${OS}" = "${CI_MSYS_VERSION}" ]; then TF_NEED_JEMALLOC=0 elif [ "${OS}" = "Darwin" ]; then TF_NEED_JEMALLOC=0 fi; export TF_NEED_JEMALLOC export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export TF_NEED_VERBS=0 export TF_NEED_MPI=0 export TF_NEED_IGNITE=0 export TF_NEED_GDR=0 export TF_NEED_NGRAPH=0 export TF_DOWNLOAD_CLANG=0 export TF_SET_ANDROID_WORKSPACE=0 export TF_NEED_TENSORRT=0 export TF_NEED_ROCM=0 # This should be gcc-5, hopefully. CUDA and TensorFlow might not be happy, otherwise. 
export GCC_HOST_COMPILER_PATH=/usr/bin/gcc if [ "${OS}" = "Linux" ]; then source /etc/os-release if [ "${ID}" = "ubuntu" -a "${VERSION_ID}" = "20.04" ]; then export PYTHON_BIN_PATH=/usr/bin/python3 else export PYTHON_BIN_PATH=/usr/bin/python2.7 fi fi ## Below, define or export some build variables # Enable some SIMD support. Limit ourselves to what Tensorflow needs. # Also ensure to not require too recent CPU: AVX2/FMA introduced by: # - Intel with Haswell (2013) # - AMD with Excavator (2015) # For better compatibility, AVX only might be better. # # Build for generic amd64 platforms, no device-specific optimization # See https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html for targeting specific CPUs if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then OPT_FLAGS="/arch:AVX" else OPT_FLAGS="-mtune=generic -march=x86-64 -msse -msse2 -msse3 -msse4.1 -msse4.2 -mavx" fi BAZEL_OPT_FLAGS="" for flag in ${OPT_FLAGS}; do BAZEL_OPT_FLAGS="${BAZEL_OPT_FLAGS} --copt=${flag}" done; BAZEL_OUTPUT_CACHE_DIR="${DS_ROOT_TASK}/.bazel_cache/" BAZEL_OUTPUT_CACHE_INSTANCE="${BAZEL_OUTPUT_CACHE_DIR}/output/" mkdir -p ${BAZEL_OUTPUT_CACHE_INSTANCE} || true # We need both to ensure stable path ; default value for output_base is some # MD5 value. BAZEL_OUTPUT_USER_ROOT="--output_user_root ${BAZEL_OUTPUT_CACHE_DIR} --output_base ${BAZEL_OUTPUT_CACHE_INSTANCE}" export BAZEL_OUTPUT_USER_ROOT NVCC_COMPUTE="3.5" ### Define build parameters/env variables that we will re-use in sourcing scripts.
if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then TF_CUDA_FLAGS="TF_CUDA_CLANG=0 TF_CUDA_VERSION=10.1 TF_CUDNN_VERSION=7.6.0 CUDNN_INSTALL_PATH=\"${CUDA_INSTALL_DIRECTORY}\" TF_CUDA_PATHS=\"${CUDA_INSTALL_DIRECTORY}\" TF_CUDA_COMPUTE_CAPABILITIES=\"${NVCC_COMPUTE}\"" else TF_CUDA_FLAGS="TF_CUDA_CLANG=0 TF_CUDA_VERSION=10.1 TF_CUDNN_VERSION=7.6.0 CUDNN_INSTALL_PATH=\"${DS_ROOT_TASK}/DeepSpeech/CUDA\" TF_CUDA_PATHS=\"${DS_ROOT_TASK}/DeepSpeech/CUDA\" TF_CUDA_COMPUTE_CAPABILITIES=\"${NVCC_COMPUTE}\"" fi BAZEL_ARM_FLAGS="--config=rpi3 --config=rpi3_opt --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_CUDA_FLAGS="--config=cuda" if [ "${OS}" = "Linux" ]; then # constexpr usage in tensorflow's absl dep fails badly because of gcc-5 # so let's skip that BAZEL_CUDA_FLAGS="${BAZEL_CUDA_FLAGS} --copt=-DNO_CONSTEXPR_FOR_YOU=1" fi BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" if [ "${OS}" != "${CI_MSYS_VERSION}" ]; then BAZEL_EXTRA_FLAGS="--config=noaws --config=nogcp --config=nohdfs --config=nonccl --copt=-fvisibility=hidden" fi if [ "${OS}" = "Darwin" ]; then BAZEL_EXTRA_FLAGS="${BAZEL_EXTRA_FLAGS} --macos_minimum_os 10.10 --macos_sdk_version 10.15" fi ### Define build targets that we will re-use in sourcing scripts.
BUILD_TARGET_LIB_CPP_API="//tensorflow:tensorflow_cc" BUILD_TARGET_GRAPH_TRANSFORMS="//tensorflow/tools/graph_transforms:transform_graph" BUILD_TARGET_GRAPH_SUMMARIZE="//tensorflow/tools/graph_transforms:summarize_graph" BUILD_TARGET_GRAPH_BENCHMARK="//tensorflow/tools/benchmark:benchmark_model" #BUILD_TARGET_CONVERT_MMAP="//tensorflow/contrib/util:convert_graphdef_memmapped_format" BUILD_TARGET_TOCO="//tensorflow/lite/toco:toco" BUILD_TARGET_LITE_BENCHMARK="//tensorflow/lite/tools/benchmark:benchmark_model" BUILD_TARGET_LITE_LIB="//tensorflow/lite/c:libtensorflowlite_c.so" BUILD_TARGET_LIBDEEPSPEECH="//native_client:libdeepspeech.so" ================================================ FILE: data/README.rst ================================================ Language-Specific Data ====================== This directory contains language-specific data files. Most importantly, you will find here: 1. A list of unique characters for the target language (e.g. English) in ``data/alphabet.txt``. After installing the training code, you can check ``python -m deepspeech_training.util.check_characters --help`` for a tool that creates an alphabet file from a list of training CSV files. 2. A script used to generate a binary n-gram language model: ``data/lm/generate_lm.py``. For more information on how to build these resources from scratch, see the ``External scorer scripts`` section on `deepspeech.readthedocs.io `_. ================================================ FILE: data/alphabet.txt ================================================ # Each line in this file represents the Unicode codepoint (UTF-8 encoded) # associated with a numeric label. # A line that starts with # is a comment. You can escape it with \# if you wish # to use '#' as a label. a b c d e f g h i j k l m n o p q r s t u v w x y z ' # The last (non-comment) line needs to end with a newline. 
================================================ FILE: data/lm/generate_lm.py ================================================ import argparse import gzip import io import os import subprocess from collections import Counter import progressbar def convert_and_filter_topk(args): """ Convert to lowercase, count word occurrences and save top-k words to a file """ counter = Counter() data_lower = os.path.join(args.output_dir, "lower.txt.gz") print("\nConverting to lowercase and counting word occurrences ...") with io.TextIOWrapper( io.BufferedWriter(gzip.open(data_lower, "w+")), encoding="utf-8" ) as file_out: # Open the input file either from input.txt or input.txt.gz _, file_extension = os.path.splitext(args.input_txt) if file_extension == ".gz": file_in = io.TextIOWrapper( io.BufferedReader(gzip.open(args.input_txt)), encoding="utf-8" ) else: file_in = open(args.input_txt, encoding="utf-8") for line in progressbar.progressbar(file_in): line_lower = line.lower() counter.update(line_lower.split()) file_out.write(line_lower) file_in.close() # Save top-k words print("\nSaving top {} words ...".format(args.top_k)) top_counter = counter.most_common(args.top_k) vocab_str = "\n".join(word for word, count in top_counter) vocab_path = "vocab-{}.txt".format(args.top_k) vocab_path = os.path.join(args.output_dir, vocab_path) with open(vocab_path, "w+") as file: file.write(vocab_str) print("\nCalculating word statistics ...") total_words = sum(counter.values()) print(" Your text file has {} words in total".format(total_words)) print(" It has {} unique words".format(len(counter))) top_words_sum = sum(count for word, count in top_counter) word_fraction = (top_words_sum / total_words) * 100 print( " Your top-{} words are {:.4f} percent of all words".format( args.top_k, word_fraction ) ) print(' Your most common word "{}" occurred {} times'.format(*top_counter[0])) last_word, last_count = top_counter[-1] print( ' The least common word in your top-k is "{}" with {} times'.format( 
last_word, last_count ) ) for i, (w, c) in enumerate(reversed(top_counter)): if c > last_count: print( ' The first word with {} occurrences is "{}" at place {}'.format( c, w, len(top_counter) - 1 - i ) ) break return data_lower, vocab_str def build_lm(args, data_lower, vocab_str): print("\nCreating ARPA file ...") lm_path = os.path.join(args.output_dir, "lm.arpa") subargs = [ os.path.join(args.kenlm_bins, "lmplz"), "--order", str(args.arpa_order), "--temp_prefix", args.output_dir, "--memory", args.max_arpa_memory, "--text", data_lower, "--arpa", lm_path, "--prune", *args.arpa_prune.split("|"), ] if args.discount_fallback: subargs += ["--discount_fallback"] subprocess.check_call(subargs) # Filter LM using vocabulary of top-k words print("\nFiltering ARPA file using vocabulary of top-k words ...") filtered_path = os.path.join(args.output_dir, "lm_filtered.arpa") subprocess.run( [ os.path.join(args.kenlm_bins, "filter"), "single", "model:{}".format(lm_path), filtered_path, ], input=vocab_str.encode("utf-8"), check=True, ) # Quantize and produce trie binary. print("\nBuilding lm.binary ...") binary_path = os.path.join(args.output_dir, "lm.binary") subprocess.check_call( [ os.path.join(args.kenlm_bins, "build_binary"), "-a", str(args.binary_a_bits), "-q", str(args.binary_q_bits), "-v", args.binary_type, filtered_path, binary_path, ] ) def main(): parser = argparse.ArgumentParser( description="Generate lm.binary and top-k vocab for DeepSpeech." ) parser.add_argument( "--input_txt", help="Path to a file.txt or file.txt.gz with sample sentences", type=str, required=True, ) parser.add_argument( "--output_dir", help="Directory path for the output", type=str, required=True ) parser.add_argument( "--top_k", help="Use top_k most frequent words for the vocab.txt file. 
These will be used to filter the ARPA file.", type=int, required=True, ) parser.add_argument( "--kenlm_bins", help="File path to the KENLM binaries lmplz, filter and build_binary", type=str, required=True, ) parser.add_argument( "--arpa_order", help="Order of k-grams in ARPA-file generation", type=int, required=True, ) parser.add_argument( "--max_arpa_memory", help="Maximum allowed memory usage for ARPA-file generation", type=str, required=True, ) parser.add_argument( "--arpa_prune", help="ARPA pruning parameters. Separate values with '|'", type=str, required=True, ) parser.add_argument( "--binary_a_bits", help="Build binary quantization value a in bits", type=int, required=True, ) parser.add_argument( "--binary_q_bits", help="Build binary quantization value q in bits", type=int, required=True, ) parser.add_argument( "--binary_type", help="Build binary data structure type", type=str, required=True, ) parser.add_argument( "--discount_fallback", help="To try when such message is returned by kenlm: 'Could not calculate Kneser-Ney discounts [...] rerun with --discount_fallback'", action="store_true", ) args = parser.parse_args() data_lower, vocab_str = convert_and_filter_topk(args) build_lm(args, data_lower, vocab_str) # Delete intermediate files os.remove(os.path.join(args.output_dir, "lower.txt.gz")) os.remove(os.path.join(args.output_dir, "lm.arpa")) os.remove(os.path.join(args.output_dir, "lm_filtered.arpa")) if __name__ == "__main__": main() ================================================ FILE: data/smoke_test/LDC93S1.txt ================================================ 0 46797 She had your dark suit in greasy wash water all year. 
================================================ FILE: data/smoke_test/russian_sample_data/alphabet.ru ================================================ о е а и н т с л в р к м д п ы у б я ь г з ч й ж х ш ю ц э щ ф ё ъ ================================================ FILE: data/smoke_test/russian_sample_data/ru.csv ================================================ wav_filename,wav_filesize,transcript ru.wav,0,бедняга ребят на его месте должен был быть я ================================================ FILE: data/smoke_test/vocab.pruned.bytes.txt ================================================ s p o t w o r d j a u n t y n e a r e r h e a v y b e l l f l i n t ' s m o r a l i s t r e s o l v e d e i g h t h e u r o p e a n m o u t h m i s s u s m o s s p a r t y p a l e m i l l c e l t s d i s p e n s e d f r a n k l y s y m p a t h y m a d f l a t t e r e d d e v i l s v o m i t c o n t i n u e d l e a v e p h i l o s o p h y i n d e m n i t y w a i t e d n e t t e s t e d s a x o n p r o t e c t i v e g l i t t e r i n p r e v i o u s d e a d l e a r n f o r t h l e t t e r c a r e s a b o v e e x c e l l e n c e s f l a u b e r t g r a m m o n t e m p l o y m e n t s p r e p a r a t o r y e x h a u s t e d g r a v e l y v o l t a i r e f i f t e e n i n t i m a c y r e a s o n a b l y m i r e e g g s h u m b l e s o m e t h i n g d a m a g e p o e t r y m i n g l e l o w s t i c k v c o v e r l e s s f e l l m e t s i l e n t c a s t s t r o t h o n l y l i v e d u s r e a s o n i n g s g a i t s e v e n t h h u m b u g s t r i v i n g h a b i t g e n e r a l t a k e n a t t r a c t e d d r a i n e d w o r t h y s e c r e t a r r i v e o f f c l o u d s h a n d t h e m i n g e n u u s i n e v i t a b l e e a g e r l y m e l o d y c u n n i n g v o l u n t a r i l y g o l d b l o o d t h a n c o n s c i e n c e b r e a k i n g n a t u r e c o l o r a t t i t u d e w h e r e d i s p o s e s s t o r e r o o m i m p e r f e c t a n g e r a s y s t e m a t i c a l l y 
r e l i e v e p a c k e d p l e a s u r e f l a t t e r i e s s l u r a c c e p t a n c e s p e c u l i a r b e s t o w e d l a b y r i n t h a r r i v e d v e n t u r e d s o c i e t y a f f a i r s a f t e r n o o n w h e e l s p r i n c e c h i m e e a c h b e a t s d i s t e m p e r e d n a t u r a l l y p e r s o n a l r e p u t a t i o n e v e n i n g v a s t e m a d a m e l i n s i d i o u s c e t e r a s m o t h e r e d c l o t h e s n o t i c e d w o n d e r s b l u e s u g g e s t i o n f o r r e s t m o r n i n g m e d i t a t e d a r t i l l e r y p a s s e s i m p o s e c e r t a i n l y b u s i n e s s f a t h e r s n a y i n t o x i c a t i o n u n e x p e c t e d s t r o k i n g b u t r e p e a t d i s t u r b p o s s i b l y o h a c c e p t l i p s p l e a s e h e a r t i l y a c q u i s i t i o n e n j o y a c c e n t u a t i o n a c c o u n t e d s w e e t f i x e d d e f i n i t e v i g o r o u s l y p r o b l e m s f o l l o w m a n i f e s t e d f a s t i n d e l a y s d r a w n e v e n i n g ' s i s l a n d s b e t w e e n n o t w i t h s t a n d i n g t e r r i b l y p a s s i o n r e a d y s u p e r i m p o s e d e x p e c t a t i o n s r e l i g i o u s r e s p e c t s s e l f e m o t i o n a l m a d e i n v a r i a b l e c o n t e m p l a t e e f f e c t s i m m o r a l r e s t e d m a i n m a s t s t r i n g s d e s i g n c a p a c i t y a d d e d u n l o a d d o n ' t g r e a t e r s p e c i a l l e f t l e s s o u g h t i n e x p r e s s i v e d r a m a c h a r i t y e x c u s e f o r e i g n o t h e r ' s s t y l e c o n g r a t u l a t i o n s e n m i t y f a i r t h i r t y s o t o p d i a p h o r e s i s f o r w a r d s t a g e o u t s i d e g r e w s c o w l f r e e p o r t g r e n a d i e r u n d e r s t a n d i n g s t r a i n e d s e r v i c e s d i s a g r e e a b l e w h e n a n o m a l y b l a m e d a y s m e r r y c o m p l a i s a n c e o b t a i n e d f l u i d m e d i u m i ' d p r i v i l e g e s r a g h a g g a r d i n 
a u d i b l e d e v o t i o n u n c o n s c i o u s l y r o c k h o n e s t y ' r o u n d s e v e r e a m o u n t t a n k a r d p a i n f u l l y e n c l o s u r e s s e t t l e d p l e a s e d y o u r s e l v e s s h e l v e d h o u r s c h a s t e h e r e d i t a r i l y c o m e s m a r v e l o u s y e a i n c o n s i s t e n c y h a r d e s t s t e p s r i d d l e s u i t a p p l i c a t i o n s p r o f u n d i s s a n c t i f i e d e x s i n s n o i s e u t t e r e d d i s c o v e r e r n o b l e r i d e r c r o s s a w o k e h i m s e l f m e a n s f e l t u n d e r s t a n d s p a n i s h d e f e n d i n g e x p r e s s s k i e s t w i l i g h t c o r n e r e d p r o s p e c t g o d s s o e v e r a r e r e d o u b l e d m a c h i n e t w e n t y m i x j u d g i n g u t m o s t e v e n t u a l l y e x c i t i n g b e g l u n c h s e n s e s o ' s h a n t e r c o n s i d e r a b l e i n f a n t r y e d g e c l e r i c a l i t a l i a n a n g e l d r e a m s t e n s i o n m i r t h f i l t e r i n s t r u g g l e w i l l d e f i n e l a d y s h i p d ' e p i n a y o b j e c t i v e u n c o n s c i o u s n e s s w e ' l l w h o l l y a r t i s t ' s l o d g i n g p i l e n u m b e r b e d c l o t h e s m e a t r o a s t e d f i r s t l i n g s c o m f o r t a b l e s l o w s u p e r s e d e s h r e d c o u r s e p l a c e s a g a i n s t s e c u r i n g p r o b l e m s a t l o s e c u r e s i n c o m e v i s i o n s p o n d e r o u s l y d e p e n d s c o n c u r r e n c e b e g a n s a k e e v e ' s d r o w n c o n s c i e n t i o u s w a v e s a s s e n t t h e n c e f a l s e f r e e t h i n k e r a c q u a i n t a n c e s t a r e m a r k p a c k e t s h a f t e s b u r y h e a r e r s n i c e l y s u b s t a n c e e x t e n d s s t a r e d h a i r e d w o u n d c h a r i o t e e r s e e t h y f i n g e r s s i l l y m u s i c a l f i t g l a z e d c h a r g e d i r e c t e d e n c o u r a g e d p a r i s h d e c e i v i n g s t a r t i n g g a l l a n t r y p 
l a i n l y p a r d o n m a n i f e s t f a u l t h a s t y s l u m b e r p i t e o u s l y e v o k e w i t h n e a r l y h e a r t t h a t m a n k i n d d i s p o s i n g b r i t i s h i n t e r e s t i n g f l o o r p a i n a b s o l u t e l y h o l y c l o s e l y u t i l i t y b r i s t o l i m p o r t a n c e r e c o v e r y s o f t f i g t u p p e n c e c e n t u r y e x p e n s i v e e d i t i o n a d a m ' s s u p p l y s o n s w i r e s h i e l d s g u i d e d b e a t i n g h o u r c h e a p e r t e n b e l i e f p r o v e t h e o r y f u l l y j a n i u s a m c u r i o u s e x h i b i t s n o w s a n g t e l e g r a m m i s u n d e r s t a n d t h r o n e d o c t o r ' s y o u t h c o n f e s s i o n s e x p e r i m e n t s g o l d e n n e s s b r e a t h b o r n e m o d e l a s t a l p s h a r d s i c k n e s s s u g g e s t i o n s c o r r i d o r h a l f f e a t u r e s c o n c i s e l y d i s c u s s i n g t h e r e ' l l s y n t h e s i s p r e s u m a b l y c e r e b r a l p u r s u i t s a c c o r d i n g r e p u g n a n t w a i t t h o r n b r u i s e d s t a v e b a b y c r o a k e d p o w e r h o w e v e r p r a c t i c e b a y c o n b e f o r e r e m e m b e r s p e c t a t o r b a b i e s p r o o f i r e l a n d p h e n o m e n o n d i s t i n g u i s h l i g h t l y c h a i n n e a r e s t i n f a n t s t a t i o n p r o d u c t s e t s h e e p i s h p r a y e r f e m a l e c o m p r i s e d p r o f e s s i o n b a r b a r i t y e x p e r i e n c e d f o o l u n i n t e r e s t i n g d ' y e n i h i l o e x e r c i s e o b s t r u c k t e i g h t e e n d i f f e r p r i c e u t t e r m o s t d e s p e r a t i o n s p e c u l a t i o n l o u d e r s p e c u l a t i o n s f a k i r s r h o n e a m i s s s h i p p i a n o r e a l m s s e e m e d s o l i t a r y s c r a p i n g c o n s i d e r e d d o u b l e m a y b e s u p r e m e w h o l e s o m e f l o t s a m c r e a t e d b e d s i d e s o w e r b y ' s b i l l s s u p p o s e h o n o u r a 
b l e s e n s i t i v e m o r a l i s t s e a s i e r s t o o d d i s t r i c t a s k b i l l y ' s b i t t e r n e s s w i d o w t h o u g h t m e d i c a l n e x t a r o s e p a n t o m i m e t r e a s u r e p e e v i s h l y w h a t l i s t e n e r s s e r v e d h i g h l y r e s u m e d t h o s e t r a v e l l i n g r u l e e n t r e n c h m e n t s p a r t l y m e r c y d o u b t e d g i v e n d e e p t a k e g e n t l e m e n r e f o r m a t i o n a b b e y c o m m u n i t y f i n g e r c h i c k e n o b e d i e n c e w i v e s t h r o w n c o m p o s e u n i n t e l l i g i b l e m a i n c l o s e d c l a s s i c a l m p u l l i n g c o n t a g i o n s i r e m p t y m i s t r e s s s u b m i s s i v e l y s c r u p l e c i v i l i z a t i o n t o b a c c o a l m o s t l a b e l l e d a c c o m p a n y i n g a b o a r d u n d o u b t e d l y l o f t y s o m e t i m e s a c q u a i n t a n c e s l i t t l e c r e a t o r c o m m o d i o u s s o m e w h a t t w e n t i e t h n o t i c e s d e b t c r e a t u r e s e y f f e r t w r i n k l e d b o u n d l e s s s p e c t a t o r s b e c a u s e w e e k s p e c i f i e d g o w n c a s t l e s w a i t i n g t e l l s p o p u l a r n e c e s s a r y c e r e m o n i a l q u i e t l y f r i e n d s p r e c i s e s u f f i c i e n c y h o n e s t y d e c l i n e s c i e n t i f i c t e r r o r e c h o g r i d d l e b a r c h e s t e r f u n d a m e n t a l l y k e p t p u s h i n g d i s t r e s s i n g u s e t a y c o n d u c t i n t e n s e p e a c e s h o u l d c u l t u r e s q u i t e l o w e r e d p r o d i g y s o w e r b y d u t y c h i l d h o o d s t a g g e r e d t r u t h l a n d s c a p e o f f e r a c t i o n d e a n ' s l o o k s w o r s h i p p e e r a g e c r a c k f i r m o u n c e s m i s t a k e n o t i c e a b l e l o a d i n g o w n d i s m i s s e d w h e r e ' s w a r e e l e v a t i o n s e s p e c i a l l y w r o t e p r e s e n c e j o u r n e y s h a k s p e a r e w h a t e v e r d e n i e 
d e a r m e a d o w a i s y s u b j e c t w a s t e a p p a r a t u s a s t u t e n e s s p o n d e r o u s s p l a s h l a u g h e d g r a n t i n g a l t o g e t h e r c o u g h s e r v a n t s l e s t b e g i n n i n g s m o k e l e t t i n g c a u s e t r y d i v i n e o c c u p i e s h e r o e s t h e r e ' s o n e ' s k e e p r e a d e r s p o s t b o o k s h a r d l y r a p i d s l e e p y e n d u r i n g a f f o r d s s l e e k v i s c o u n t r e a d i l y p e r s i s t e d r e i n s u n f o r e s e e n l a p i n s t r u m e n t d e t e r m i n e d n o r i n t e n t l y d i s c o v e r v e x a t i o n p u p i l s a r t i s a n s s t a r v a t i o n p u r c h a s e d m o u n t e d d a y s i g m u n d l a i d g r e a t f e w e n t i r e l y e x t r e m e c o n s i d e r a b l y p o l e m i c a l g o d m o r i b u s l o w e s t r a t h e r e x p e c t p o i n t b r o t h m a n ' s a n x i o u s i n t e r c o s t i a l s p o n y d r a w e r s p h i l o s o p h e r s n e e d s s h o w m e d i c a t r i x r e f l e c t s s n u g c a m e e x p e n s e s p i r i t u a l i t y s p r i n g d e v o i d d e a l i n g s e c o n d a r y g e t t i n g c h e s t k i n d d e f e c t i v e s t r u g g l e s s p a i n f a n n y s t a l k e d d e x t e r o u s l y t h i n k s y e s b a g s f a i n t k n o w n o l d w i p e c a l l i n g r u n n i n r e s p o n s i v e r i g h t l y f u r i o u s l y c h a r a c t e r p e o p l e y e m o u n t a i n i m a g i n a r y c o n t r a s t t h i t h e r w o r k e d w a l k i n g y o u n g e r e x p e c t e d m e c h a n i c a l s e l l r i d i c u l e p e r f e c t e d o v e r l o o k e n t h u s i a s m h y p n o t i s m d o o r f a m i l i a r d r e a m l a n d t h r o u g h c a l l t r o u s s e a u d e l i g h t a l o n e d i s t r i c t s f l o w s b l a n k g a s p i n g d i s c o u r a g i n g m o d e s t d e s c r i b e s t o u c h e d u n e d u c a t e d s c u l p t o r e a t e r c o l o u r e d d e g r e e w e r e a r a b i 
n l i s t e n i n g t o n e d c h a r l e s o u t l o o k v a i n h e r e ' s w r e t c h e d d i s c o v e r i n g s o n g s p a g e s r e c e i v e d c o n g e s t i o n b o w e d u n i t e d s u p p o s i t i o u s s e a t a d v a n t a g e o u s w h i s p e r l a w t r a c t b o b f l o w i n g t e r m s h u m a n c r a d l e r e t u r n e d r a c k b a d t i r e d s h a p e a s s i s t a n c e l o n g i n g p u n c t u a l l y h u n d r e d m i d d l e d y k e s e n e r g y m a d n e s s u n s e l f i s h a b u n d a n c e v i e w c u r e t e m p l e e x a l t e d d e v e l o p m e n t o b j e c t y e a r s t e a e n d o w e d s p i r i t s t a b l e s b a g p e r f e c t i n g j u s t i c e r e d r i f f h o u s e h o l d l a u g h t e r t h r o e s w a k i n g l o o k m a n y p a s s a g e s c a r e t r u c k l i n g s h r i e k e d d i s t a n c e h e ' d d e s i r e s t r a i t e n e d s o o n t o l d i n t e n s e l y f a n c i e s p u r c h a s i n g m a r r y h y p n o t i z e r c a s e g a r d e n m a n a g i n g l u m p s d e t e r m i n e d a n g e r o u s r e s o l v e p e r c e i v e l i d s p a r s o n l a n d g o e s w e n d l o f t i e r d o c t o r s b e s t w e e k s p a r o d i s t s p e r s o n a t e d p l a y i n g n o r t h w a r d h o s t i l e r e m a i n c o m m e r c i a l i ' l l s a f f r o n c o n d i t i o n c a n ' t c o m p a n i o n ' s o p e r a s q u e s t i o n a b l e n a r r a t i v e t r u n k s s t r o k e d s t a b l e d e s i r a b l e p r e y e f f o r t s m e a n a r t e r y d a u g h t e r s s u n k e n a m o u n t s i m p r o p e r e n e r g i e s b o t h a t t a c h t e d i o u s n e s s w i s h c r e d i t e d s a t i s s i t t i n g b u t t e r a t h e n i a n c h o r d a b s o l u t e s t a t u e s m i l i n g l e a r n e d i n n o c e n t h o r s e g r a s p q u a r t e r c o r r e s p o n d e n t c o u r s e d s l e e p l e s s w i f e ' s s e c o n d p o i n t e d t r a d i t i o n a l v i c t o r i a b a t h 
h i d e t f a s h i o n e d f a t h e r i n c h g r a c e m i n d t h e n s c a r c e l y k i s s e s e v e r y w h e r e s t i p e n d p u l s e r e s t r a i n s u m m e r c l o u d r a p i d l y w o o d s s i s t e r r e m a r k d e s p i s e h o o k j i m c o n t i g u o u s r i c h m o n d f r a m l e y p o p e ' s s e c t i o n c o u n t i n g a l l n o b o d y p a s s d o o m s u m m o n f e a r i n c r e d u l o u s d i n e d o c u m e n t a r i l y r e s e r v e d p e r m i t t e d h u m b l e s t c h a r l o t t e b e d r e a m i l y a s s u m e s m e r e s p o k e q u a l i t i e s c a p t a i n d e l u s i o n s a n c i e n t s n o t i c e s i g n a l f a c i l i t a t e a c c o r d a n c e w i s h i n g p r e o c c u p i e d n o v e l s n e e r s u b j e c t s i n f o r m e d c h a n g e s p e r f e c t l y s p i r i t u a l i s e s d a n c i n b a s e d s m o o t h w a y s c a n d y t r e e s i n g e n i a l e x p e r i m e n t i n d e e d p o r t i o n o p e r a t i o n s b e l o n g s p r o s t i t u t e d o f t e n g r e e n a u d a c i t y m e s m e r ' s u g l i f y i n g d i f f i c u l t i e s o b e y e d l o v i n g h u n t i n g f u r n i s h e d h y p n o t i s t s e n t r a n c e t e n d e r d i v e r t c r o u p t h i n k c l a s p e d l i k e c o n f u s i o n c o m m i t t e d f o l k f o r g i v e r o c k s i n s i n u a t e d g l o w i n g c o n g e n i a l s m i l e s c r a f t g r o u n d s a m e w o r l d t r a n s c e n d e n t a l p o w e r s l o c k c o m p l e t e l y a s s u m e d w h o e v e r s l e e p s h a m e f u l h a r a n g u e d s o n g d e c l a r e d q u e e n b r o o m s t i c k u p d e g r e e s s u b j e c t i v e b r i g a d e d i n s e n s i b l e a p a r t r e c o m m e n d p o u n d s e d u c a t e r e c r u i t s e n d e a v o u r c o n f i d e n c e s e e m s e x p e r t d o u b t l e s s m a i l o v e r v o l u n t e e r s f o o d t h o u s a n d d e f i n e d c o m p e l l e d s i d e s g l o r y d e v o t 
e e s l e d g e c a r r i e d h a d n ' t l u x u r i e s d e v i c e m o r a l l y p r o f e s s e d e x p r e s s i o n a s s i s t s i n g l e d g o n e s t r e t c h e d l i t a n i e s b a r b a r i a n s p o c o c u r a n t e a l f r e d t e r r i b l e b r a s s w o r l d ' s b o y w a t e r ' s b e l i e v i n g p l a n e t s f a s t i d i o u s n o t e n j o y m e n t s p i t e h o w w o r k l o u d l y c o n c e a l e d p h e n o m e n a m i n u t e t h e m ' s m e e k w e l l s w o r s t f a c t o r s s p i r i t u a l a p p e t i t e s o l e m n t h e r e d o a b l e s h a r e d c o s t u m e e n g r o s s m y s t i c w e i g h l o s s m o r r o w s o u n d e d p o k e r v e r d u r e f u l f i l w i l l i n g s i g h t m u s k e t r y f l e e t c o m m e n t a r y h o p e w o o d u n e g o i s t i c i n t e n t i o n a n c i e n t r e l i s h e d o c c u p i e d t h o u g h t s c o m m u n i t i e s c o r r e s p o n d e n c e p e r s o n n o w a d a y s r e m i t d e s i r e d d e v e l o p e d s t a y e d i n d i f f e r e n t l y s t e a l t h i l y e x p e r i m e n t e d d i r e a l e s e a r c h i n g m i g h t y e l l o w e r n a t u r a l t e a r s i n c r e a s e d a g r e e d h o m e r r o m a n s p r e v i o u s l y s h e e t a s p e c t s s l i p p e d n o n r e f l e c t i m p e r s o n a t i n g p a n t i n g w e t t e d c e r e b r u m g r o w n a r d e n t h a p p i e s t p o s s i b l e a r m s c l e r g y m a n r e t u r n e n g l i s h u n h a p p y s h e d m e n t a l l y h e l v e t i u s g e n u s o r g a n s p r o d u c e w a n t s p a i d l e g s v a r i o u s g a i n s a i d t r u s t e l e m e n t s p o l e i n v e n t i n g i n t e r e s t e d c h a s e n a i v e t e s d o i n g i t s c u r a t e w a g n e r ' s n e w s b o y b l u s h e d l o v e l i e r r e s p o n s e w o r t h f a r e p r o m i s e a m u s e d o m e s t i c s t e p o c c a s i o n r a g g e d b i r d s m u r m u r d e c e p t i o n m i s f o r t u n e f a 
s t h o l d i n g p a r a d o x i c a l o r g a n i c i n d u c e d p l a t f o r m c r a y t u r e m o r a l i s t i c s e r m o n s c o c k e ' s k n o w m e a n t i m e s h o u l d n ' t c e n t e s p r i t t u r n e d g e n t l e m a n p e r c e p t i o n s i m m e d i a t e l y a d v o c a t e s a n n e a l e d o r a n g e s o n e s p o k e n p u e r f r o m a d d i n g n e l l y c o m p a r a t i v e l y h a t l o n e l y r e f i n e d a d m i r e b u n d l e s t a y i s l e e y e s b r a i n s t h i r d t u r n e n d l e s s d e m e a n e d g r a n t l y f a v o u r i t e o p i n i o n d r e s s e d h e a r t y b i g s m i l e d d e s c r i p t i o n s m e m o r y f u l l r e f u l g e n t c o n s i d e r a t i o n s m e t h o d g a l i a n i s a c r i f i c e r s h a k e s p e a r e ' s o v e r s h a d o w i n g s a v o u r e d h o n e s t s u m n e r v o y a g e a h g i l d p a p a ' s s e m i i d e a r e a l i t y s e c r e t l y r e m a i n e d k n o w s h e d o n i s m o r d i n a r y r e q u i r e d b o n d d e l u s i o n d i s t u r b a n c e c o m m o n l y v i e n n a a c q u i r i n g p a s s e d g r i n p h y s i c s c o n t r o l l e d r e s o u n d e d m i n d s w a t e r o r i e n t a l s i x t y i n a s m u c h g o t g l a d s h e e p e a r n s t e w k e s b u r y o b l i g i n g f i e r c e l y w r i t e r a r e m i g h t y p r e t t y r e g a r d e d w e d d i n g e a r n e s t p r a c t i s e t a s t e d h a v e n c a l l s w h e t h e r t h i s c o a x i n g l y f o r e s t v i v i d c o n v i n c e d s p i n s t e r m e e t s t u p i d i t y l o o k e d i n d i v i d u a l a b i l i t y r h y m e c h e e r f u l n u r s e r y a n g r i l y s h e a b s e n t d a s h e d f r a n t i c m e m b r a n e s m i s m a n a g i n g c o r r e s p o n d e d c a p t i o u s w o t a n s a r v e o r c o n d e m n i n g m o v i n g v e r b s o v e r h e a r s t a l l d e e p e s t s t a i r s a p p r o p r i a t e c a l m e r l a u g h i n g l y i n g s 
e n s a t i o n f e s t i v a l c o m p l e t e s u f f i c e c o n f i d e q u i e t h a p p i l y f a c u l t y u n f a v o u r a b l y w a s n ' t t i l l l o o k i n g e y e l i d s c o m f o r t c l a i m e x c l a i m e d a p p l a u s e r i d e s m u t t e r e d f o r g e t t i n g b o b b y ' s a c c e n t s u g g e s t i n g p r e b e n d a r y s t a n d i n g p e n c i l w a r t h i n r u s s i a m u s i c c h o s e f u r t h e r l u c y ' s f r o n t a p p e a r a n c e s y o u a b o u t b r o a d m o n t h f i n i s h e d a c c u s e s p r i m e v i c e p r o f e s s i o n a l b o a r d w o n d e r f u l i f d i d p e r s o n s r a i s e e u r o p e f a l s e h o o d a n g r y a c c o m p a n i e d r u f f l e d f a l m o u t h b e g g a r s i n g l e a m e r i c a n u n l u c k y i n t r o d u c t i o n p r o p e r a r t i b u s c h a p s s u s p i c i o n s i d e h a p p e n e d d o w n w a r d s o u t r i s e n p o s s e s s e d t h e y ' d p a r t i c u l a r i n d i f f e r e n t s a n k d a r e s a y c o n c e i v e e s t a t e h u m a n i t y o p e n s b a r l e y c a t h e d r a l s t o n e p r o d u c e d c h a n n e l r e m e m b r a n c e j a m e s c r o w d d r i v e l o n d o n w o m e n s a n g u i n e o u s c a u s e d p r o l o n g i n g p e r c e i v e d c h a n c e c o n j u n c t i o n r e g r e t t i n g p u t w h i c h p e r f o r m c l a i r v o y a n c e y e a r s h o r t c o m i n g s p r e j u d i c e d e n c h a n t i n g o b s e r v e l a r r y s t o p p e d p e a s a n t r y d i s p l a c i n g c o n v e r s i n g s c o r e a l l y b e h a v e s a c c o r d s t u d i e d e x h o r t e d b a l t i c r e f r e s h m e n t c i r c u m s t a n c e k i s s p l u n g e d m o s t d e l a y g u a v a p i c t u r e d b i l l y m o r e o v e r p r e t e n t i o u s l y t r i e d t h e r e o f s t i f f b e l i e v e d c o l d n e s s r e s t h e r o i s m u p r o a r i o u s l y e x i s t e d p r i n c e s s e s i m p r i n t e d u n l 
i k e l y i n t i m a t e c h i l d r e n s e n d m o u n t a i n ' s h e a p b r o k e n a c t i o n s s y m p t o m s f o r c e s d o c t o r p h r a s e b a s e s o r r y g o i n g p a s s i n g r a n k l a b y r i n t h s f e e l s m u t u a l i n e x p r e s s i v e n e s s e r e p l y e x a m p l e c o n s e q u e n t l y a l l o w w h i f f h o r r i d l y s u r e c h a r l a t a n p o t s u c h s e n s e n e w a d v a n t a g e a w a k e f r e e d o m c r u s a d e t r e m b l i n g a i n ' t a t l a n t i c o ' c l o c k b o u n d c a b i n p r o m p t e d m o m e n t s s p e a k e r s e a s o n s i n g f a k i r e x p e d i e n t p r o f i t s p r a i s e n i n e t e e n t h s h i n d y f o r e h e a d s c o t c h c o n s t r a i n e t h s u p p e r v a l h a l l a s a c r i f i c e h o m e r i c w h i t e e x p l a n a t i o n h y p n o t i s t s u f f i c i e n t d e a r s t u f f e x p a n d p e s s i m i s m p a t i e n t s n o r t h w i c k e d s t a r s c o r r e s p o n d i e n c e r e c r u i t f e a r f u l l y c h a p t e r e l e g a n t t r i c k p r i c e s s i x w i n t e r c h e a p s t r e a m r e v e n g e s t e l l a r d e a l m o o n b e y o n d b e c o m i n g r e p u l s i v e m y s t i c a l d e a n e r y s m a l l a h e a d y e ' d b e t t e r s w o r d s l i v i n g j e l l y b o l d e r v e r y p l a y e d t r e a s u r e s a c c e p t e d h e c t i c r e v e r e n d d e s t r u c t i o n o x e n b r o u g h t s p r e a d s m o r e i n d u l g e v a u l t c o m e h e ' l l o f f e r e d r i d e c o a c h c o n s e n t e d n ' t g e t s u b m i t t e d s o u n d s d e g r a d a t i o n s t a l k l a t i n i m p e r f e c t l y a l w a y s b r e a k f a s t n e i t h e r c r i e d s e n s i b l y v a s t f i e l d p l e a s a n t c a r r i a g e g a v e f a l l i d e a s t a x e d l i v e s b o n b o n s u t i l i t a r i a n s s e v e n t y t i m e m y s e l f r a y s o n m a y c o u n t y s u n d a y s a r s t h e r e f o r e p r i 
e s t s g r o u p p r o m i s e d c o r d i a l i t y c o n s c i o u s n e s s s o u l m y s t e r i o u s c h a n s o n s n o n e c a s t o n h i g h b o b b y h i t h e r t o f a t h e r ' s c i r c u s o u r t h u s n i n e t y l i g h t s t r o n g l o n g o p e r a t e s r e v i v a l t o l e r a b l y r e p e a t i n g c h a m b e r u l t i m a t e s o l i c i t e d r e s p i r a t i o n l i q u i d m e g a n t i c l i k e s a n g l o i n v i s i b l e o p e n m u l t i f a r i o u s n e s s c h a o s u n d e r s c a l p r e a l l y p i n c h r a r e r a g r e e a b l e o b l i g e d d i s g u i s i n g g e n t l y f a i r y r u i n c o n c u r r e d w r a p p e d o f e x e r t v a l e s f r e e l y f i x r e s p e c t a b l y m a l i c e m u c h p a r t i c u l a r l y b e t w i x t d r a w i n g n i g h e s c a p e d p h i l o s o p h i z i n g h y p n o t i c h y p n o t i z e d m e a s u r e u n d e r s t o o d m e a n w h i l e s u g a r b r a v e r y g r e a s y g r a n d c o o k e t o r r e n t ' s s u b l i m e r p l e b e i a n c o s t u m e s h e a d e d n o n s e n s e d o e s n ' t s t a t e g r o w i n g o b e i s a n c e d r o p p e d b e e n f i r m l y r e g u l a r p e t t i n g p r o f o u n d l y m o t i v e s s t o o p i n g b e i n g s p o s s i b i l i t y h y b r i d h o u s e h a r m o n y l a m p v i r t u e d i f f i c u l t m o d e r n h i g h w a y u s u a l e u d a e m o n i s m s c h o l a r s e n t i m e n t s e e i n g i n n e r m i n e w i n d r e p o r t s b a s i s f a i l t r a y n o r b l e s s l o d g e d d i n n e r w a l l s s e v e r n s a n d y a l r e a d y i l l s i x t e e n c l o s e r e s i d e n c e s h a r p l a k e t i d i n g s h e s i t a t i n g l y c l o t h i n g o b s e r v a t i o n r e c o g n i z e d t r a v e r s i n g d a u g h t e r ' s w i t h d r e w g e n e r a l l y h e a v e n ' s u n c o n s c i o u s m u s i n g c o n f e s s c h a n g e d u g f o r g e d d o m i n a t e w h e r e b y f r a 
n c e l e a s t o v e r s p r e a d k n o w l e d g e c o r i n t h i a n s t r u c k p i c t u r e c l o t h e a y s t a i r p r e s e n t s t a t e s s o n d a m a s k c o m m a n d e d n a p l e s i n s t a n c e b e c a m e s n a p p i n g w e n t r e g r a d a t i o n s t a k e s c h i l d k i n d n e s s b o o k s h e l f s t a r t a r i s t o p h a n i c f e e l q u e s t i o n e d l e t t e r s f r e q u e n t l y a p p e a r r e m a i n s s e l l i n g h y p n o s i s f l u n g i r k b a c k w o r n h a n o v e r i a n s s t i m u l a t i o n k i t c h e n y a w l h a p p i n e s s h i g h e r g o p r o x i m i t y c o r n i s h h a n d l e s p i r i t s o f f e r s r e l a t i o n g a m e q u i n c e y ' s s p e a k i n c o u l d h a n d s d e v i l r y j u d i c i o u s l y p o t a t o e s t a l k e d c l e r g y m e n b o u g h t e v i d e n t b o w a d v a n c e c r i m e t w o e d u c a t e d e a t e n g u i l t y t o w a r d s o c c a s i o n s r i d i c u l o u s a f f o r d i n t e l l e c t u a l l y c o u n t e r h a v e n ' t e a r s c h e e k s f a t n i g h t u t i l i t a r i a n i s m e a g e r r i g i d c o r n e r a r r a n g e s t a n d a r d a r i s t o t l e p u t t i n g w a l e s p i c k e d w e f a c e s u n a b l e i m p o r t a n t h e a d i n s i d e s o l d b e d s h o e d o m e e x p e r i e n c e b e l i e v e t h e y m a g n e t i z e r c o l u m n s p i c k s i n q u a c k r e c i t e d o m i s s i o n f i n d s o p e r a t o r s a r t i s a n w i n d f a l l s y o r k v a c a n t d e r v i s h e s f a n c i e d a n i m a l b o b ' s i m p a t i e n t h i s p r e b e n d p r e s s e d c h e e k h a n d i n g m o d e s h o g g l e s t o c k a m o n g c a n n o t l a r g e p r o f e s s i o n a l s t h r e w p r o c e s s s e n t i m e n t s d o n e b e i n g c o n t e n t s e r e n e b e a r i n g s t a r t e d p e r i o d l o r d ' s m a r t h i e u ' s w a n d e r i n g s i n c r e a s i n g c l u n g b r e a d r e f l e c t 
i o n s o t h e r w i s e u n d e f e n d e d s p l e n d i d h o p e s b e l o n g i n g s o u n d b r u s h y i v o r y e q u a l l y s o m e o n e o r d e r e d s o r r a t h r i v e s d i s t r i b u t e g a u n t t a l e u n c e a s i n g d i s m a y s a d g l a s s y a r t i c l e l u f t o n ' s e v e n o ' e r c a s t s s h a l l o w n i n e d i f f e r e n t c l o t h s m i s s a p p e a l s a n d r e w s w h i p o ' e r f o r g o t s o o t h i n g w e s t u n j u s t g r a m m a r o d e w r o n g s t r e n g t h m a r k e d d r i n k g a r d e n e r i n j u r y e a s i e s t p e r f o r m a n c e s t o u c h c o r n w a l l m y s t e r i e s t e l l i n g r e t r e a t e d w a l l a k i n d e e d s i n s t e a d t a s t e s w r e t c h s y s t e m s m a t c h l a n c e t a f o o t s t o n e h e n g e s t a t i n g s n a k e s e x h i b i t i o n o a r s t o p r e s e n t a t i o n l u f t o n s e a t e d n a t i o n a l d u e w o r t h l e s s s p e c u l a t e s u p p o s e d a v e r a g e g r o o m d e s i n t e r e s s e f i r m a m e n t c h o o s e c o m m e n c e d t u r f g r i e v o u s e n t r a p p e d f a r m e r e q u a l i n t e r v a l e v i d e n t l y p h y s i c s u p e r f l u i t i e s h u m o u r p r o v i d e u n t i l p r o p e r l y d u t i e s e x c e s s a r t f u l d i r e c t i o n d e s t i n e d s i g h s t e a d i l y g o o d y i n t r o d u c e r e t u r n s p i l l o w m o r a l s p r o g r e s s e d r i p d i s c o o r s i n c h e e r f u l l y o p p o s e d g r i s e l d a d e m i d i s t r e s s a m b i t i o n c u l t u r e d s t o r y r e s i s t v i s i t s s l a v e r y d e c e i v e p h i l a n t h r o p y h o n o u r d i f f e r i n g m o u r n f o r n i n t d r o w n e d d r i v e n h u s b a n d s a t u r d a y s c o i n c i d e n c e s e n a t e u r h a l t m a n a g e c o r p o r a l ' s f o u n t a i n u n d e r g o i n g l i f e a l o n g c o m p l i c a t e d g e s t u r e l o s t d a n c e p u r c h a s e r 
m o r a l i z i n g c u r i o s i t y g o d ' s b e a t e n c h i e f d e c i d e d a p o l o g e t i c a l l y c l a s s e s g e n u i n e m o t o r s h o p f o r t u n e s u n s r o a r r o c k i n g s u i t i n g r e d r e l a t i o n s h i p s p u n i s h t e n d e n c y h o r a c e f u l n e s s a r o u n d r e p e a t e d p s y c h o l o g i s t e x p r e s s l y s i n c e o c c a s i o n a l l y f o u n d e n d e a v o r s t w i c e o b s e r v e d l i s t e n e i t h e r o m i t t e d f r i g h t e n c a l m l y c a r e e r s t e a d y s c e n e t e a r f o l l o w e d s a t i s f y b a s k e t s p a n g l e s f r i n c h t i t l e b a c k w a r d s d i e s h a n d k e r c h i e f s e d u c t i o n o w i n g i r r i t a b i l e s a f e d i s c u s s i o n s z e a l s c e n e s c h a f f e d c o a t p a t i e n t v i s i t b l u n d e r s f i t t e d f u m e s w h a t ' s p u r p o s e d o c t r i n e g i l d e d s a y s h a d e d b o x a r r o g a n c e r o m a n i z e d d a m d r a g o o n c o n v e r s e d a d m i r a t i o n e x t e r n a l a n d r e a d f u l c o n s o r t h e i g h t e n i n g s i t g a t h e r e d f u r y m a s s c a r e f u l d i g g i n g w o m e n ' s r e m o t e m i l k g o r g e d o l i v i a w e l l m a l v o l i o b o u n d s p i n b u l l c u l t u r e d i s c u s s i o n m i s f o r t u n e d b r i n g s i l v e r n e a r i t c h i n g f i v e a g e n t l i k e d t y p e l a w y e r p a p a t h o u g h t f u l p o o r p e e p e d w a l k e d c l e a r d i s c l o s e s m o c k i n g n a m e l y p r e a c h e r s o p e n e d c l a s s p e r s u a d e t e m p o r i a l w a s h i n g t o n g r a d u a l l y h y p n o t i z a t i o n h u n t o r i e n t s o m e r s e t s u b j u g a t i o n p e r s p i r a t i o n d i v e r s i o n f o o t s t e p s b e l o n g e d t r e a t s t h e e x p r e s s e s h e r s e l f t e n t l e a v i n g s i g h t e d h a n d i n n o t e t a l l h e r o d e a t h p r i n c i p l e s r e a s o n o p 
e n i n g h e r s d a u g h t e r s e d u c t i v e o b v a g u e h a v e a d d r e s s e d e n d s i n c r e a s e m o r r i s f i n d c o r p o r a l i n q u i r e i n m o s t f a c e i m p a t i e n c e f l o g g e d l o n g e r c r a w l e d s u r g e d o r i g i n a l e n t h u s i a s t i c o p p o s i t e y e ' r e h y p n o t i z i n g s u r r o u n d e r s p e r w e a t h e r i n c l u m s y f r e s h a c c e s s s e v e r a l b u y c o n c e r n i n g u n t r u e a n d s u c h l i k e m o m e n t f e l l o w t h o r o u g h l y s e n d i n g g r o a n i n g q u a n t i t i e s d o m a i n o u r s e l v e s s a t i s f i e d l o t s h o r t h o t l o v e ' s m i s t e r s m i l e t s h a m e n e w s p a p e r s g o l d s m i t h ' s b o d y a c c o u n t a b l e m y d i s l i k e f l o w e r s s t r a n g e a p p e r t a i n a d v e r t i s i n g o p p o r t u n i t i e s m i n u t e s u t t e r l y s p e n t r e s p e c t r e l i e v e d m o r a l i t i e s r o c k y a n s w e r s d e s c r i b e a c t i v i t i e s f o r w a r d s d r o p p i n a n o t h e r y o u n g d e n i a l e n j o y e d s u d d e n l y h e a r t i e s t t o n e d o z e d s a n c t i t y l i t e r a t u r e n e v e r t h e l e s s s u b t l e f o u r s u b t l e t y a f f a i r c o w l d c l a d r e l i n q u i s h e d o p e n l y p s y c h i c c e n t s l l r e s p i r a t o r y t a l k a r i s t o p h a n e s h y s t e r i c a l e i g h t s i x t h i r r e g u l a r w i f e b i t y e t m o t h e r s c a n t y d i s i n t e r e s t e d l y b u t c h e r ' s c r u s a d e s l o r d p a t h s p e a k i n g g r a n d f a t h e r s s u p p l i e d o b e y i n g c o m p a t i b l e c h e r i s h r i s e a d d i c t e d e f f e c t s p r e a d a d d i t i o n a l p i n i n g g o d s e n d s e x t e n t h e ' s a p p r o a c h e d a t o n i n g w h i l e f l o r e n t i n e a p p e a r a n c e s p e e d y a p r i l e u r o p e a n s e y e v a l u a t i o n s s a t i s f a c t o r y a r m 
s c h o o l w o r l d l y m e r e l y l a d i e s d i s t u r b e d m e d i t a t i o n s e v e r y t h i n g t r u s t e d w o n ' t r a i m e n t a f t e r w a r d s c r o w d e d k n e e s r i d i n g p r i n c i p l e m a t t e r s c r e w i n g p r o v e s f i n a l l y p a y p l e a s i n g b u t l e r g i v e e a r l i e r s h o o k p r e s e n t l y a l b a n y e n o r m o u s l y s o m e f o r m s a f f e c t e d g l i d i n g i n t e n s i f i e d w a n t c l o s e r g r e e k c a l l e d k i n g c o n t r a d i c t o r y a n x i o u s l y r a i s e d s n a r e s r e a c h e d w i t h o u t a s k e d f a s t e r c h r i s t i a n m e r i t s m e l a n c h o l y s i n c e r e n o a u t u m n p r e s u m p t i o n l o v e r a f t e r c r a g g s a r t h y p n o t i z e i m m e a s u r a b l e a s s u r i n g d i l a t e a p p l i e s v e n t u r e w o n d e r s c h o o l s u n f a l t e r i n g s u b s e q u e n t p u z z l e d s o m e h o w h o l l a n d i a u n n u m b e r e d o n c e e v e r y f o r m c o o l e r t o g e t h e r p l u m s w a l k s a p p o r t i o n e d t h o u g h c h a r m s r e t u r n i n g c e n t e r t i d e a f t e r w a r d b e n t h a m s a w d r o w s y f i x i n g h e r b e r t d e s i r e s l o v e s s h o w e d t h i n g s a c r o s s f o n d l e d l a t e r w i d e s t o r m s b l i n d i n t e l l i g e n c e b o d k i n s k i n d s h e x a m e t e r s m a i d e a s y v e s s e l s f a i x g i n g e r b r e a d r e d u c e f o l l i e s p l e n t i f u l w i l d a d m i t r u n m e e t i n g d o l l a g a i n h i s t o r i c a l k i n d l y p o o r l y a n s w e r e d c o m m u n i c a t e s m u f f f r i e n d f r o p r i c k i n g e n t e r t a i n m e n t c a u s e s m u n i f i c e n c e m e r e c u r r i n g u p r o o t e d g r e a t e s t c o n c o c t i o n c o a r s e a u t h e n t i c p r i d e t w e l v e s h o w m a n r i n g i n g d i d n ' t i l l i g a n t m i l f o r d b a l l a d s s t i l l d e c o u r a g 
e o b v i o u s i t ' s s u c c e s s f u l b a l a n c e r a t e c o m m a n d u n c o n d i t i o n a l l y m o r a l i t y w h i r l e d f a c t h e r e a f t e r u t t e r c a n l o v e h u g r e q u i r e s a s i d e t h u n d e r s t o r m d o e s p h r a s e s c o n s i d e r p r e c o c i o u s z o l a ' s e v e n t s l o u r d e s i n w a r d c o n s t a n t l y w h y c a n v a s d r i n k e r c l e a r e d p l a i n w a r d m a k e t i m b e r s c h e m i n g p o e t s s l u i c e o n e s s i e g e w a s h m i l t o n e a s t m o r a l w h i t h e r e a t i n g c e r t a i n f i n e a t t e n d a n c e d e s c r i b e d t e m p e r a m e n t b e g g e d r i c h e s m e n e l e v e n s i m p l e u n c l a s p d e m o c r a t i c p a r s o n a g e t u n e d r e a m m e c h a n i s m o t h e r m a n i f e s t a t i o n s n a r r o w a t r o b a r t s t u r n i n g c o n v u l s i v e s a l t p e t r e d e e p l y c l e a n d w a r f s t e a c h i n g r e l i e f a b l e p a p e r s c u r a c y h a m m e r p r o u d l y m e d i o c r i t y v i r t u e s a n y h o w h a r d n e s s h a v i n g d i s t u r b s t e n d r e a d s a c r e t d i s c i p l i n e o f f i c e l i b e r a l s o f t l y p r o t e c t m o t l e y c o m f o r t s h a i r c o n t r a r y s e n s a t i o n s c r e d i t u p o n f i n a l t i n g l i n g m a s k s p i r i t u a l i s i n g a i r r e s p e c t a b l e g r e y l e d b a r e c i r c u m s t a n c e s g a l l o n e n g a g e d h e a r t h s t o n e g o o d n e s s c o n t r o l f o o l s y e l l o w i s l a n d h e a r b r o t h e r ' s p l a c e d a m u s e d j u s t p r o f o u n d n u m b a c c i d e n t a l b l e s s e d w i g s s e a r c h c h a r l a t a n i s m c r a c k l e d h e a s k i n g p r o t e s t a b s e n c e g a z e t e n d e r e d b u r s t s p a r t d i s c o u r a g e m e n t i n c i d e n t a l l y q u o t e f i n e r y t a k i n g h o l d a b o u n d i n g s c i s s o r s w e a k e r e x p l a i n t o o m 
i s f o r t u n e s s h i p s d e m o n i a c a l b e n t l e t r i g h t s o r t w i s h e d t h i n g p r e s e n t s g l i b l y t i s s h i n e o r d e r f a i l e d l a y c h i e f l y e x p r e s s e d s e e n s c a r r e d i m p o s s i b l e r e g a r d s d e a f e n i n g l a u g h r o o m d a n d y a c c o u n t s f o r m e r l y i m p e r s o n a t i o n s m o o d p o p e h e i r e s s a c r e s n e w s w o m a n c o u r t i n g j o v e a c c o u n t w a n d e r e d l a t t e r a d o p t e d w o u l d n ' t s t r i v e a f e a r ' d b a d l y c o u n t e d i n v o l u n t a r i l y t h e i r g i r l s j u m p e d s t r i c t e s t h e a l e r s h a s v i r g i n i a a p p e a r s c u t r e d u c e d b l i s t e r r e m e m b e r e d p r o m o t i o n i n j u r e d c l a i m s f r o n t a l n o r m a l h e a l t h p r o t e s t e d b o l d l y s p e n d i n g m o o r i s h r e l i g i o n b e c o m e s f u t u r e s e x h u r r y s e m i c i r c l e f o o t e x p e r i e n c e s f e e d n u r s e s s i t u a t i o n c l i n g c o u r t s h i p g e r m a n s p r o p e r t y d i s a z e a w a y p r e v a i l e d v o i c e s o n ' s u s e f u l n e s s m o v e d p u c k ' s s p r u n g r e a l i s t i c n i n e t e e n s u n k e a s i l y p e r q u i s i t e s g e r m a n y f e e l i n g p r e c e d e n t i n f i n i t e l y b r o t h e r r e a c h i n g w i n e i n f l u e n c e i s n ' t e n e m i e s p e r f o r m e r s p e r f o r m a n c e e f f i c i e n t o u t w a r d e x c e e d i n g l y c h u r c h c o n c e a l m e n t a n y t h i n g c o m m e n t c a n e u n c o m m o n g r a n d e u r t r u l y b o o k d r i n k i n g t i l l a g e s a m p l e l o v e l y r u n n i n g s a x o n s w a s e n t e r e d a s t o n i s h m e n t b r e a k i r i s h p o e t ' s w i t n e s s m e n t i o n e d g r u m b l i n g m a n n e r r e t i r e m e n t c a s u a l s u p e r h u m a n s a y s s i c k n a t u r e s m a s t e r ' s h u n t e r i n s t r u c t i o 
n s i m m e d i a t e r e s u l t s h e r c h a i r a c t i n v e n t i v e n e s s s w e d e n b o r g m a k i n g b o t t l e s h o u s e s p o e t d a r k l i e i l l n e s s a g e d l e g i t i m a t e p u l l s t a t e m e n t s c o m p a n i o n i n t e r e s t s t h e r e b y m i n g l i n g s e i z i n g r e f l e c t i o n f r e n c h w i s h e s f a c i n g t e n t s b r i t a i n a l s o t e m p t e d s u p e r i r o n p e a s a n t t h i n k i n g s w e e t h e a r t s f o l l o w i n g r e q u i r e p l e a s a n t l y m e t a p h o r v o l t a i r e a n s p a r e f l o o d d e l i c a t e c o n f i d e n t t h i n k e r c o o p e r ' s t o d a y p r e d e c e s s o r v i e w s o p e r a t i o n d i n o c c u r r e d c o n t r a c t e d s u f f e r i n g h e r e i n c e n t i v e f o r c e b u l l s s o l a c e i n c o m p r e h e n s i b l e t r a i n n u t s i n q u i r e d p u c k s p e e c h e s t e e m d i m i n i s h i n g i n o r g a n i c i n t e l l i g e n t q u i c k l y m u s c l e n a m e e n d u r e s t r i p e d p a t t e r n r e q u i r e m e n t s t a l k s a t t e n d e d t e m p o r a l i g n o b l e v o i c e s l e a v e s n i h i l m a s t e r d a n g e r f l a m e s b o a t g i v e s l i b r a r y p e n n i l e s s a m e r i c a h u s h i ' m w o r s e s h a m e f a c e d g l a n c e m a n t a c t d e l i b e r a t e l y t r u e c l o t h m a s q u e r a d e s d i f f i d e n t t i r e b r e a t h i n g w e a t h e r r e j e c t p a s t t o s s e d l i n e p r e c i s e l y s t r o k e w o r k s u s e d a p p e a r e d c o n t i g i t g i r l r e a l w o o i n g d o u b t s i m i l a r i t y d u r i n g t e m p e r c l o c k e d p e n k n i v e s o m n i u m e x c i t e m e n t i n t o v i c t i m s r a n d o m s t r i p e s d i v i n i n g l i k e n e s s c a p a b l e i m p o s t o r h e l p w a y s p o i l e d p r o p h e t s s a c r e d e n g a g e m e n t s s a y i n g e x t r a o r d i n a r y i s p r o v i d e n c e c r a w 
l e y d r a w o p p o r t u n i t y i m p r o v e d g l a s t o n b u r y s e a c i g a r e t t e e x h i b i t p r o b a b l y b e h i n d h o m e s w a i n s j u l y c h a i n s d i f f i c u l t y c l a i r v o y a n t i f a c u l t i e s c o m i n g r e t a l i a t o r y r e s p o n d e d t i m e s e m p l o y e d b r o w n g l o u c e s t e r s l u c y s c o r n a u g u s t v i c i o u s m u s h a c r y i n g c a r b i n e e r d w a r f g u a r d t o m o r r o w g o o d s r e p u l s e d r u b b e d o p e r a t i n g c a s e s g a y s a i d u n k n o w n s u b j e c t ' s i n f i n i t e p r o d u c i n g h a s t e n e d h i m g i v i n g e l s e l a d y n e v e r c o n c e r n s i l e n c e m e a n l y a s c h a n c e d f i n e r r o m a n t i c s h o o t t h e s e i t s e l f d i s p o s e b e t r a y s h u d d e r i n g a r e n t s c h i l d ' s n e e d a r t i s a n ' s e s s e n t i a l l y s t o p t o o k c o n v i n c e i g n o r a n c e p a t i e n c e p r i v i l e g e d s h a l l r e f i n e m e n t d e t e r m i n e s m o v e m e n t s m o n e y a c c u s t o m e d f o o t i n g d r e a m t c o l o r s r e l a t i o n s a v o i d d i s c e r n m e n t s t o c k i n t e n d e d r e c o r d s l i v e r i v e r d e v i l a m u s i n g f i d d l e i m p l i e s a n s w e r i n t e r e s t e m p l o y m e n t e n g a g e m e n t m a s s i v e h o r s e s s i n t r y a w a r e p r e f e r m e n t u g l y f e v e r t h e m s e l v e s w r i t t e n m a k e s c r a w l e y ' s b e l i e v e r n a t i v e m o n t h ' s l a d e n e x t i n g u i s h l o u d h u s b a n d ' s t w i r l e d h o n o r f u n n y l e a n e d m a m m a p l a n e t s u f f u s e d c o m p a n i o n s f a m i l y i t i n c l i n e s a b u n d a n t g a l l e d h e a r d f a r e w e l l a u t h o r i t a t i v e a z u r e o p e r a t e d s w a m p l a b o u r u s e f u l c u s h m a n r e j o i c i n g b a n k r e a s o n s h a s t e g a r m e n t a u d i e n c e w e a r i n e s s t h 
r e e w o v e n h o a r s e w h o i n c o m e s p a r i s s t u p i d s w i r l i n g c a r g o d e c e i t e m p r e s s e s a c t u a t e d w a t e r s m a r v e l l o u s l y l a n g u a g e d a s h o n i n v e n t i o n g r e a t l y s g i r l ' s m a n a g e d i n t e r r o g a t i o n n e g l e c t e d a s s ' s d a n e s o p i u m t y p e s d o m i n a t i n g i m p r e s s i o n p r o v i n c i a l i s m v a n i t y r e s u l t h e a v i l y a r i s e s e n d s c h o l a r s h i p r i c h r e s p o n d e n t e r t a i n i n g t a s t e r e p r e s e n t c h i a j a s u f f e r e d s u p p o r t c i v i l i z a t i o n s m a t t e r s o b j e c t e d m a l e y o u r o w n e d e x i s t s r e s i s t e d b r i t o n s l i n e r f a r g u i d e d i s c o v e r e d u n w i l l i n g c u r l y c o t t a g e s h a r e o b l i g e i n v a r i a b l y i n s t i n c t d i s g u s t a p p r o v a l o p e r a t o r c a r e f u l l y n o r t h m e n w i t h i n m o t i o n f o r t i f i e d s h e f f i e l d f o l l y m a t e r i a l o f f e r i n g l a n d l a d y b a r o c c o w e a k t i p p e d p u r i t a n m e l l o w t h r a v e l s i m p a r t c a r r y c o u n t r y p o r k p l a c e r s t r a i g h t r o s e w h o l e i m p a d e p r e f e r e n c e s m a t u r i t y s p e a k c o u l d n ' t d u k e w a n t e d s e e m i n g l y e n o u g h w a r m l a k e ' s b r a i n g a t h e r m o i s t e n f o r p h e n o m e n o n s a r m e d s n o r e d p r a c t i c a l l y a g e e x p l a i n e d f a r m y a r d h y d r a u l i c d r e w w o o d e d d i s e n g a g e c o n v e r s a t i o n n a r r o w n e s s h u r r i e d l y b r o o m p o p u l a c e d e a f p u r e l y g l o r i f i c a t i o n s h i s t o r y c a r n i v a l h a s t i l y e f f o r t s u g g e s t i v e u n a p p r o a c h a b l e e a r t h i n s t i n c t i v e b r i g h t d i v i n i t y s c a l e c l a y m u s t s e n t w h o s e a b s o r b e d h e i g h t g a z i n g s t r o n g e s t s y 
s t e m w a r s u b t l e r l i s t e n e d u n e q u i v o c a l b o y s j o y e n t e r i n g s e l d o m a d m i r e d w e a r s t o r m f e a t s o u t r a g e o u s s h a k e s p e a r e f l i n t e v e r y b o d y w i t c h m a g n e t i c a l t h o u g h s u c c e s s e s t h e e d i l a t e d e v i l r o a r s d o r e a l i t i e s q u e s t i o n c u l t i v a t o r b u s i l y u s i n g s h o w i n g b r i n g i n g d o w n l e t h a r g y a t t e n t i o n r o l l g r a t i f i c a t i o n c o n t e m p t s u g g e s t e d p o c k e t b e d r o o m r e g a i n e d s t a n d b e a r p l a u s i b l e m e a n i n g t e l l o r g a n b r o k e p u r s e s o n s y b a r p u b l i c h a l c y o n c o n n e c t i o n f o r c i n g s i n k i n g d i s g u i s e d e v e r e s t a b l i s h e d e m p e r o r s a s l e e p y o u ' r e s l i g h t r e p l i e d b o n h o m m e l e s s o n d i e d d e l i g h t e d f r e q u e n t c a s t f i l l e d p r o b a b i l i t y c o v e r e d h a d u n u s u a l r i d f a s h i o n f i r e e m b o d i m e n t u n f o r t u n a t e l y p e r f e c t i o n d e s e r v e f e e t a t t a c k e d f i r s t r e a d i n g b y a u t h o r i t y c r y c o c k e c o l o u r s c o l d m o n u m e n t s s u m s e r i e s s e p a r a t e r a r e l y n o t h i n g b l e n d e d b o x w o o d f e l l o w s w o r d s e n g l a n d a f r a i d c o o l i n g t o n g u e m a t r i m o n y r a c e s s w e l l w i d a n y d i s t i n g u i s h e d m a g n e t i s m w a l k a c k n o w l e d g e d s u c c e s s i o n w h i s p e r e d b l e e d i n g w o u l d m e m b e r d i s s a t i s f a c t i o n t h r o w i n t e l l e c t u a l t a b l e c h i l d r e n ' s t r y i n g b e g i n n i n r e l u c t a n c e g r o s s l y g o o d i m a g i n a t i o n b e c o m e b e a u t i f u l g u n t e r o t h e r s f l a t t e r m a s c u l i n e t r o u b l e b e h a v e d p e r h a p s m i t i g a t e d f a l t e r e d h a p p y w a i s t k n e w t a 
m c o u n t e n a n c e l o d g i n g s c o a l s c o m p u l s i o n e a r l y s u r p r i s e d d i m i n i s h e d s h o w s a g o a c k n o w l e d g m e n t p e d a n t p r i v i l e g e e n c h a n t e d b a n t e r p r o b a b l e p r a y d e s t i n y p e r v e r s i t y i n c u m b e n t r h i n e l i n e s b u r i e d w a s t e d b a t h e d s e v e n c o a s t s e c o n d l y r o s e t t e v e r s e s p r e p a r e d s w e e t h e a r t s t e r n c r e a t i v e a p p r e c i a t e c a u t i o u s w h o m d e l e c t u s h a n d s o m e m a r r i e d y i e l d i n g k i n g ' s r e m i t t e d s h o r e f i e l d s f u l l e s t d i m d e s c r i p t i o n c o m p a n y l u c k y m i d b e e r c o n s c i o u s d e c r e a s e d p r e a c h e d i n c l u d i n g p r o t e c t e d t h a t ' s f o r m e r m e a n t j o i n t ================================================ FILE: data/smoke_test/vocab.pruned.txt ================================================ spot word jaunty nearer heavy bell flint's moralist resolved eighth european mouth missus moss party pale mill celts dispensed frankly sympathy mad flattered devils vomit continued leave philosophy indemnity waited net tested saxon protective glitterin previous dead learn forth letter cares above excellences flaubert grammont employments preparatory exhausted gravely voltaire fifteen intimacy reasonably mire eggs humble something damage poetry mingle low stick v coverless fell met silent casts troth only lived us reasonings gait seventh humbug striving habit general taken attracted drained worthy secret arrive off clouds hand them ingenuus inevitable eagerly melody cunning voluntarily gold blood than conscience breaking nature color attitude where disposes storeroom imperfect anger a systematically relieve packed pleasure flatteries slur acceptances peculiar bestowed labyrinth arrived ventured society affairs afternoon wheels prince chime each beats distempered naturally personal reputation evening 
vaste madame l insidious cetera smothered clothes noticed wonders blue suggestion forrest morning meditated artillery passes impose certainly business fathers nay intoxication unexpected stroking but repeat disturb possibly oh accept lips please heartily acquisition enjoy accentuation accounted sweet fixed definite vigorously problems follow manifested fastin delays drawn evening's islands between notwithstanding terribly passion ready superimposed expectations religious respects self emotional made invariable contemplate effects immoral rested mainmast strings design capacity added unload don't greater special left less ought inexpressive drama charity excuse foreign other's style congratulations enmity fair thirty so top diaphoresis forward stage outside grew scowl free port grenadier understanding strained services disagreeable when anomaly blame days merry complaisance obtained fluid medium i'd privileges rag haggard inaudible devotion unconsciously rock honesty' round severe amount tankard painfully enclosures settled pleased yourselves shelved hours chaste hereditarily comes marvelous yea inconsistency hardest steps riddle suit applications profundis sanctified ex sins noise uttered discoverer noble rider cross awoke himself means felt understand spanish defending express skies twilight cornered prospect gods soever are redoubled machine twenty mix judging utmost eventually exciting beg lunch senses o'shanter considerable infantry edge clerical italian angel dreams tension mirth filterin struggle will define ladyship d'epinay objective unconsciousness we'll wholly artist's lodging pile number bedclothes meat roasted firstlings comfortable slow supersede shred course places against securing problem sat lose cures income visions ponderously depends concurrence began sake eve's drown conscientious waves assent thence false freethinker acquaintance stare mark packet shaftesbury hearers nicely substance extends stared haired wound charioteer see thy fingers silly 
musical fit glazed charge directed encouraged parish deceiving starting gallantry plainly pardon manifest fault hasty slumber piteously evoke with nearly heart that mankind disposing british interesting floor pain absolutely holy closely utility bristol importance recovery soft fig tuppence century expensive edition adam's supply sons wire shields guided beating hour cheaper ten belief prove theory fully janius am curious exhibits now sang telegram misunderstand throne doctor's youth confessions experiments goldenness breath borne mode last alps hard sickness suggestions corridor half features concisely discussing there'll synthesis presumably cerebral pursuits according repugnant wait thorn bruised stave baby croaked power however practice baycon before remember spectator babies proof ireland phenomenon distinguish lightly chain nearest infant station product set sheepish prayer female comprised profession barbarity experienced fool uninteresting d'ye nihilo exercise obstruckt eighteen differ price uttermost desperation speculation louder speculations fakirs rhone amiss ship piano realms seemed solitary scraping considered double maybe supreme wholesome flotsam created bedside sowerby's bills suppose honourable sensitive moralists easier stood district ask billy's bitterness widow thought medical next arose pantomime treasure peevishly what listeners served highly resumed those travelling rule entrenchments partly mercy doubted given deep take gentlemen reformation abbey community finger chicken obedience wives thrown compose unintelligible main closed classical m pulling contagion sir empty mistress submissively scruple civilization tobacco almost labelled accompanying aboard undoubtedly lofty sometimes acquaintances little creator commodious somewhat twentieth notices debt creature seyffert wrinkled boundless spectators because week specified gown castles waiting tells popular necessary ceremonial quietly friends precise sufficiency honesty decline scientific 
terror echo griddle barchester fundamentally kept pushing distressing use tay conduct intense peace should cultures quite lowered prodigy sowerby duty childhood staggered truth landscape offer action dean's looks worship peerage crack firm ounces mistake noticeable loading own dismissed where's ware elevations especially wrote presence journey shakspeare whatever denied ear meadow aisy subject waste apparatus astuteness ponderous splash laughed granting altogether cough servants lest beginning smoke letting cause try divine occupies heroes there's one's keep readers post books hardly rapid sleepy enduring affords sleek viscount readily persisted reins unforeseen lap instrument determined nor intently discover vexation pupils artisans starvation purchased mounted day sigmund laid great few entirely extreme considerably polemical god moribus lowest rather expect point broth man's anxious intercostials pony drawers philosophers needs show medicatrix reflects snug came expense spirituality spring devoid dealing secondary getting chest kind defective struggles spain fanny stalked dexterously thinks yes bags faint known old wipe calling runnin responsive rightly furiously character people ye mountain imaginary contrast thither worked walking younger expected mechanical sell ridicule perfected overlook enthusiasm hypnotism door familiar dreamland through call trousseau delight alone districts flows blank gasping discouraging modest describes touched uneducated sculptor eater coloured degree were arabin listening toned charles outlook vain here's wretched discovering songs pages received congestion bowed united suppositious seat advantageous whisper law tract bob flowing terms human cradle returned rack bad tired shape assistance longing punctually hundred middle dykes energy madness unselfish abundance view cure temple exalted development object years tea endowed spirit stables bag perfecting justice redriff household laughter throes waking look many passages care 
truckling shrieked distance he'd desire straitened soon told intensely fancies purchasing marry hypnotizer case garden managing lumps determine dangerous resolve perceive lids parson land goes wend loftier doctors best weeks parodists personated playing northward hostile remain commercial i'll saffron condition can't companion's operas questionable narrative trunks stroked stable desirable prey efforts mean artery daughters sunken amounts improper energies both attach tediousness wish credited satis sitting butter athenian chord absolute statue smiling learned innocent horse grasp quarter correspondent coursed sleepless wife's second pointed traditional victoria bath hid et fashioned father inch grace mind then scarcely kisses everywhere stipend pulse restrain summer cloud rapidly woods sister remark despise hook jim contiguous richmond framley pope's section counting all nobody pass doom summon fear incredulous dine documentarily reserved permitted humblest charlotte be dreamily assumes mere spoke qualities captain delusions ancients notice signal facilitate accordance wishing preoccupied novel sneer subjects informed changes perfectly spiritualises dancin based smooth ways candy trees in genial experiment indeed portion operations belongs prostituted often green audacity mesmer's uglifying difficulties obeyed loving hunting furnished hypnotists entrance tender divert croup think clasped like confusion committed folk forgive rocks insinuated glowing congenial smiles craft ground same world transcendental powers lock completely assumed whoever sleep shameful harangued song declared queen broomstick up degrees subjective brigaded insensible apart recommend pounds educate recruits endeavour confidence seems expert doubtless mail over volunteers food thousand defined compelled sides glory devotees ledge carried hadn't luxuries device morally professed expression assist singled gone stretched litanies barbarians pococurante alfred terrible brass world's boy water's 
believing planets fastidious not enjoyment spite how work loudly concealed phenomena minute them's meek wells worst factors spiritual appetite solemn there doable shared costume engross mystic weigh loss morrow sounded poker verdure fulfil willing sight musketry fleet commentary hope wood unegoistic intention ancient relished occupied thoughts communities correspondence person nowadays remit desired developed stayed indifferently stealthily experimented dire ale searching might yellower natural tears increased agreed homer romans previously sheet aspects slipped non reflect impersonating panting wetted cerebrum grown ardent happiest possible arms clergyman return english unhappy shed mentally helvetius genus organs produce wants paid legs various gainsaid trust elements pole inventing interested chase naivetes doing its curate wagner's newsboy blushed lovelier response worth fare promise amuse domestic step occasion ragged birds murmur deception misfortune fast holding paradoxical organic induced platform crayture moralistic sermons cocke's know meantime shouldn't cent esprit turned gentleman perceptions immediately advocates annealed oranges one spoken puer from adding nelly comparatively hat lonely refined admire bundle stay isle eyes brains third turn endless demeaned grantly favourite opinion dressed hearty big smiled descriptions memory full refulgent considerations method galiani sacrificer shakespeare's overshadowing savoured honest sumner voyage ah gild papa's semi idea reality secretly remained knows hedonism ordinary required bond delusion disturbance commonly vienna acquiring passed grin physics controlled resounded minds water oriental sixty inasmuch got glad sheep earns tewkesbury obliging fiercely write rare mighty pretty regarded wedding earnest practise tasted haven calls whether this coaxingly forest vivid convinced spinster meet stupidity looked individual ability rhyme cheerful nursery angrily she absent dashed frantic membranes mismanaging 
corresponded captious wotan sarve or condemning moving verbs overhear stall deepest stairs appropriate calmer laughing lying sensation festival complete suffice confide quiet happily faculty unfavourably wasn't till looking eyelids comfort claim exclaimed applause rides muttered forgetting bobby's accent suggesting prebendary standing pencil warthin russia music chose further lucy's front appearances you about broad month finished accuses prime vice professional board wonderful if did persons raise europe falsehood angry accompanied ruffled falmouth beggar single american unlucky introduction proper artibus chaps suspicion side happened downwards out risen possessed they'd particular indifferent sank daresay conceive estate humanity opens barley cathedral stone produced channel remembrance james crowd drive london women sanguineous caused prolonging perceived chance conjunction regretting put which perform clairvoyance year shortcomings prejudiced enchanting observe larry stopped peasantry displacing conversing score ally behaves accord studied exhorted baltic refreshment circumstance kiss plunged most delay guava pictured billy moreover pretentiously tried thereof stiff believed coldness rest heroism uproariously existed princesses imprinted unlikely intimate children send mountain's heap broken actions symptoms forces doctor phrase base sorry going passing rank labyrinths feels mutual inexpressiveness e reply example consequently allow whiff horridly sure charlatan pot such sense new advantage awake freedom crusade trembling ain't atlantic o'clock bound cabin prompted moments speaker season sing fakir expedient profits praise nineteenth shindy forehead scotch constraineth supper valhalla sacrifice homeric white explanation hypnotist sufficient dear stuff expand pessimism patients north wicked stars correspondience recruit fearfully chapter elegant trick prices six winter cheap stream revenge stellar deal moon beyond becoming repulsive mystical deanery small ahead 
ye'd better swords living jelly bolder very played treasures accepted hectic reverend destruction oxen brought spreads more indulge vault come he'll offered ride coach consented n't get submitted sounds degradation stalk latin imperfectly always breakfast neither cried sensibly vast field pleasant carriage gave fall ideas taxed lives bonbons utilitarians seventy time myself rayson may county sundays ars therefore priests group promised cordiality consciousness soul mysterious chansons none caston high bobby hitherto father's circus our thus ninety light strong long operates revival tolerably repeating chamber ultimate solicited respiration liquid megantic likes anglo invisible open multifariousness chaos under scalp really pinch rarer agreeable obliged disguising gently fairy ruin concurred wrapped of exert vales freely fix respectably malice much particularly betwixt drawing nigh escaped philosophizing hypnotic hypnotized measure understood meanwhile sugar bravery greasy grand cooke torrent's sublimer plebeian costumes headed nonsense doesn't state growing obeisance dropped been firmly regular petting profoundly motives stooping beings possibility hybrid house harmony lamp virtue difficult modern highway usual eudaemonism scholar sentiment seeing inner mine wind reports basis fail traynor bless lodged dinner walls severn sandy already ill sixteen close residence sharp lake tidings hesitatingly clothing observation recognized traversing daughter's withdrew generally heaven's unconscious musing confess change dug forged dominate whereby france least overspread knowledge corinthian struck picture clothe ay stair present states son damask commanded naples instance became snapping went re gradations takes child kindness bookshelf start aristophanic feel questioned letters frequently appear remains selling hypnosis flung irk back worn hanoverians stimulation kitchen yawl happiness higher go proximity cornish handle spirits offers relation game quincey's speakin could 
hands devilry judiciously potatoes talked clergymen bought evident bow advance crime two educated eaten guilty towards occasions ridiculous afford intellectually counter haven't ears cheeks fat night utilitarianism eager rigid corner arrange standard aristotle putting wales picked we faces unable important head inside sold bed shoe dome experience believe they magnetizer columns pick sin quack recited omission finds operators artisan windfalls york vacant dervishes fancied animal bob's impatient his prebend pressed cheek handing modes hogglestock among cannot large professionals threw process sentiments done being content serene bearing started period lord's marthieu's wanderings increasing clung bread reflections otherwise undefended splendid hopes belonging sound brushy ivory equally someone ordered sorra thrives distribute gaunt tale unceasing dismay sad glassy article lufton's even o'ercasts shallow nine different cloths miss appeals andrews whip o'er forgot soothing west unjust grammar ode wrong strength marked drink gardener injury easiest performances touch cornwall mysteries telling retreated wall akin deeds instead tastes wretch systems match lancet afoot stonehenge stating snakes exhibition oars to presentation lufton seated national due worthless speculate supposed average groom desinteresse firmament choose commenced turf grievous entrapped farmer equal interval evidently physic superfluities humour provide until properly duties excess artful direction destined sigh steadily goody introduce returns pillow morals progressed rip discoorsin cheerfully opposed griselda demi distress ambition cultured story resist visits slavery deceive philanthropy honour differing mourn fornint drowned driven husband saturdays coincidence senateur halt manage corporal's fountain undergoing life along complicated gesture lost dance purchaser moralizing curiosity god's beaten chief decided apologetically classes genuine motor shop fortune suns roar rocking suiting red 
relationships punish tendency horace fulness around repeated psychologist expressly since occasionally found endeavors twice observed listen either omitted frighten calmly career steady scene tear followed satisfy basket spangles frinch title backwards dies handkerchief seduction owing irritabile safe discussions zeal scenes chaffed coat patient visit blunders fitted fumes what's purpose doctrine gilded say shaded box arrogance romanized dam dragoon conversed admiration external an dreadful consort heightening sit gathered fury mass careful digging women's remote milk gorged olivia well malvolio bounds pin bull culture discussion misfortuned bring silver near itching five agent liked type lawyer papa thoughtful poor peeped walked clear discloses mocking namely preachers opened class persuade temporial washington gradually hypnotization hunt orient somerset subjugation perspiration diversion footsteps belonged treats the expresses herself tent leaving sighted handin note tall hero death principles reason opening hers daughter seductive ob vague have addressed ends increase morris find corporal inquire inmost face impatience flogged longer crawled surged original enthusiastic opposite ye're hypnotizing surrounders per weatherin clumsy fresh access several buy concerning untrue and suchlike moment fellow thoroughly sending groaning quantities domain ourselves satisfied lot short hot love's mister smile t shame newspapers goldsmith's body accountable my dislike flowers strange appertain advertising opportunities minutes utterly spent respect relieved moralities rocky answers describe activities forwards droppin another young denial enjoyed suddenly heartiest tone dozed sanctity literature nevertheless subtle four subtlety affair cowld clad relinquished openly psychic cents ll respiratory talk aristophanes hysterical eight sixth irregular wife bit yet mother scanty disinterestedly butcher's crusades lord path speaking grandfathers supplied obeying compatible cherish 
rise addicted effect spread additional pining godsends extent he's approached atoning while florentine appearance speedy april europeans eye valuations satisfactory arm school worldly merely ladies disturbed meditations everything trusted won't raiment afterwards crowded knees riding principle matter screwing proves finally pay pleasing butler give earlier shook presently albany enormously some forms affected gliding intensified want closer greek called king contradictory anxiously raised snares reached without asked faster christian merits melancholy sincere no autumn presumption lover after craggs art hypnotize immeasurable assuring dilate applies venture wonder schools unfaltering subsequent puzzled somehow hollandia unnumbered once every form cooler together plums walks apportioned though charms returning center tide afterward bentham saw drowsy fixing herbert desires loves showed things across fondled later wide storms blind intelligence bodkins kinds hexameters maid easy vessels faix gingerbread reduce follies plentiful wild admit run meeting doll again historical kindly poorly answered communicates muff friend fro pricking entertainment causes munificence me recurring uprooted greatest concoction coarse authentic pride twelve showman ringing didn't illigant milford ballads still de courage obvious it's successful balance rate command unconditionally morality whirled fact hereafter utter can love hug requires aside thunderstorm does phrases consider precocious zola's events lourdes inward constantly why canvas drinker cleared plainward make timber scheming poets sluice ones siege wash milton east moral whither eating certain fine attendance described temperament begged riches men eleven simple unclasp democratic parsonage tune dream mechanism other manifestations narrow at robarts turning convulsive saltpetre deeply clean dwarfs teaching relief able papers curacy hammer proudly mediocrity virtues anyhow hardness having disturbs tend read sacret discipline 
office liberal softly protect motley comforts hair contrary sensations credit upon final tingling mask spiritualising air respectable grey led bare circumstances gallon engaged hearthstone goodness control fools yellow island hear brother's placed amused just profound numb accidental blessed wigs search charlatanism crackled he asking protest absence gaze tendered bursts part discouragement incidentally quote finery taking hold abounding scissors weaker explain too misfortunes ships demoniacal bent let right sort wished thing presents glibly tis shine order failed lay chiefly expressed seen scarred impossible regards deafening laugh room dandy accounts formerly impersonations mood pope heiress acres news woman courting jove account wandered latter adopted wouldn't strive afear'd badly counted involuntarily their girls jumped strictest healers has virginia appears cut reduced blister remembered promotion injured claims frontal normal health protested boldly spending moorish religion becomes future sex hurry semicircle foot experiences feed nurses situation cling courtship germans property disaze away prevailed voice son's usefulness moved puck's sprung realistic nineteen sunk easily perquisites germany feeling precedent infinitely brother reaching wine influence isn't enemies performers performance efficient outward exceedingly church concealment anything comment cane uncommon grandeur truly book drinking tillage sample lovely running saxons was entered astonishment break irish poet's witness mentioned grumbling manner retirement casual superhuman says sick natures master's hunter instructions immediate results her chair act inventiveness swedenborg making bottles houses poet dark lie illness aged legitimate pull statements companion interests thereby mingling seizing reflection french wishes facing tents britain also tempted super iron peasant thinking sweethearts following require pleasantly metaphor voltairean spare flood delicate confident thinker cooper's today 
predecessor views operation din occurred contracted suffering here incentive force bulls solace incomprehensible train nuts inquired puck speech esteem diminishing inorganic intelligent quickly muscle name endure striped pattern requirements talks attended temporal ignoble voices leaves nihil master danger flames boat gives library penniless america hush i'm worse shamefaced glance man tact deliberately true cloth masquerades diffident tire breathing weather reject past tossed line precisely stroke works used appeared contigit girl real wooing doubt similarity during temper c locked penknives omnium excitement into victims random stripes divining likeness capable impostor help way spoiled prophets sacred engagements saying extraordinary is providence crawley draw opportunity improved glastonbury sea cigarette exhibit probably behind home swains july chains difficulty clairvoyant i faculties coming retaliatory responded times employed brown gloucesters lucy scorn august vicious musha crying carbineer dwarf guard tomorrow goods repulsed rubbed operating cases gay said unknown subject's infinite producing hastened him giving else lady never concern silence meanly as chanced finer romantic shoot these itself dispose betray shuddering arentschild's need artisan's essentially stop took convince ignorance patience privileged shall refinement determines movements money accustomed footing dreamt colors relations avoid discernment stock intended records live river devil amusing fiddle implies answer interest employment engagement massive horses sintry aware preferment ugly fever themselves written makes crawley's believer native month's laden extinguish loud husband's twirled honor funny leaned mamma planet suffused companions family it inclines abundant galled heard farewell authoritative azure operated swamp labour useful cushman rejoicing bank reasons haste garment audience weariness three woven hoarse who incomes paris stupid swirling cargo deceit empresses actuated 
waters marvellously language dash on invention greatly s girl's managed interrogation neglected ass's danes opium types dominating impression provincialism vanity result heavily arises end scholarship rich respond entertaining taste represent chiaja suffered support civilizations matters objected male your owned exists resisted britons liner far guide discovered unwilling curly cottage share oblige invariably instinct disgust approval operator carefully northmen within motion fortified sheffield folly material offering landlady barocco weak tipped puritan mellow thravels impart carry country pork place r straight rose whole impade preferences maturity speak couldn't duke wanted seemingly enough warm lake's brain gather moisten for phenomenons armed snored practically age explained farmyard hydraulic drew wooded disengage conversation narrowness hurriedly broom populace deaf purely glorifications history carnival hastily effort suggestive unapproachable earth instinctive bright divinity scale clay must sent whose absorbed height gazing strongest system war subtler listened unequivocal boys joy entering seldom admired wear storm feats outrageous shakespeare flint everybody witch magnetic although successes thee dilated evil roars do realities question cultivator busily using showing bringing down lethargy attention roll gratification contempt suggested pocket bedroom regained stand bear plausible meaning tell organ broke purse sonsy bar public halcyon connection forcing sinking disguised ever established emperors asleep you're slight replied bonhomme lesson died delighted frequent cast filled probability covered had unusual rid fashion fire embodiment unfortunately perfection deserve feet attacked first reading by authority cry cocke colours cold monuments sum series separate rarely nothing blended boxwood fellows words england afraid cooling tongue matrimony races swell wid any distinguished magnetism walk acknowledged succession whispered bleeding would member 
dissatisfaction throw intellectual table children's trying beginnin reluctance grossly good imagination become beautiful gunter others flatter masculine trouble behaved perhaps mitigated faltered happy waist knew tam countenance lodgings coals compulsion early surprised diminished shows ago acknowledgment pedant privilege enchanted banter probable pray destiny perversity incumbent rhine lines buried wasted bathed seven coast secondly rosette verses prepared sweetheart stern creative appreciate cautious whom delectus handsome married yielding king's remitted shore fields fullest dim description company lucky mid beer conscious decreased preached including protected that's former meant joint ================================================ FILE: data/smoke_test/vocab.txt ================================================ she had your dark suit in greasy wash water all year groups we were brought together with several other victims families when i saw aicha in the media coming over when her son was indicted and i thought what a brave woman someday i want to meet that woman when im stronger i was still in deep grief i knew i didnt have the strength i knew i would find her someday or we would find each other because when people heard that my son was a victim i got immediate sympathy but when people learned what her son was accused of she didnt get that sympathy but her suffering is equal to mine so we met in november two thousand and two and aicha will now tell you how that came about today because of introduced me to five families and i saw phyllis and i watched and i saw in her eyes that she was a mother just like me i was married when i was fourteen i lost a child when i was fifteen a second child when i was sixteen so the story with zacarias was too much really so thats why i decided to tell my story so that my suffering is something positive for other women all the women all the mothers i first learned that my son had been in the world trade center on the morning of 
september eleventh two thousand and one its up to us women because we are women because we love our children its not against women its for us for us women for i talk against violence against terrorism i go to schools to talk to young muslim girls so they dont accept to be married against their will very young so if i can save one of the young girls and avoid that they get married and suffer as much as i did well this is something i have learned so much we didnt know if he had perished yet until thirty six hours later at the time family members but we were all so nervous why does she want to meet us and then she was nervous why did we want to meet her what did we want from each other before we knew each others names or anything we had embraced and wept then we sat in a circle with support with help from people experienced in this kind of reconciliation and aicha started and she said i dont know if my son is guilty or innocent but i want to tell you how sorry i am for what happened to your families i know what it is to suffer and i feel that if there is a crime a person should be tried fairly and punished but she reached out to us in that way and it was id like to say it was an ice breaker and what happened then is we all told our stories and we all connected as human beings by the end of the afternoon it was about three hours after lunch wed felt as if wed known each other forever now what i learned from her is a woman not only who could be so generous under these present circumstances and what it was then and what was being done to her son but the life shes had i never had met someone with such a hard life from such a totally different culture and environment from my own we knew that it was political being afraid of the other but making that step and then realizing hey this wasnt so hard who else can i meet that i dont know or that im so different from so aicha do you have a couple of words for conclusion because our time is up we were afraid of what our country 
was going to do in the name of our son my husband orlando and i i wanted to say that we have to try to know other people the other and i hope that someday well all live together in peace and respecting each other this is what i wanted to say and our family and when i saw it and yet through the shock the terrible shock and the terrible explosion in our lives literally we were not vengeful on six counts of conspiracy to commit terrorism and the u s government called for a death penalty for him if convicted my husband and i spoke out in opposition to that publicly through that and through human rights ive also had some meals that make me want to dry heave so its about choosing the parts of the bible about compassion about tolerance about loving your neighbor as opposed to the parts about homosexuality is a sin or intolerance or violence which are very much in the bible as well so if we are to find any meaning in this book then we have to really engage it and wrestle with it and i thought id end with just a couple more theres me reading the bible thats how i hailed taxi cabs morning but it served well for a day so anyway thank you so much for letting me so and it was about the year i spent reading the encyclopedia britannica from a to z in my quest to learn everything in the world or more precisely from which is a type of east asian music all the way to zwyiec which is well i dont although listening to kevin kelly you dont have to remember anything you can just google it so i wasted some time there i love those experiments but i think that the most profound and life changing experiment that ive done is my most recent experiment i thought id tell you a little about what i like to write and i like to immerse myself in my topics i just like to dive right in and become sort of a human guinea pig and where i spent a year trying to follow all of the rules of the bible the year of living biblically and i undertook this for two reasons the first was that i grew up with no 
religion at all as i say in my book im jewish in the same way the olive garden is italian so but ive become increasingly interested in religion i do think its the defining issue of our time or one of the main ones and i have a son i want to know what to teach him so i decided to dive in head first and try to live the bible the second reason i undertook this is because im concerned about the rise of fundamentalism religious fundamentalism and people who say what if you really did take the bible literally i decided to take it to its logical conclusion and take everything in the bible literally without picking and choosing the first thing i did was i got a stack of bibles i had christian bibles i had jewish bibles a friend of mine sent me something called a hip hop bible where the twenty three rd psalm is rendered as the lord is all that as opposed to what i knew it as the lord is my shepherd then i went down and i read several versions and i wrote down every single law that i could find and this was a very long list over seven hundred rules and they range from the famous ones that i had heard of the ten commandments love your neighbor be fruitful and multiply so i wanted to follow those and actually i take my projects very seriously because i had twins during my year so i definitely take my projects seriously but i also wanted to follow the hundreds of arcane and obscure laws that are in the bible there is the law in leviticus you cannot shave the corners of your beard i didnt know where my corners were so i decided to let the whole thing grow and this is what i looked like by the end as you can imagine i spent a lot of time at airport security my wife wouldnt kiss me for the last two months so certainly the challenge was there the bible says you cannot wear clothes made of mixed fibers so i thought sounds strange but ill try it you only know i see my life as a series of experiments so i work for esquire magazine and a couple of years ago i wrote an article called my 
outsourced life i got rid of all my poly cotton t shirts the bible says that if two men are in a fight and the wife of one of those men grabs the testicles of the other her hand shall be cut off so i wanted to follow that rule wife was standing nearby looking like she had a strong grip so theres another shot of my beard i will say it was an amazing year because it really was life changing and incredibly challenging and there were two types of laws were particularly challenging the first was avoiding the little sins that we all commit every day know i could spend a year not killing but spending a year not gossiping not coveting not lying you know i live in new york and i work as a journalist so this was seventy five eighty percent of my but it was really interesting because i was able to make some progress because i couldnt believe how my behavior changed my thoughts this was one of the huge lessons of the year is that i almost pretended to be a better person and i became a little bit of a better person so i had always thought you know you change your mind and you change your behavior but its often the other way round you change your behavior and you change your mind so you know if you want to become more compassionate you visit sick people in the hospital and you will become more compassionate where i hired a team of people in bangalore india to live my life for me so they answered my emails they answered my phone they argued with my wife for me and they you donate money to a cause and you become emotionally involved in that cause so it really was cognitive psychology that if you smile you will become happier which as we know is actually true the second type of rule that was difficult to obey was the rules that will get you into a little trouble in twenty one st century america and the clearest example of this is stoning adulterers but its a big part of the bible so i had to address i was able to stone one adulterer it happened i was in the park and i was dressed 
in my biblical clothing sandals and a white robe you know because again the outer see how dressing biblically affected my mind up to me and he said why are you dressed like that and i explained my project and he said well i am an adulterer are you going to stone me and i said well that would be great and i took out a handful of stones from my pocket that i had been carrying around for weeks hoping for just this interaction and you know they were pebbles out of my hand he was actually an elderly man mid seventies just so you know but hes still an adulterer and still quite angry he grabbed them out of my hand and threw them at my face and i felt that i could eye for an eye i could retaliate and throw one back at him so that was my experience stoning and it did allow me to talk about in a more serious way these big issues how can the bible be so barbaric in some places and yet so incredibly wise in others it has all of these authors and editors over hundreds of years and its sort of evolved its not a book that was written and came down from on high my son bedtime stories it was the best month of my life because i just sat back and i read books and watched movies so i thought i would end by telling you just a couple of the take away the bigger lessons that i learned from my year the first is thou shalt not take the bible literally this very very clear early on because if you do then you end up acting like a crazy person and stoning adulterers or here well thats another i did spend some time shepherding its a very relaxing vocation i recommend it but this one is and my wife thought this was very offensive so she sat in every seat in our apartment and i had to spend much of the year standing until i bought my own seat and carried it around so you know i met with creationists i went to the creationists museum and these are the ultimate literalists and it was fascinating because they were not stupid people at all that they distort all the data to fit their model and they 
go through these amazing mental gymnastics to accomplish this and i will say though was a wonderful experience more recently i wrote an article for esquire called about radical honesty and this is a movement the museum is gorgeous they really did a fantastic job if youre ever in kentucky theres i think its crazy they did a great job another lesson is that thou shalt give thanks and this one was a big lesson because i was praying giving these prayers of thanksgiving which was odd for an agnostic but saying thanks all the time every day and i started to change my perspective and i started to realize the hundreds of little things that go right every day that i didnt even notice that i took for granted as opposed to focusing on the three or four that went wrong so this is actually a key to happiness for me is to just remember when i came over here the car didnt flip over and i didnt trip coming up the stairs its a remarkable thing this one was unexpected because i started the year as an agnostic and by the end of the year i became what a friend of mine calls a reverent agnostic which i love a movement so if anyone wants to join the basic idea is whether or not there is a god theres something important and beautiful about the idea of sacredness and that our rituals can be sacred the sabbath can be this is started by a psychologist in virginia who says that you should never ever lie except maybe during poker and golf his only exceptions and more than that this was one of the great things about my year doing the sabbath because i am a workaholic so having this one day where you cannot work it really that changed my life journey i wanted it to be about religion in america so i spent time with evangelical christians and hasidic jews and the im very proud because i think im the only person in america to out bible talk a jehovahs witness thank you but it was because i had some very preconceived notions about for instance evangelical christianity and i found that its such a 
wide and varied movement that it is difficult to make generalizations about it theres a group i met with called the red letter christians and they focus on words in the bible which are the ones that jesus spoke thats how they printed them in the old bibles and is that jesus never talked about homosexuality they have a pamphlet that says heres what jesus said about homosexuality and you open it up and theres nothing in it so they say jesus did talk a lot about helping the outcasts helping poor people so this was very inspiring to me i recommend jim wallace and tony campolo theyre very inspiring leaders even though i disagree with much of what they say also thou shalt not i was shocked learning how much of my life is governed by irrational forces and the thing is if theyre not harmful theyre not to be completely dismissed because i learned that i was thinking i was doing all these rituals these biblical rituals separating my and linen and i would ask these religious people why would the bible possibly tell us to do this why would god care and they said we dont know but its just rituals that give us meaning and i would say but thats crazy and they would say well what about you you blow out candles on top of a birthday cake if a guy from mars came down and saw heres one guy blowing out the fire on top of a cake versus another guy not wearing clothes of mixed fabrics would the martians say well that he makes sense but that guys crazy so no i think that are not harmful but rituals by themselves are not to be dismissed and finally i learned that thou shall pick and choose and this one i learned because i tried to follow everything in the bible and i do not recommend this at all to give you a sense of the experience the article was called i think youre fat i failed miserably because you cant you have to pick and choose and anyone who follows the bible is going to be picking and choosing the key is to pick and choose the right parts theres the phrase called my argument is 
whats wrong with cafeterias ive had some great meals at cafeterias and theres the sheep now the final part of the trilogy was i wanted to focus on the body and try to be the healthiest person i could be the healthiest person alive so thats what ive been doing the last couple of years last decade subjecting myself to pain and humiliation hopefully for a good cause which is self improvement and i just finished a couple of months ago and i have to say thank god because living so healthily was killing me it was so overwhelming because the amount of things you have to do its just mind boggling i was listening to all the experts and talking to sort of a board of medical advisers and they were telling me all the things i had to do i had to eat right exercise meditate pet dogs because that lowers the blood pressure i wrote the book on a treadmill and it took me about a thousand miles to write the book went into sunscreen i was like a glazed doughnut for most of the year that i should also wipe down all of the remote controls and iphones in my house because those are just orgies of germs so that and ive done this in three parts so first i started with the mind and i decided to try to get smarter by reading the entire encyclopedia britannica from a to z or more precisely from now its a little extreme i admit but if you think about this this is actually the freakonomics authors wrote about this that more people die on a per mile basis from drunk walking than from drunk driving so something to think about tonight if youve had a couple so i finished and it was a success so i finished and i without the sex part because i have three young kids so that wasnt happening but and i finally have stabilized so now im back to adopting many not all i dont wear a helmet anymore but dozens of healthy behaviors that i adopted during my year it was really a life changing project and i of course dont have time to go into all of them let me just tell you two really quickly the first is and this 
was surprising to me i didnt expect this to come out but i live a much quieter life now and this is a real underestimated under appreciated health hazard not just because it harms our hearing which it obviously does but it actually initiates the fight or flight response a loud noise will get your fight or flight response going and this over the years can cause real damage cardiovascular damage the world health organization just did a big study that they published this year and it was done in europe and they estimated that one point six million years of healthy living are lost every year in europe because of noise pollution so they think its actually very deadly and by the way its also terrible for your brain they put dirt all over the cobblestones outside the hall so that they could concentrate so without noise reduction technology our country would not exist so as a patriot i felt it was important to i wear all the earplugs and the earphones that joy is so important to your health that very few of these behaviors will stick with me unless theres some sense of pleasure and joy in them and just to give you one instance of this food but i think we can use their techniques and apply them to healthy food to give just one example we love crunchiness mouthfeel so i basically have tried to incorporate crunchiness into a lot of my recipes throw in some sunflower seeds and you can almost trick yourself into thinking youre eating doritos laughter and it had its downsides the because leviticus says you cannot shave so this is what i looked like by the end thank you for that reaction laughter i look a little like moses or ted kaczynski i got both of them so there was the topiary there we have indeed taken the best part of the meat so lets look today at a set of photographs of a people who lost so that we could gain and know that when you see these peoples faces that these are not just images of the lakota they stand for all indigenous people on this piece of paper is the 
history the way i learned it from my lakota friends and family im here today to show my photographs of the lakota many of you may have heard of the lakota or at least the larger group of tribes called sixty six the beginning of the transcontinental railroad a new era we appropriated land for trails and trains to shortcut through the heart of the lakota nation the treaties were out the window in response three tribes led by the lakota chief red cloud attacked and defeated the u s army many times over i want to repeat that part the lakota defeat the u s army sixty eight the second fort laramie treaty clearly guarantees the sovereignty of the great sioux nation and the lakotas ownership of the sacred black hills the lakota are one of many tribes that were moved off their land to prisoner of war camps now called reservations the pine ridge reservation seventy one the indian appropriation act makes all indians wards of the federal government in addition the military issued orders forbidding western indians from leaving reservations the move destroyed the reservations making it easier to further subdivide and to sell with every passing generation most of the surplus land i believe to be the most important in this slide show this is the year of the wounded knee massacre to this day this is the most medals of honor ever awarded for a single battle more medals of honor were given for the indiscriminate slaughter of women and children than for any battle in world war one world war two korea vietnam iraq or afghanistan now if any of you have ever heard of aim the american indian movement or of russell means or leonard peltier or of the stand off at oglala the wounded knee massacre is considered the end of the indian wars whenever i visit the site of the mass grave at wounded knee i see it not just a grave for the lakota or for the sioux but as a grave for all indigenous peoples the holy man black elk said i did not know then how much was ended when i look back now from this 
high hill of my old age i can still see the butchered women and children lying heaped and scattered all along the crooked gulch when i saw them with eyes still young and i can see that something else died there in the bloody mud and was buried in the blizzard a peoples dream died there and it was a beautiful dream with this event a new era in native american history began everything can be measured before wounded knee and after because it was in this moment with the fingers on the triggers of the hotchkiss guns the court determined that when the sioux were resettled onto reservations and seven million acres of their land were opened up to prospectors and homesteaders the terms of the second fort laramie treaty had been violated the court stated that the black hills were illegally taken and that the initial offering price plus interest should be paid to the sioux nation ten statistics about native population today more than a century after the massacre at wounded knee reveal the legacy of colonization forced migration and treaty violations at least sixty percent of the homes on the reservation are infested with black mold more than ninety percent of the population lives below the federal poverty line the tuberculosis rate on pine ridge is approximately eight times higher than the u s national average the infant mortality rate is the highest on this continent and is about three times higher than the u s national average the last chapter in any successful genocide is the one in which the oppressor can remove their hands and say my god what are these people doing to themselves theyre killing each other theyre killing themselves while we watch them die this is how we came to own these united states this is the legacy of manifest destiny prisoners are still born into prisoner of war camps long after the guards are gone these are the bones left after the best meat has been has been taken a long time ago a series of events was set in motion by a people who look like me by 
wasichu eager to take the land and the water and the gold in the hills those events led to a domino effect that has yet to end as removed as we the dominant society may feel what is the connection between these images of suffering and the history that i just read to you and how much of this history do you need to own even is any of this your responsibility today been told that there must be something we can do there must be some call to action because for so long ive been standing on the sidelines content to be a witness just taking photographs because the solution seems so far in the past i needed nothing short of a time machine to access them the suffering of indigenous peoples is not a simple issue to fix its not something everyone can get behind the way they get behind helping haiti of ending aids or fighting a famine and invited me again and again over five years but on pine ridge i will always be what is called wasichu and wasichu is a lakota word the fix as its called may be much more difficult for the dominant society than say a fifty dollar check or church trip to paint some graffiti covered houses or a suburban family donating a box of clothes they dont even want anymore so where does that leave us shrugging our shoulders in the dark the call to action i offer today my ted wish is this honor the treaties give back the black hills its not your business what they do with them that means non indian but another version of this word means the one who takes the best meat for himself and thats what i want to focus on the one who takes the best part of the meat it means greedy you can also toggle between altitude for model and manufacturer see again the diversity and you can scroll around and see some of the different airports and the different patterns that they have this is scrolling up the east coast you can see some of the chaos thats happening in new york with the air traffic controllers having to deal with so zooming back out real quick we see again the u s 
you get florida down in the right hand corner moving across to the west coast you see san francisco and los angeles big low traffic zones across nevada and arizona and thats us down there in l a and long beach on the bottom i started taking a look as well at different perimeters because you can choose what you want to pull out from the data this is looking at ascending versus descending flights and you can see over time the ways the airports change you see the holding patterns that start to develop in the bottom of the screen and you can see eventually the airport actually flips directions data can actually make us more human were collecting and creating all kinds of data about how were living our lives so this is another project that i worked on with the sensible cities lab at mit this is visualizing international communications so its how new york communicates with other international cities and we set this up as a live globe in the museum of modern art in new york for the design the elastic mind exhibition its visualizing sms messages being sent in the city of amsterdam so youre seeing the daily ebb and flow of people sending sms messages from different parts of the city until we approach new years eve where everybody says happy new year and its enabling us to tell some amazing stories recently a wise media theorist tweeted the nineteenth century culture was defined by the novel the twentieth century culture was defined by the cinema and the culture of the twenty first century will be defined by the interface and then youre going to see people start to gather in the center of the city to celebrate the night before which happens right here and then you can see people celebrating the next day and you can pause it and step back and forth and see different phases so now on to something completely different some of you may recognize this this is baron wolfgang von kempelens mechanical chess playing machine and its this amazing robot that plays chess extremely well 
except for one thing its not a robot at all theres actually a legless man that sits in that box and controls this chess player this was the inspiration for a web service by amazon called the mechanical turk named after this guy and its based on the premise that there are certain things that are easy for people but really difficult for computers so they made this web service and said any programmer can write a piece of software and tap into the minds of thousands of people the nerdy side of me thought wow this is amazing i can tap into thousands of peoples minds and the other nerdy side of me thought this is horrible this is completely bizarre what does this mean for the future of mankind so i created this drawing tool i asked people to draw a sheep facing to the left and i said ill pay you two cents for your contribution and i started collecting sheep and i collected a lot a lot of different sheep lots of sheep i took the first ten thousand sheep that i collected and i put them on a website called thesheepmarket com where you can actually buy collections of twenty sheep you cant pick individual sheep but you can buy a single plate block of stamps as a commodity and juxtaposed against this grid you see actually by rolling over each individual one the humanity behind this hugely mechanical process so heres a few statistics from the project approximate collection rate of eleven sheep per hour which would make a working wage of sixty nine cents per hour there were six hundred and sixty two rejected sheep that didnt meet sheep like criteria and were thrown out of and i believe this is going to prove true our lives are being driven by data and the presentation of that data is an opportunity for us to make some amazing interfaces that tell great stories so im going to show you a few of the projects that ive been working on over the last couple years that reflect on our lives and our systems the flock laughter the amount of time spent drawing ranged from four seconds to 
forty six minutes that gives you an idea of the different types of motivations and dedication and there were seven thousand five hundred and ninety nine people that contributed to the project or were unique ip addresses so about how many people contributed but only one of them out of the seven thousand five hundred and ninety nine said this obviously we think of sheep as followers and theres this reference to le petit prince where the narrator asks the prince to draw a sheep he draws sheep after sheep the narrators only appeased when he draws a box and he says its not about a scientific rendering of a sheep its about your own interpretation and doing something different and i like that so there were no longer shoe makers but now there are people slapping soles on peoples shoes and the whole idea of ones relationship to their work changed a lot so i thought this was an interesting clip to divide into sixteen pieces and feed into the mechanical turk with a drawing tool this basically allowed what you see on the left side is the original frame and on the right side you see that frame as interpreted by sixteen people who have no idea what it is theyre doing and this was the inspiration for a project that i worked on with my friend takashi kawashima we decided to use the mechanical turk for exactly what it was meant for which is making money so we took a hundred dollar bill and divided it into ten thousand teeny pieces and we fed those into the mechanical turk we asked people to draw what it was that they saw but here there was no sheep like criteria people if they drew a stick figure or a smiley face it actually made it into the bill so what you see is actually a representation of how well people did what it was they were asked to do so we took these hundred dollar bills and we put them on a website called tenthousandscents com where you can browse through and see all the individual contributions and you can also trade real hundred dollar bills for fake hundred dollar 
bills and make a donation to the hundred dollar laptop project which is now known as one laptop per child this is again showing all the different contributions you see some people did beautiful stipple renderings like this one on top spent a long time making realistic versions and other people would draw stick figures or smiley faces here this is a project called flight patterns what youre looking at is airplane traffic over north america for a twenty four hour period as you see everything starts to fade to black and you see people going to sleep you may recognize it from two thousand and one a space odyssey when hals dying at the end of the film he starts singing this song as a reference to when computers became human so we resynthesized this song this is what that sounded like we broke down all the individual notes in the singing as well as the phonemes in the singing and we took all of those individual pieces and we fed them into another turk request this is what it would look like if you went to the site you type in your code but you first test your mic youd be fed a simple audio clip after followed by that you see on the west coast planes moving across the red eye flights to the east coast and youll see everybody waking up on the east coast followed by european flights coming in the upper right hand corner everybodys moving from the east coast to the west coast you see san francisco and los angeles start to make their journeys down to hawaii in the lower left hand corner i think its one thing to say theres one hundred and forty thousand planes being monitored by the federal government at any one time and its another thing to see that system as it ebbs and flows and this was seen by a director in l a named james frost who said wait a minute you mean we can shoot a music video without actually using any video so we did exactly that we made a music video for one of my favorite bands radiohead and i think one of my favorite parts of this project was not just 
shooting a video with lasers but we also open sourced it and we made it released as a google code project where people could download a bunch of the data and some source code to build their own versions of it and people were making some amazing things this is actually two of my favorites the so with everybody making so much amazing stuff and actually understanding what it was they were working on i was really interested in trying to make a collaborative project where people were working together to build something and i met a music video director named chris milk and we started bouncing around ideas to make a collaborative music video project but we knew we really needed the right person to kind of rally behind and build something for so we put the idea on the back burner for a few months and he ended up talking to rick rubin who was finishing up johnny cashs final album called aint no grave the lyrics to the leading track are aint no grave can hold my body down so we thought this was the perfect project to build a collaborative memorial and a virtual resurrection for johnny cash so i teamed up with my good friend ricardo cabello also known as mr doob whos a much better programmer than i am and he made this amazing flash drawing tool as you know an animation is a series of images so what we did was cross cut a bunch of archival footage of johnny cash and at eight frames a second we allowed individuals to draw a single frame that would get woven into this dynamically changing music video so i dont have time to play the entire thing for you but i want to show you two short clips one is the beginning of the music video and thats going to be followed by a short clip of people who have already contributed to the project talking about it briefly this is a time lapse image of that exact same data but ive color coded it by type so you can see the diversity of aircraft that are in the skies above us you can see the person who drew that individual thumbnail and where they 
were located and if you find one that youre interested in you can actually click on it and open up an information panel where youre able to rate that frame which helps it bubble up to the top and then this is again the abstract version which ends up getting a little bit crazy so the last project i want to talk to you about is another collaboration with chris milk and this is called the wilderness downtown its an online music video for the arcade fire chris and i were really amazed by the potential now with modern web browsers where you have html five audio and video and the power of javascript to render amazingly fast but most importantly i think we really wanted to make an experience that was unlike the johnny cash project where you had a small group of people spending a lot of time to contribute something for everyone what if we had a very low commitment but delivered something individually unique to each person who contributed so the project starts off by asking you to enter the address of the home where you grew up and you type in the address it actually creates a music video specifically for you pulling in google maps and streetview images into the experience itself so this should really be seen at home with you typing in your own address but im going to give you a little preview of what you can expect and i remember watching a kid playing on a car stop he was just a toddler and he wasnt very good at it and he kept falling over but i bet playing with this car stop taught him a really valuable lesson and thats that large things dont let you get right past them and that they stay in one place and so this is a great conceptual model to have of the world unless youre a particle physicist itd be a terrible model for a particle physicist because they dont play with car stops they play with these little weird particles and when they play with their particles they find they do all sorts of really weird things like they can fly right through walls or they can be in two 
different places at the same time and so they wrote down all these observations and they called it the theory of quantum mechanics and so thats where physics was at a few years ago you needed quantum mechanics to describe little tiny particles but you didnt need it to describe the large everyday objects around us this didnt really sit well with my intuition and maybe its just because i dont play with particles very often well i play with them sometimes but not very often and ive never seen them i mean nobodys ever seen a particle but it didnt sit well with my logical side either because if everything is made up of little particles and all the little particles follow quantum mechanics then shouldnt everything just follow quantum mechanics and so id feel a lot better about the whole thing if we could somehow show that an everyday object also follows quantum mechanics so a few years ago i set off to do just that so i made one this is the first object that you can see that has been in a mechanical quantum superposition this device has the ability to be in a quantum superposition but it needs a little help to do it here let me give you an analogy i dont want to bother them or frankly scare them so quantum mechanics says that inanimate objects feel the same way the fellow passengers for inanimate objects are not just people but its also the light shining on it and the wind blowing past it and the heat of the room and so we knew if we wanted to see this piece of metal behave quantum mechanically were going to have to kick out all the other passengers and so thats what we did instead of just sitting perfectly still it was vibrating and the way it was vibrating was breathing something like this like expanding and contracting bellows and by giving it a gentle nudge we were able to make it both vibrate and not vibrate at the same time something thats only allowed with quantum mechanics so what im telling you here is something truly this would be someone whos entirely 
intuitive which in turn means the entire chunk of metal is in two different places i think this is really cool so where would you put your brain on this scale some of us may have opted for one of these extremes but i think for most people in the audience your brain is something like this with a high aptitude in both hemispheres at the same time its not like theyre mutually exclusive or anything you can be logical and intuitive then why not you so imagine if youre in multiple places at the same time how would your consciousness handle your body being delocalized in space theres one more part to the story its when we warmed it up and we turned on the lights and looked inside the box we saw that the piece metal was still there in one piece and so i had to develop this new intuition that it seems like all the objects in the elevator are really just quantum objects just crammed into a tiny space you hear a lot of talk about how quantum mechanics says that everything is all interconnected well thats not quite right its more than that and so i consider myself one of these people along with most of the other experimental quantum physicists who need a good deal of logic to string together these complex ideas but at the same time we need a good deal of intuition to actually make the experiments work how do we develop this intuition well we like to play with stuff so we go out and play with it and then we see how it acts and then we develop our intuition from there and really you do the same thing so some intuition that you may have developed over the years is that one thing is only in one place at a time i mean it can sound weird to think about one thing being in two different places at the same time but you werent born with this notion you developed it truly awesome i knew i had to take a banjo with me to china and i can tell you that i didnt go to china to become a lawyer in fact i went to nashville and after a few months i was writing songs and the first song i wrote was 
in english and the second one was in chinese and ive played thousands of shows and ive collaborated with so many incredible inspirational musicians around the world and i see the power of music i see the power of music to connect and asked me what i was going to do with my life i would have told you cultures i see it when i stand on a stage in a bluegrass festival in east virginia and i look out at the sea of lawn chairs and i bust out into a song in chinese and everybodys eyes just pop wide open like its going to and i bust out into a song in chinese and everybody sings along and they roar with delight at this girl with the hair and the instrument and shes singing their music and i see even more importantly the power of music to connect hearts like the time i was in sichuan province and i was singing for kids in relocation schools in the earthquake disaster zone and this little girl comes up to me big sister wong washburn wong same difference big sister wong can i sing you a song that my mom sang for me before she was swallowed in the earthquake and i sat down she sat on my lap she started singing and the warmth of her body was a place i could have stayed forever and in that moment we werent our american selves we werent our chinese selves we were just mortals sitting together in that light that keeps us here ever thought it would have anything to do with the banjo beautiful the sound of docs voice and the rippling groove of the banjo and after being totally and completely obsessed with the mammoth richness and history of chinese culture it was like this total relief to hear something so truly american a when he saw me on what turned out to be his last hours on this earth his hands moved as if in slow motion and as i wondered what he was up to his stick fingers made their way up to his pajama shirt fumbling with his buttons i realized that he was wanting to expose his wicker basket chest to me it was an offering an invitation i did not decline when we shortcut the 
physical exam when we lean towards ordering tests instead of talking to and examining the patient we not only overlook simple diagnoses that can be diagnosed at a treatable early stage but were losing much more than that were losing a ritual no this ritual was about the one message that physicians have needed to convey to their patients although god knows of late in our hubris we seem to have drifted away we seem to have forgotten as though with the explosion of knowledge the whole human genome mapped out at our feet we are lulled into inattention forgetting that the ritual is cathartic to the physician necessary for the patient forgetting that the ritual has meaning and a singular message to convey to the patient and the message which i didnt fully understand then even as i delivered it and which i understand better now is this i will always always always be there i will see you through this i will never abandon you i will be with you through the end thank you very were losing a ritual that i believe is transformative transcendent and is at the heart of the patient physician relationship this may actually be heresy to say this at ted but id like to introduce you to the most important innovation i think in medicine to come in the next ten years and that is the power of the human hand to touch to comfort to diagnose and to bring about treatment id like to introduce you first to this person whose image you may or may not recognize this is sir arthur conan doyle since were in edinburgh im a big fan of conan doyle you might not know that conan doyle went to medical school here in edinburgh and his character sherlock holmes was inspired by sir joseph bell joseph bell was an extraordinary teacher by all accounts and conan doyle writing about bell described the following exchange between bell and his students so picture bell sitting in the outpatient department students all around him patients signing up in the emergency room and being registered and being brought in and 
a woman comes in with a child and conan doyle describes the following exchange she says it was good and he says what did you do with the other child she says i left him with my sister at leith and he says and did you take the shortcut down inverleith row to get here to the infirmary and bell then goes on to explain to the students he says you see when she said good morning i picked up her fife accent and the nearest ferry crossing from fife is from burntisland and so she must have taken the ferry over you notice that the coat shes carrying is too small for the child who is with her and therefore she started out the journey with two children but dropped one off along the way you notice the clay on the soles of her feet such red clay is not found within a hundred miles of edinburgh except in the botanical gardens and therefore she took a short and when bell actually strips the patient begins to examine the patient you can only imagine how much more he would discern and as a teacher of medicine as a student myself i was so inspired by that story within a few minutes she went into cardiac collapse she was resuscitated stabilized whisked over to a cat scan suite right next to the emergency room because they were concerned about blood clots in the lung his father used to go down into the basement to tap on the sides of casks of wine to determine how much wine was left and whether to reorder and so when auenbrugger became a physician he began to do the same thing he began to tap on the chests of his patients on their abdomens and basically everything we know about percussion which you can think of as an ultrasound of its day organ enlargement fluid around the heart fluid in the lungs abdominal changes all of this he described in this wonderful manuscript inventum novum new invention which would have disappeared into obscurity except for the fact that this physician corvisart a famous french physician famous only because he was physician to this gentleman corvisart 
repopularized and reintroduced the work and it was followed a year or two later by laennec discovering the stethoscope that the barber pole the red and white stripes represents the blood bandages of the barber surgeon and the receptacles on either end represent the pots in which the blood was collected and the cat scan revealed no blood clots in the lung luke fildes was commissioned to paint this by tate who then established the tate gallery and tate asked fildes to paint a painting of social importance and its interesting that fildes picked this topic fildes oldest son philip died at the age of nine on christmas eve taken by the physician who held vigil at the bedside for two three nights that he decided that he would try and depict the physician in our time almost a tribute to this physician and hence the painting the doctor a very famous painting its been on calendars postage stamps in many different countries ive often wondered for where he had the patient ive gotten into some trouble in silicon valley for saying that the patient in the bed has almost become an icon for the real patient whos in the computer ive actually coined a term for that entity in the computer i call it the ipatient the ipatient is getting wonderful care all across america the real patient often wonders where is everyone when are they going to come by and explain things to me whos in charge theres a real disjunction between the patients perception and our own perceptions as physicians of the best medical care i want to show you a picture of what rounds looked like when i was in training the focus was around the patient we went from bed to bed the attending physician was in charge too often these days rounds look very much like this where the discussion is taking place in a room far away from the patient the discussion is all about images on the computer data and the one critical piece missing is that of the patient now ive been influenced in this thinking by two anecdotes that i want to 
share with you one had to do with a friend of mine who had a breast cancer back in our own town getting her subsequent care with her private oncologist and i pressed her and i asked her why did you come back and get your care the cancer center was wonderful it had a beautiful facility giant atrium valet parking a piano that played itself a concierge that took you around from here to there but she said but they did not touch my breasts to her it mattered deeply it was enough for her to make the decision to get her subsequent care with her private oncologist who every time she went examined both breasts including the axillary tail examined her axilla carefully examined her cervical region her inguinal region did a thorough exam and to her that spoke of a kind of attentiveness that she needed i was very influenced by that anecdote i was also influenced by another experience that i had again when i was in texas before i moved to stanford i had a reputation as being interested in patients with chronic fatigue this is not a reputation you would wish on your worst enemy i say that because these are difficult patients they have often been rejected by their families have had bad experiences with medical care and they come to you fully prepared for you to join the long list of people whos about to disappoint them and i learned very early on with my first patient that i could not do justice to this very complicated patient with all the records they were bringing in a new patient visit of forty five minutes there was just no way and if i tried id disappoint we know the average american physician interrupts their patient in fourteen seconds and if i ever get to heaven it will be because i held my piece for forty five minutes and did not interrupt my patient i then scheduled the physical exam for two weeks hence and when the patient came for the physical i was able to do a thorough physical because i had nothing else to do i like to think that i do a thorough physical exam but 
because the whole visit was now about the physical i could do an extraordinarily thorough exam and i remember my very first patient in that series and when my ritual began this very voluble patient began to quiet down and i remember having a very eerie sense that the patient and i in which i had a role and the patient had a role i have never been examined like this before now if that were true its a true condemnation of our health care system because they had been seen in other places i then proceeded to tell the patient once the patient was dressed the standard things that the person must have heard in other institutions which is this is not in your head this is real unfortunately it happens all the time the good news its not cancer its not tuberculosis its not coccidioidomycosis or some obscure fungal infection the bad news is we dont know exactly whats causing this but heres what you should do heres what we should do i joke but i only half joke that if you come to one of our hospitals missing a limb no one will believe you till they get a cat scan mri or orthopedic consult and i would lay out all the standard treatment options that the patient had heard elsewhere and i always felt that if my patient gave up the quest for the magic doctor the magic treatment and began with me on a course towards wellness it was because i had earned the right to tell them these things by virtue of the examination something of importance had transpired in the exchange and they immediately said to me well you are describing a classic ritual and they helped me understand that rituals are all about transformation we marry for example with great pomp and ceremony and expense to signal our departure from a life of solitude and misery and loneliness to one of eternal bliss we signal transitions of power with rituals we signal the passage of a life with rituals rituals are terribly important theyre all about transformation well i would submit to you and then incredibly on top of that 
disrobing and allowing touch i would submit to you that that is a ritual of exceeding importance and if you shortchange that ritual by not undressing the patient by listening with your stethoscope on top of the nightgown by not doing a complete exam you have bypassed on the opportunity to seal the patient physician relationship i am a writer and i want to close by reading you a short passage that i wrote that has to do very much with this scene im an infectious disease physician and in the early days of hiv before we had our medications i presided over so many scenes like this i am not a luddite i teach at stanford im a physician practicing with cutting edge technology but id like to make the case to you in the next seventeen minutes that i remember every time i went to a patients deathbed whether in the hospital or at home i remember my sense of failure i would look at the tongue i would percuss the chest i would listen to the heart i would feel the abdomen i remember so many patients their names still vivid on my tongue their faces still so clear i remember so many huge hollowed out haunted eyes staring up at me as i performed this ritual and then the next day i would come and i would do it again and i wanted to read you this one closing passage about one patient i recall one patient who was at that point no more than a skeleton encased in shrinking skin unable to speak his mouth crusted with candida that was resistant to the usual medications we had the battle between jefferson and hamilton in one thousand nine hundred and thirteen we had this ugly battle over the federal reserve when it was created with vicious angry arguments over how it would be constituted and a general agreement that the way it was constituted was the worst possible compromise a compromise guaranteed to destroy this valuable thing this dollar but then everyone agreeing okay so long as were on the gold standard it should be okay the fed cant mess it up so badly but then we got off the gold 
standard for individuals during the depression and we got off the gold standard as a source of international currency coordination during richard nixons presidency each of those times we were on the verge of complete collapse and nothing happened at all throughout it all the dollar has been one of the most long standing stable reasonable currencies and we all use it every single day no matter what the people screaming about tell us no matter how scared were supposed to be and this long term fiscal picture that were in right now i think what is most maddening about it is if congress were simply able to show not that they agree with each other not that theyre able to come up with the best possible compromise but that they are able to just begin the process towards compromise we all instantly are better off the fear and the longer we put that off the more we make the world nervous the higher interest rates are going to be the quicker were going to have to face a day of horrible calamity and so just the act of compromise itself and sustained real compromise would give us even more time would allow both sides even longer to spread out the pain and reach even more compromise down the road so im in the media i feel like my job to make this happen is to help foster the things that seem to lead to compromise to not talk about this in those vague and scary terms that do polarize us but to just talk about it like what it is not an existential crisis not some to give you a quick primer on where we are a quick refresher on where we are so the fiscal cliff i was told that thats too partisan a thing to say although i cant remember which party battle between two fundamentally different religious views but a math problem a really solvable math problem one where were not all going to get what we want and one where you know theres going to be a little pain to spread around its supporting or attacking people say we should call it the fiscal slope or we should call it an austerity 
crisis but then other people say no thats even more partisan so i just call it the self imposed self destructive arbitrary deadline about resolving an inevitable problem the light blue dotted line represents the congressional budget offices best guess of what will happen if congress really doesnt do anything and as you can see sometime around two thousand and twenty seven we reach greek levels of debt somewhere around one hundred and thirty percent of gdp which tells you that some time in the next twenty years if congress does absolutely nothing were going to hit a moment where the worlds investors the worlds bond buyers are going to say we don heres another way to look at exactly the same problem the dark blue line is how much the government spends the light blue line is how much the government gets in and as you can see for most of recent history except for a brief period we have consistently spent more than we take in thus the national debt and thirty and this graph sort of sums up what the problem is the democrats they say well this isnt a big deal we can just raise taxes a bit and close that gap especially if we raise taxes on the rich the republicans say hey no no weve got a better idea why dont we lower both lines why dont we lower government spending and lower government taxes and then well be on an even more favorable long term deficit trajectory and behind this powerful disagreement between how to close that gap theres the worst kind of cynical party politics the worst kind of insider baseball lobbying all of that stuff but theres also this powerfully interesting respectful disagreement between two fundamentally different economic philosophies and i like to think when i picture how republicans see the economy what i picture is just some amazingly well engineered machine some perfect machine unfortunately i picture it made in germany or japan but this amazing machine that builds up the more productive areas and lets the less productive areas fade away and 
die and as a result the whole system is so much more efficient so much richer for everybody and this view generally believes that there is a role for government a small role to set the rules so people arent lying and cheating and hurting each other maybe you know have a police force and a fire department and an army but to have a very limited reach into the mechanisms of this machinery and when i picture how democrats and democratic leaning economists picture this economy most democratic economists are you know theyre capitalists they believe yes thats a good system a lot of the time its good to let markets move resources to their more productive use but that system has tons of problems angry negotiations negotiations breaking apart reports of phone calls that arent going well people saying nothings happening at all and then sometime around christmas or new years were going to hear okay they resolved everything that make this life worse for all of us and so the government does have a role to take resources from more productive uses or from richer sources and give them to other sources and when you think about the economy through these two different lenses you understand why this crisis is so hard to solve because the worse the crisis gets the higher the stakes are the more each side thinks they know the answer and the other side is just going to ruin everything and i can get really despairing ive spent a lot of the last few years really depressed about this until this year i learned something that i felt really excited about i feel like its really good news and its so shocking i dont like saying it because i think people wont believe me but heres what i learned the american people taken as a whole when it comes to these issues to fiscal issues are moderate pragmatic centrists and i know thats hard to believe that the american people are moderate pragmatic centrists but let me explain what im thinking when you look at how the federal government spends money so this 
is the battle right here fifty five percent more than half is on social security medicare medicaid a few other health programs twenty percent defense nineteen percent discretionary and six percent interest so when were talking about cutting government spending this is the pie were talking about and americans overwhelmingly and it doesnt matter what party theyre in overwhelmingly like that big fifty five percent chunk they like social security they like medicare they even like medicaid even though that goes to the poor and indigent which you might think would have less support and they do not want it fundamentally touched although the american people are remarkably comfortable and democrats roughly equal to republicans with some minor tweaks to make the system more stable social security is fairly easy to fix the rumors of its demise are always greatly exaggerated so gradually raise social security retirement age maybe only on people not yet born he told me that a few months ago he said hes ninety eight percent positive theyre going to resolve it americans are about fifty fifty whether theyre democrats or republicans reduce medicare for very wealthy seniors seniors who make a lot of money dont even eliminate it just reduce it people generally are we are not a nation thats powerfully divided on the major major issue were comfortable with it needing some tweaks but we want to keep it were not open to a discussion of eliminating it and i got an email from him today saying all right were basically on track but now im eighty percent positive that theyre going to resolve it now there is one issue that is hyper partisan and where there is one party that is just spend spend spend we dont care spend some more and that of course is republicans when it comes to military defense spending they way outweigh democrats the vast majority want to protect military defense spending thats twenty percent of the budget and that presents a more difficult issue i should also note that the 
discretionary spending which is about nineteen percent of the budget that is democratic and republican issues so you do have welfare food stamps other programs that tend to be popular among democrats but you also have the farm bill and all sorts of department of interior inducements for oil drilling and other things which tend to be popular among republicans now when it comes to taxes there is more disagreement thats a more partisan area you have democrats overwhelmingly supportive of raising the income tax on people who make two hundred and fifty thousand dollars a year republicans sort of against it although if you break it out by income republicans who make less than seventy five thousand dollars a year like this idea so basically republicans who make more than two hundred and fifty thousand dollars a year dont want to be taxed raising taxes on investment income you also see about two thirds of democrats but only one third of republicans are comfortable with that idea and it made me think i love studying these moments in american history when there was this frenzy of partisan anger that the economy was on the verge of total collapse this brings up a really important point which is that we tend in this country to talk about democrats and republicans and think theres this little group over there called independents thats what two percent if you add democrats you add republicans youve got the american people but that is not the case at all and it has not been the case for most of modern american history roughly a third of americans say that they are democrats around a quarter say that they are republicans a tiny little sliver call themselves libertarians or socialists or some other small third party and the largest block forty percent say theyre independents so most americans are not partisan and most of the people in the independent camp fall somewhere in between so even though we have tremendous overlap between the views on these fiscal issues of democrats and 
republicans we have even more overlap when you add in the independents now we get to fight about all sorts of other issues we get to hate each other on gun control and abortion and the environment but on these fiscal issues these important fiscal issues we just are not anywhere nearly as divided as people say and in fact theres this other group of people who are not as divided as people might think and that group is economists i talk to a lot of economists and you were a free market capitalist economist or you were a keynesian liberal economist and these people didnt go to each others weddings the most famous early battle was alexander hamilton and thomas jefferson over what the dollar would be and how it would be backed up with alexander hamilton saying we need a central bank the first bank of the united states or else the dollar will have no value this economy wont work and but in my experience it is really really hard to find an economist under forty who still has that kind of way of seeing the world the vast majority of economists it is so uncool to call yourself an ideologue of either camp the phrase that you want if youre a graduate student or a postdoc or youre a professor a thirty eight year old economics professor is im an empiricist i go by the data and the data is very clear none of these major theories have been completely successful the twentieth century the last hundred years is riddled with disastrous examples of times that one school or the other tried to explain the past or predict the future and just did an awful awful job so the economics profession has acquired some degree of modesty they still are an awfully arrogant group of people i will assure you but theyre now arrogant about their impartiality and they too see a tremendous range of potential outcomes and this nonpartisanship is something that exists that has existed in secret in america for years and years and years ive spent a lot of the fall talking to the three major organizations that 
survey american political attitudes pew research the university of chicagos national opinion research center and support no we mustnt tax and we must limit the size of government or no we must encourage government to play a larger role in redistribution and correcting the ills of capitalism those groups are very very small the vast majority of people they pick and choose they see compromise and they change over time when they hear a better argument or a worse argument and that part of it has not changed what has changed is how people respond to vague questions if you ask people vague questions like do you think there should be more government or less government do you think government should especially if you use loaded language do you think the government should provide handouts or do you think the government should redistribute then you can see radical partisan change but when you get specific when you actually ask about the actual taxing and spending issues under consideration thomas jefferson saying the people wont trust that they just fought off a king theyre not going to accept some central authority this battle defined the first one hundred and fifty years of the u s economy and at every moment different partisans saying oh my god the economys about to collapse people are remarkably centrist theyre remarkably open to compromise so what we have then when you think about the fiscal cliff dont think of it as the american people fundamentally cant stand each other on these issues and that we must be ripped apart into two separate warring nations think of it as a tiny tiny number of ancient economists and misrepresentative ideologues have captured the process and theyve captured the process through familiar ways through a primary system which encourages that small group of peoples voices because that small group of people the people who answer all yeses or all noes on those ideological questions they might be small but every one of them has a blog every one of 
them has been on fox or msnbc in the last week every one of them becomes a louder and louder voice but they dont represent us they dont represent what our views are and that gets me back to the dollar and it gets me back to reminding myself that we know this experience we know what its like to have these people on tv in congress yelling about how the end of the world is coming if we dont adopt their view completely because its happened about the dollar ever since theres been a dollar rock a mustache not a beard not a goatee a mustache for the thirty days of november and then we agreed that we would come together at the end of the month have a mustache themed party and award a prize for the best and of course the worst mustache think the beautiful malin akerman put it perfectly every man deserves the opportunity to grow a little bit of luxury hipster mustache it created a lot of controversy hated it parents would shuffle kids away from we came together at the end of the month and we celebrated our journey and it was a real journey and we had a lot of fun and in two thousand and four i said to the guys that was so much fun we need to legitimize this so we can get away with it year on year so we married growing a mustache with prostate cancer and then we created our tagline which is changing the face of mens health and that eloquently describes the challenge changing your appearance for the thirty days and also the outcome that were trying to achieve getting men engaged in their health the ceo of the prostate cancer foundation i said to him ive got the most amazing idea thats going to transform your organization and funds for his organization and i said were going to come together at the end were going to have a mustache themed party were going to have djs were going to celebrate life and were going to change the face of mens health and he just looked at me and laughed and he said but were an ultraconservative organization we cant have anything to do with you 
================================================ FILE: data/ted/.gitkeep ================================================ ================================================ FILE: doc/BUILDING.rst ================================================ .. _build-native-client: Building DeepSpeech Binaries ============================ This section describes how to rebuild binaries. We already have several prebuilt binaries for all the supported platforms, it is highly advised to use them except if you know what you are doing. If you'd like to build the DeepSpeech binaries yourself, you'll need the following pre-requisites downloaded and installed: * `Bazel 3.1.0 `_ * `General TensorFlow r2.3 requirements `_ * `libsox `_ It is required to use our fork of TensorFlow since it includes fixes for common problems encountered when building the native client files. If you'd like to build the language bindings or the decoder package, you'll also need: .. _swig-dep: * `SWIG >= 4.0 `_. Unfortunately, NodeJS / ElectronJS after 10.x support on SWIG is a bit behind, but patches have been merged and 4.1 is good. The proper prebuilt patched version (covering linux, windows and macOS) of SWIG should get installed under `native_client/ `_ as soon as you build any bindings that require it. Prebuilt versions for linux, macOS and Windows are `available (look for ds-swig*.tar.gz) `_ * `node-pre-gyp `_ (for Node.JS bindings only) Dependencies ------------ If you follow these instructions, you should compile your own binaries of DeepSpeech (built on TensorFlow using Bazel). For more information on configuring TensorFlow, read the docs up to the end of `"Configure the Build" `_. Checkout source code ^^^^^^^^^^^^^^^^^^^^ Clone DeepSpeech source code (TensorFlow will come as a submodule): ..
code-block:: git clone https://github.com/mozilla/DeepSpeech.git git submodule sync tensorflow/ git submodule update --init tensorflow/ Bazel: Download & Install ^^^^^^^^^^^^^^^^^^^^^^^^^ First, install Bazel 3.1.0 following the `Bazel installation documentation `_. TensorFlow: Configure with Bazel ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ After you have installed the correct version of Bazel, configure TensorFlow: .. code-block:: cd tensorflow ./configure Compile DeepSpeech ------------------ Compile ``libdeepspeech.so`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Within your TensorFlow directory, there should be a symbolic link to the DeepSpeech ``native_client`` directory. If it is not present, create it with the following command: .. code-block:: cd tensorflow ln -s ../native_client You can now use Bazel to build the main DeepSpeech library, ``libdeepspeech.so``. Add ``--config=cuda`` if you want a CUDA build. .. code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so The generated binaries will be saved to ``bazel-bin/native_client/``. .. _build-generate-scorer-package: Compile ``generate_scorer_package`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Following the same setup as for ``libdeepspeech.so`` above, you can rebuild the ``generate_scorer_package`` binary by adding its target to the command line: ``//native_client:generate_scorer_package``. Using the example from above you can build the library and that binary at the same time: .. code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_scorer_package The generated binaries will be saved to ``bazel-bin/native_client/``.
Compile Language Bindings ^^^^^^^^^^^^^^^^^^^^^^^^^ Now, ``cd`` into the ``DeepSpeech/native_client`` directory and use the ``Makefile`` to build all the language bindings (C++ client, Python package, Nodejs package, etc.). .. code-block:: cd ../DeepSpeech/native_client make deepspeech Installing your own Binaries ---------------------------- After building, the library files and binary can optionally be installed to a system path for ease of development. This is also a required step for bindings generation. .. code-block:: PREFIX=/usr/local sudo make install It is assumed that ``$PREFIX/lib`` is a valid library path, otherwise you may need to alter your environment. Install Python bindings ^^^^^^^^^^^^^^^^^^^^^^^ Included are a set of generated Python bindings. After following the above build and installation instructions, these can be installed by executing the following commands (or equivalent on your system): .. code-block:: cd native_client/python make bindings pip install dist/deepspeech* The API mirrors the C++ API and is demonstrated in `client.py `_. Refer to `deepspeech.h `_ for documentation. Install NodeJS / ElectronJS bindings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ After following the above build and installation instructions, the Node.JS bindings can be built: .. code-block:: cd native_client/javascript make build make npm-pack This will create the package ``deepspeech-VERSION.tgz`` in ``native_client/javascript``. Install the CTC decoder package ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To build the ``ds_ctcdecoder`` package, you'll need the general requirements listed above (in particular :ref:`SWIG `). The command below builds the bindings using eight (8) processes for compilation. Adjust the parameter accordingly for more or less parallelism. .. 
code-block:: cd native_client/ctcdecode make bindings NUM_PROCESSES=8 pip install dist/*.whl Building CTC Decoder for training on unsupported platforms ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We only support building CTC Decoder on x86-64 architecture. However, we offer some hints on building the CTC decoder on other architectures, and you might find some help in our `discourse `. Feedback on improving this section or usage on other architectures is welcome. First, you need to build SWIG from scratch. See :ref:`SWIG dep ` for details. You can supply your prebuilt SWIG using ``SWIG_DIST_URL``. Moreover, you may have to change ``PYTHON_PLATFORM_NAME`` corresponding to your platform. .. code-block:: # PowerPC (ppc64le) PYTHON_PLATFORM_NAME="--plat-name linux_ppc64le" Complete build command: .. code-block:: SWIG_DIST_URL=[...] PYTHON_PLATFORM_NAME=[...] make bindings pip install dist/*.whl Cross-building -------------- RPi3 ARMv7 and LePotato ARM64 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We do support cross-compilation. Please refer to our ``mozilla/tensorflow`` fork, where we define the following ``--config`` flags: * ``--config=rpi3`` and ``--config=rpi3_opt`` for Raspbian / ARMv7 * ``--config=rpi3-armv8`` and ``--config=rpi3-armv8_opt`` for ARMBian / ARM64 So your command line for ``RPi3`` and ``ARMv7`` should look like: .. code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so And your command line for ``LePotato`` and ``ARM64`` should look like: ..
code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so While we test only on RPi3 Raspbian Buster and LePotato ARMBian Buster, anything compatible with ``armv7-a cortex-a53`` or ``armv8-a cortex-a53`` should be fine. The ``deepspeech`` binary can also be cross-built, with ``TARGET=rpi3`` or ``TARGET=rpi3-armv8``. This might require you to set up a system tree using the tool ``multistrap`` and the multistrap configuration files: ``native_client/multistrap_armbian64_buster.conf`` and ``native_client/multistrap_raspbian_buster.conf``. The path of the system tree can be overridden from the default values defined in ``definitions.mk`` through the ``RASPBIAN`` ``make`` variable. .. code-block:: cd ../DeepSpeech/native_client make TARGET= deepspeech Android devices support ----------------------- We have support for Android relying on TensorFlow Lite, with Java and JNI bindings. For more details on how to experiment with those, please refer to the section below. Please refer to TensorFlow documentation on how to set up the environment to build for Android (SDK and NDK required). Using the library from Android project ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We provide up-to-date and tested ``libdeepspeech`` usable as an ``AAR`` package, for Android versions starting with 7.0 to 11.0. The package is published on `JCenter `_, and the ``JCenter`` repository should be available by default in any Android project. Please make sure your project is set up to pull from this repository. You can then include the library by just adding this line to your ``build.gradle``, adjusting ``VERSION`` to the version you need: ..
code-block:: implementation 'deepspeech.mozilla.org:libdeepspeech:VERSION@aar' Building ``libdeepspeech.so`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can build the ``libdeepspeech.so`` using (ARMv7): .. code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so Or (ARM64): .. code-block:: bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so Building ``libdeepspeech.aar`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the unlikely event you have to rebuild the JNI bindings, source code is available under the ``libdeepspeech`` subdirectory. Building depends on shared object: please make sure to place ``libdeepspeech.so`` into the ``libdeepspeech/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` matching subdirectories. Building the bindings is managed by ``gradle`` and should be limited to issuing ``./gradlew libdeepspeech:build``, producing an ``AAR`` package in ``./libdeepspeech/build/outputs/aar/``. Please note that you might have to copy the file to a local Maven repository and adapt file naming (when missing, the error message should state what filename it expects and where). Building C++ ``deepspeech`` binary ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Building the ``deepspeech`` binary will happen through ``ndk-build`` (ARMv7): .. code-block:: cd ../DeepSpeech/native_client $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../tensorflow/ TARGET_ARCH_ABI=armeabi-v7a And (ARM64): ..
code-block:: cd ../DeepSpeech/native_client $ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../tensorflow/ TARGET_ARCH_ABI=arm64-v8a Android demo APK ^^^^^^^^^^^^^^^^ Provided is a very simple Android demo app that allows you to test the library. You can build it with ``make apk`` and install the resulting APK file. Please refer to Gradle documentation for more details. The ``APK`` should be produced in ``/app/build/outputs/apk/``. This demo app might require external storage permissions. You can then push model files to your device, set the path to the file in the UI and try to run on an audio file. When running, it should first play the audio file and then run the decoding. At the end of the decoding, you should be presented with the decoded text as well as time elapsed to decode in milliseconds. This application is very limited on purpose, and is only here as a very basic demo of one usage of the application. For example, it's only able to read PCM mono 16kHz 16-bits file and it might fail on some WAVE files that do not follow the specification exactly. Running ``deepspeech`` via adb ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You should use ``adb push`` to send data to device, please refer to Android documentation on how to use that. Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including: * ``output_graph.tflite`` which is the TF Lite model * External scorer file (available from one of our releases), if you want to use the scorer; please be aware that a scorer that is too big will make the device run out of memory Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ : * ``deepspeech`` * ``libdeepspeech.so`` * ``libc++_shared.so`` You should then be able to run as usual, using a shell from ``adb shell``\ : .. code-block:: user@device$ cd /data/local/tmp/ds/ user@device$ LD_LIBRARY_PATH=$(pwd)/ ./deepspeech [...]
Please note that Android linker does not support ``rpath`` so you have to set ``LD_LIBRARY_PATH``. Properly wrapped / packaged bindings do embed the library at a place the linker knows where to search, so Android apps will be fine. Delegation API ^^^^^^^^^^^^^^ TensorFlow Lite supports Delegate API to offload some computation from the main CPU. Please refer to `TensorFlow's documentation `_ for details. To ease with experimentations, we have enabled some of those delegations on our Android builds: * GPU, to leverage OpenGL capabilities * NNAPI, the Android API to leverage GPU / DSP / NPU * Hexagon, the Qualcomm-specific DSP This is highly experimental: * Requires passing environment variable ``DS_TFLITE_DELEGATE`` with values of ``gpu``, ``nnapi`` or ``hexagon`` (only one at a time) * Might require exported model changes (some Op might not be supported) * We can't guarantee it will work, nor that it will be faster than default implementation Feedback on improving this is welcome: how it could be exposed in the API, how much performance gain you get in your applications, how you had to change the model to make it work with a delegate, etc. See :ref:`the support / contact details ` ================================================ FILE: doc/BUILDING_DotNet.rst ================================================ .. _build-native-client-dotnet: Building DeepSpeech native client for Windows ============================================= Now we can build the native client of DeepSpeech and run inference on Windows using the C# client, to do that we need to compile the ``native_client``.
**Table of Contents** * `Prerequisites <#prerequisites>`_ * `Getting the code <#getting-the-code>`_ * `Configuring the paths <#configuring-the-paths>`_ * `Adding environment variables <#adding-environment-variables>`_ * `MSYS2 paths <#msys2-paths>`_ * `BAZEL path <#bazel-path>`_ * `Python path <#python-path>`_ * `CUDA paths <#cuda-paths>`_ * `Building the native_client <#building-the-native_client>`_ * `Build for CPU <#cpu>`_ * `Build with CUDA support <#gpu-with-cuda>`_ * `Using the generated library <#using-the-generated-library>`_ Prerequisites ------------- * Windows 10 * `Windows 10 SDK `_ * `Visual Studio 2019 Community `_ v16.5.4.0 * `Visual Studio 2019 BuildTools `_ v16.5.4.0 * `TensorFlow Windows pre-requisites `_ Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2019 v16.00 (v160) toolset for desktop``. If you want to enable CUDA support you need to follow the steps in `the TensorFlow docs for building on Windows with CUDA `_. We highly recommend sticking to the recommended versions of CUDA/cuDNN in order to avoid compilation errors caused by incompatible versions. We only test with the versions recommended by TensorFlow. Getting the code ---------------- We need to clone ``mozilla/DeepSpeech``. .. code-block:: bash git clone https://github.com/mozilla/DeepSpeech git submodule sync tensorflow/ git submodule update --init tensorflow/ Configuring the paths --------------------- There should already be a symbolic link, for this example let's suppose that we cloned into ``D:\cloned`` and now the structure looks like: .. code-block:: . ├── D:\ │ ├── cloned # Contains DeepSpeech and tensorflow side by side │ │ └── DeepSpeech # Root of the cloned DeepSpeech │ │ ├── tensorflow # Root of the cloned mozilla/tensorflow └── ... Change your path according to your path structure, for the structure above we are going to use the following command if the symbolic link does not exist: ..
code-block:: bash mklink /d "D:\cloned\DeepSpeech\tensorflow\native_client" "D:\cloned\DeepSpeech\native_client" Adding environment variables ---------------------------- After you have installed the requirements there are a few environment variables that we need to add to our ``PATH`` variable of the system variables. MSYS2 paths ~~~~~~~~~~~ For MSYS2 we need to add ``bin`` directory, if you installed in the default location the path that we need to add should look like ``C:\msys64\usr\bin``. Now we can run ``pacman``: .. code-block:: bash pacman -Syu pacman -Su pacman -S patch unzip BAZEL path ~~~~~~~~~~ For BAZEL we need to add the path to the executable, make sure you rename the executable to ``bazel``. To check the version installed you can run: .. code-block:: bash bazel version PYTHON path ~~~~~~~~~~~ Add your ``python.exe`` path to the ``PATH`` variable. CUDA paths ~~~~~~~~~~ If you run CUDA enabled ``native_client`` we need to add the following to the ``PATH`` variable. .. code-block:: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin Building the native_client ^^^^^^^^^^^^^^^^^^^^^^^^^^ There's one last command to run before building, you need to run the `configure.py `_ inside ``tensorflow`` cloned directory. At this point we are ready to start building the ``native_client``, go to ``tensorflow`` sub-directory, following our examples should be ``D:\cloned\DeepSpeech\tensorflow``. CPU ~~~ We will add AVX/AVX2 support in the command, please make sure that your CPU supports these instructions before adding the flags, if not you can remove them. .. code-block:: bash bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so GPU with CUDA ~~~~~~~~~~~~~ If you enabled CUDA in `configure.py `_ configuration command now you can add ``--config=cuda`` to compile with CUDA support. ..
code-block:: bash bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=cuda --copt=/arch:AVX --copt=/arch:AVX2 //native_client:libdeepspeech.so Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally you should see it stops and shows the path to the generated ``libdeepspeech.so``. Using the generated library --------------------------- As for now we can only use the generated ``libdeepspeech.so`` with the C# clients, go to `native_client/dotnet/ `_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libdeepspeech.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory. ================================================ FILE: doc/C-API.rst ================================================ .. _c-usage: C API ===== .. toctree:: :maxdepth: 2 Structs See also the list of error codes including descriptions for each error in :ref:`error-codes`. .. doxygenfunction:: DS_CreateModel :project: deepspeech-c .. doxygenfunction:: DS_FreeModel :project: deepspeech-c .. doxygenfunction:: DS_EnableExternalScorer :project: deepspeech-c .. doxygenfunction:: DS_DisableExternalScorer :project: deepspeech-c .. doxygenfunction:: DS_AddHotWord :project: deepspeech-c .. doxygenfunction:: DS_EraseHotWord :project: deepspeech-c .. doxygenfunction:: DS_ClearHotWords :project: deepspeech-c .. doxygenfunction:: DS_SetScorerAlphaBeta :project: deepspeech-c .. doxygenfunction:: DS_GetModelSampleRate :project: deepspeech-c .. doxygenfunction:: DS_SpeechToText :project: deepspeech-c .. doxygenfunction:: DS_SpeechToTextWithMetadata :project: deepspeech-c .. doxygenfunction:: DS_CreateStream :project: deepspeech-c .. doxygenfunction:: DS_FeedAudioContent :project: deepspeech-c .. doxygenfunction:: DS_IntermediateDecode :project: deepspeech-c .. doxygenfunction:: DS_IntermediateDecodeWithMetadata :project: deepspeech-c .. 
doxygenfunction:: DS_FinishStream :project: deepspeech-c .. doxygenfunction:: DS_FinishStreamWithMetadata :project: deepspeech-c .. doxygenfunction:: DS_FreeStream :project: deepspeech-c .. doxygenfunction:: DS_FreeMetadata :project: deepspeech-c .. doxygenfunction:: DS_FreeString :project: deepspeech-c .. doxygenfunction:: DS_Version :project: deepspeech-c ================================================ FILE: doc/C-Examples.rst ================================================ C API Usage example =================== Examples are from `native_client/client.cc`. Creating a model instance and loading model ------------------------------------------- .. literalinclude:: ../native_client/client.cc :language: c :linenos: :lineno-match: :start-after: sphinx-doc: c_ref_model_start :end-before: sphinx-doc: c_ref_model_stop Performing inference -------------------- .. literalinclude:: ../native_client/client.cc :language: c :linenos: :lineno-match: :start-after: sphinx-doc: c_ref_inference_start :end-before: sphinx-doc: c_ref_inference_stop Full source code ---------------- See :download:`Full source code<../native_client/client.cc>`. ================================================ FILE: doc/Contributed-Examples.rst ================================================ User contributed examples ========================= There are also several user contributed examples available on a separate examples repository: `https://github.com/mozilla/DeepSpeech-examples `_. ================================================ FILE: doc/Decoder.rst ================================================ .. _decoder-docs: CTC beam search decoder ======================= Introduction ^^^^^^^^^^^^ DeepSpeech uses the `Connectionist Temporal Classification `_ loss function. For an excellent explanation of CTC and its usage, see this Distill article: `Sequence Modeling with CTC `_. 
This document assumes the reader is familiar with the concepts described in that article, and describes DeepSpeech specific behaviors that developers building systems with DeepSpeech should know to avoid problems. Note: Documentation for the tooling for creating custom scorer packages is available in :ref:`scorer-scripts`. The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in `BCP 14 `_ when, and only when, they appear in all capitals, as shown here. External scorer ^^^^^^^^^^^^^^^ DeepSpeech clients support OPTIONAL use of an external language model to improve the accuracy of the predicted transcripts. In the code, command line parameters, and documentation, this is referred to as a "scorer". The scorer is used to compute the likelihood (also called a score, hence the name "scorer") of sequences of words or characters in the output, to guide the decoder towards more likely results. This improves accuracy significantly. The use of an external scorer is fully optional. When an external scorer is not specified, DeepSpeech still uses a beam search decoding algorithm, but without any outside scoring. Currently, the DeepSpeech external scorer is implemented with `KenLM `_, plus some tooling to package the necessary files and metadata into a single ``.scorer`` package. The tooling lives in ``data/lm/``. The scripts included in ``data/lm/`` can be used and modified to build your own language model based on your particular use case or language. See :ref:`scorer-scripts` for more details on how to reproduce our scorer file as well as create your own. The scripts are geared towards replicating the language model files we release as part of `DeepSpeech model releases `_, but modifying them to use different datasets or language model construction parameters should be simple. 
Decoding modes ^^^^^^^^^^^^^^ DeepSpeech currently supports two modes of operation with significant differences at both training and decoding time. Note that Bytes output mode is experimental and has not been tested for languages other than Chinese Mandarin. Default mode (alphabet based) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The default mode, which uses an alphabet file (specified with ``--alphabet_config_path`` at training and export time) to determine which labels (characters), and how many of them, to predict in the output layer. At decoding time, if using an external scorer, it MUST be word based and MUST be built using the same alphabet file used for training. Word based means the text corpus used to build the scorer should contain words separated by whitespace. For most western languages, this is the default and requires no special steps from the developer when creating the scorer. Bytes output mode ^^^^^^^^^^^^^^^^^ **Note**: Currently, Bytes output mode makes assumptions that hold for Chinese Mandarin models but do not hold for other language targets, such as not predicting spaces. In bytes output mode the model predicts UTF-8 bytes directly instead of letters from an alphabet file. This idea was proposed in the paper `Bytes Are All You Need `_. This mode is enabled with the ``--bytes_output_mode`` flag at training and export time. At training time, the alphabet file is not used. Instead, the model is forced to have 256 labels, with labels 0-254 corresponding to UTF-8 byte values 1-255, and label 255 is used for the CTC blank symbol. If using an external scorer at decoding time, it MUST be built according to the instructions that follow. Bytes output mode can be useful for languages with very large alphabets, such as Mandarin written with Simplified Chinese characters. It may also be useful for building multi-language models, or as a base for transfer learning. Currently these cases are untested and unsupported. 
Note that bytes output mode makes assumptions that hold for Mandarin written with Simplified Chinese characters and may not hold for other languages. UTF-8 scorers are character based (more specifically, Unicode codepoint based), but the way they are used is similar to a word based scorer where each "word" is a sequence of UTF-8 bytes representing a single Unicode codepoint. This means that the input text used to create UTF-8 scorers should contain space separated Unicode codepoints. For example, the following input text: ``早 上 好`` corresponds to the following three "words", or UTF-8 byte sequences: ``E6 97 A9`` ``E4 B8 8A`` ``E5 A5 BD`` At decoding time, the scorer is queried every time a Unicode codepoint is predicted, instead of when a space character is predicted. From the language modeling perspective, this is a character based model. From the implementation perspective, this is a word based model, because each character is composed of multiple labels. **Acoustic models trained with ``--bytes_output_mode`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.** UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``generate_scorer_package`` should automatically enable bytes output mode, and it should print the message "Looks like a character based model." If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. Bytes output mode can be forced by specifying the ``--force_bytes_output_mode`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED. See :ref:`scorer-scripts` for more details on using ``generate_scorer_package``. Because KenLM uses spaces as a word separator, the resulting language model will not include space characters in it. 
If you wish to use bytes output mode but still model spaces, you need to replace spaces in the input corpus with a different character **before** converting it to space separated codepoints. For example: .. code-block:: python input_text = 'The quick brown fox jumps over the lazy dog' spaces_replaced = input_text.replace(' ', '|') space_separated = ' '.join(spaces_replaced) print(space_separated) # T h e | q u i c k | b r o w n | f o x | j u m p s | o v e r | t h e | l a z y | d o g The character, '|' in this case, will then have to be replaced with spaces as a post-processing step after decoding. Implementation ^^^^^^^^^^^^^^ The decoder source code can be found in ``native_client/ctcdecode``. The decoder is included in the language bindings and clients. In addition, there is a separate Python module which includes just the decoder and is needed for evaluation. A pre-built version of this package is automatically downloaded and installed when installing the training code. If you want or need to manually build and install it from source, see the :github:`native_client README `. ================================================ FILE: doc/DeepSpeech.rst ================================================ DeepSpeech Model ================ The aim of this project is to create a simple, open, and ubiquitous speech recognition engine. Simple, in that the engine should not require server-class hardware to execute. Open, in that the code and models are released under the Mozilla Public License. Ubiquitous, in that the engine should run on many platforms and have bindings to many different languages. The architecture of the engine was originally motivated by that presented in `Deep Speech: Scaling up end-to-end speech recognition `_. However, the engine currently differs in many respects from the engine it was originally motivated by. The core of the engine is a recurrent neural network (RNN) trained to ingest speech spectrograms and generate English text transcriptions. 
Let a single utterance :math:`x` and label :math:`y` be sampled from a training set .. math:: S = \{(x^{(1)}, y^{(1)}), (x^{(2)}, y^{(2)}), . . .\}. Each utterance, :math:`x^{(i)}` is a time-series of length :math:`T^{(i)}` where every time-slice is a vector of audio features, :math:`x^{(i)}_t` where :math:`t=1,\ldots,T^{(i)}`. We use MFCC's as our features; so :math:`x^{(i)}_{t,p}` denotes the :math:`p`-th MFCC feature in the audio frame at time :math:`t`. The goal of our RNN is to convert an input sequence :math:`x` into a sequence of character probabilities for the transcription :math:`y`, with :math:`\hat{y}_t =\mathbb{P}(c_t \mid x)`, where for English :math:`c_t \in \{a,b,c, . . . , z, space, apostrophe, blank\}`. (The significance of :math:`blank` will be explained below.) Our RNN model is composed of :math:`5` layers of hidden units. For an input :math:`x`, the hidden units at layer :math:`l` are denoted :math:`h^{(l)}` with the convention that :math:`h^{(0)}` is the input. The first three layers are not recurrent. For the first layer, at each time :math:`t`, the output depends on the MFCC frame :math:`x_t` along with a context of :math:`C` frames on each side. (We use :math:`C = 9` for our experiments.) The remaining non-recurrent layers operate on independent data for each time step. Thus, for each time :math:`t`, the first :math:`3` layers are computed by: .. math:: h^{(l)}_t = g(W^{(l)} h^{(l-1)}_t + b^{(l)}) where :math:`g(z) = \min\{\max\{0, z\}, 20\}` is a clipped rectified-linear (ReLu) activation function and :math:`W^{(l)}`, :math:`b^{(l)}` are the weight matrix and bias parameters for layer :math:`l`. The fourth layer is a recurrent layer `[1] `_. This layer includes a set of hidden units with forward recurrence, :math:`h^{(f)}`: .. math:: h^{(f)}_t = g(W^{(4)} h^{(3)}_t + W^{(f)}_r h^{(f)}_{t-1} + b^{(4)}) Note that :math:`h^{(f)}` must be computed sequentially from :math:`t = 1` to :math:`t = T^{(i)}` for the :math:`i`-th utterance. 
The fifth (non-recurrent) layer takes the forward units as inputs .. math:: h^{(5)} = g(W^{(5)} h^{(f)} + b^{(5)}). The output layer is standard logits that correspond to the predicted character probabilities for each time slice :math:`t` and character :math:`k` in the alphabet: .. math:: h^{(6)}_{t,k} = \hat{y}_{t,k} = (W^{(6)} h^{(5)}_t)_k + b^{(6)}_k Here :math:`b^{(6)}_k` denotes the :math:`k`-th bias and :math:`(W^{(6)} h^{(5)}_t)_k` the :math:`k`-th element of the matrix product. Once we have computed a prediction for :math:`\hat{y}_{t,k}`, we compute the CTC loss `[2] `_ :math:`\cal{L}(\hat{y}, y)` to measure the error in prediction. (The CTC loss requires the :math:`blank` above to indicate transitions between characters.) During training, we can evaluate the gradient :math:`\nabla \cal{L}(\hat{y}, y)` with respect to the network outputs given the ground-truth character sequence :math:`y`. From this point, computing the gradient with respect to all of the model parameters may be done via back-propagation through the rest of the network. We use the Adam method for training `[3] `_. The complete RNN model is illustrated in the figure below. .. image:: ../images/rnn_fig-624x598.png :alt: DeepSpeech BRNN ================================================ FILE: doc/DotNet-API.rst ================================================ .NET Framework ============== DeepSpeech Class ---------------- .. doxygenclass:: DeepSpeechClient::DeepSpeech :project: deepspeech-dotnet :members: DeepSpeechStream Class ---------------------- .. doxygenclass:: DeepSpeechClient::Models::DeepSpeechStream :project: deepspeech-dotnet :members: ErrorCodes ---------- See also the main definition including descriptions for each error in :ref:`error-codes`. .. doxygenenum:: DeepSpeechClient::Enums::ErrorCodes :project: deepspeech-dotnet Metadata -------- .. 
doxygenclass:: DeepSpeechClient::Models::Metadata :project: deepspeech-dotnet :members: Transcripts CandidateTranscript ------------------- .. doxygenclass:: DeepSpeechClient::Models::CandidateTranscript :project: deepspeech-dotnet :members: Tokens, Confidence TokenMetadata ------------- .. doxygenclass:: DeepSpeechClient::Models::TokenMetadata :project: deepspeech-dotnet :members: Text, Timestep, StartTime DeepSpeech Interface -------------------- .. doxygeninterface:: DeepSpeechClient::Interfaces::IDeepSpeech :project: deepspeech-dotnet :members: ================================================ FILE: doc/DotNet-Examples.rst ================================================ .NET API Usage example ====================== Examples are from `native_client/dotnet/DeepSpeechConsole/Program.cs`. Creating a model instance and loading model ------------------------------------------- .. literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs :language: csharp :linenos: :lineno-match: :start-after: sphinx-doc: csharp_ref_model_start :end-before: sphinx-doc: csharp_ref_model_stop Performing inference -------------------- .. literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs :language: csharp :linenos: :lineno-match: :start-after: sphinx-doc: csharp_ref_inference_start :end-before: sphinx-doc: csharp_ref_inference_stop Full source code ---------------- See :download:`Full source code<../native_client/dotnet/DeepSpeechConsole/Program.cs>`. ================================================ FILE: doc/Error-Codes.rst ================================================ .. _error-codes: Error codes =========== Below is the definition for all error codes used in the API, their numerical values, and a human readable description. .. 
literalinclude:: ../native_client/deepspeech.h :language: c :start-after: sphinx-doc: error_code_listing_start :end-before: sphinx-doc: error_code_listing_end ================================================ FILE: doc/Flags.rst ================================================ .. _training-flags: Command-line flags for the training scripts =========================================== Below you can find the definition of all command-line flags supported by the training scripts. This includes ``DeepSpeech.py``, ``evaluate.py``, ``evaluate_tflite.py``, ``transcribe.py`` and ``lm_optimizer.py``. Flags ----- .. literalinclude:: ../training/deepspeech_training/util/flags.py :language: python :linenos: :lineno-match: :start-after: sphinx-doc: training_ref_flags_start :end-before: sphinx-doc: training_ref_flags_end ================================================ FILE: doc/Geometry.rst ================================================ Geometric Constants =================== This is about several constants related to the geometry of the network. n_input ------- Each of the at maximum ``n_steps`` vectors is a vector of MFCC features of a time-slice of the speech sample. We will make the number of MFCC features dependent upon the sample rate of the data set. Generically, if the sample rate is 8kHz we use 13 features. If the sample rate is 16kHz we use 26 features... We capture the dimension of these vectors, equivalently the number of MFCC features, in the variable ``n_input``. By default ``n_input`` is 26. n_context --------- As previously mentioned, the RNN is not simply fed the MFCC features of a given time-slice. It is fed, in addition, a context of :math:`C` frames on either side of the frame in question. The number of frames in this context is captured in the variable ``n_context``. By default ``n_context`` is 9. Next we will introduce constants that specify the geometry of some of the non-recurrent layers of the network. 
We do this by simply specifying the number of units in each of the layers. n_hidden_1, n_hidden_2, n_hidden_5 ---------------------------------- ``n_hidden_1`` is the number of units in the first layer, ``n_hidden_2`` the number of units in the second, and ``n_hidden_5`` the number in the fifth. We haven't forgotten about the third or sixth layer. We will define their unit count below. The RNN consists of an LSTM RNN that works "forward in time": .. image:: ../images/LSTM3-chain.png :alt: Image shows a diagram of a recurrent neural network with LSTM cells, with arrows depicting the flow of data from earlier time steps to later timesteps within the RNN. The dimension of the cell state, the upper line connecting subsequent LSTM units, is independent of the input dimension. n_cell_dim ---------- Hence, we are free to choose the dimension of this cell state independent of the input dimension. We capture the cell state dimension in the variable ``n_cell_dim``. n_hidden_3 ---------- The number of units in the third layer, which feeds in to the LSTM, is determined by ``n_cell_dim`` as follows .. code:: python n_hidden_3 = n_cell_dim n_hidden_6 ----------- The variable ``n_hidden_6`` will hold the number of characters in the target language plus one, for the :math:`blank`. For English it is the cardinality of the set .. math:: \{a,b,c, . . . , z, space, apostrophe, blank\} we referred to earlier. ================================================ FILE: doc/HotWordBoosting-Examples.rst ================================================ Hot-word boosting API Usage example =================================== With DeepSpeech 0.9 release a new API feature was introduced that allows boosting probability from the scorer of given words. It is exposed in all bindings (C, Python, JS, Java and .Net). 
Currently, it provides three methods for the Model class: - ``AddHotWord(word, boost)`` - ``EraseHotWord(word)`` - ``ClearHotWords()`` Exact API binding for the language you are using can be found in API Reference. General usage ------------- It is worth noting that boosting words that do not exist in the scorer (mostly proper nouns), or a word that shares no phonetic prefix with any word in the input audio, does not change the final transcription. Additionally, a hot-word that contains a space will not be taken into consideration, meaning that combinations of words cannot be boosted and each word must be added as a hot-word separately. Adjusting the boosting value ---------------------------- For hot-word boosting it is hard to determine what the optimal value is. Additionally, this is dependent on the input audio file. In practice, as reported by DeepSpeech users, the value should be no bigger than 20.0 for positive value boosting. Nevertheless, each use case is different and you might need to adjust values on your own. There is a user contributed script available in the ``DeepSpeech-examples`` repository for adjusting boost values: `https://github.com/mozilla/DeepSpeech-examples/tree/master/hotword_adjusting `_. Positive value boosting ----------------------- By adding a positive boost value to one of the words it is possible to increase the probability of the word's occurrence. This is particularly useful for detecting speech that is expected by the system. In the output, an excessive positive boost value (e.g. 250.0 but it does vary) may cause a word following the boosted hot-word to be split into separate letters. This problem is related to the scorer structure and currently the only way to avoid it is to tune the boost to a lower value. Negative value boosting ----------------------- Conversely, applying a negative boost value might cause the selected word to occur less frequently.
Keep in mind that words that sound similar to the boosted word might be used instead (e.g. homophones "accept" as "except") or the word might be split into separate parts (e.g. "another" into "an other"). The previously mentioned problem, where an excessive boost value caused letter splitting, doesn't arise for negative boost values. Example ------- To use hot-word boosting, just add hot-words of your choice to a ``Model`` before performing inference. You can also erase boosting of a chosen word or clear it for all hot-words. .. code-block:: python ds = Model(args.model) ... ds.addHotWord(word, boosting) ... print(ds.stt(audio)) Adding a boost value to a word repeatedly or erasing a hot-word without previously boosting it results in an error. ================================================ FILE: doc/Java-API.rst ================================================ Java ==== DeepSpeechModel --------------- .. doxygenclass:: org::deepspeech::libdeepspeech::DeepSpeechModel :project: deepspeech-java :members: Metadata -------- .. doxygenclass:: org::deepspeech::libdeepspeech::Metadata :project: deepspeech-java :members: getNumTranscripts, getTranscript CandidateTranscript ------------------- .. doxygenclass:: org::deepspeech::libdeepspeech::CandidateTranscript :project: deepspeech-java :members: getNumTokens, getConfidence, getToken TokenMetadata ------------- .. doxygenclass:: org::deepspeech::libdeepspeech::TokenMetadata :project: deepspeech-java :members: getText, getTimestep, getStartTime ================================================ FILE: doc/Java-Examples.rst ================================================ Java API Usage example ====================== Examples are from `native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java`. Creating a model instance and loading model ------------------------------------------- ..
literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java :language: java :linenos: :lineno-match: :start-after: sphinx-doc: java_ref_model_start :end-before: sphinx-doc: java_ref_model_stop Performing inference -------------------- .. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java :language: java :linenos: :lineno-match: :start-after: sphinx-doc: java_ref_inference_start :end-before: sphinx-doc: java_ref_inference_stop Full source code ---------------- See :download:`Full source code<../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java>`. ================================================ FILE: doc/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = DeepSpeech SOURCEDIR = . BUILDDIR = .build PIP_INSTALL ?= pip3 install --user # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help pip3 Makefile pip3: $(PIP_INSTALL) -r ../ci_scripts/docs-requirements.txt submodule: git submodule update --init --remote -- ../doc/examples # Add submodule update dependency to Sphinx's "html" target html: Makefile submodule pip3 @PATH=$$HOME/.local/bin:`pwd`/../node_modules/.bin/:$$PATH \ $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) dist: html cd $(BUILDDIR)/html/ && zip -r9 ../../html.zip * # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile pip3 @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: doc/NodeJS-API.rst ================================================ JavaScript (NodeJS / ElectronJS) ================================ Model ----- .. js:autoclass:: Model :members: Stream ------ .. js:autoclass:: StreamImpl :members: Module exported methods ----------------------- .. js:autofunction:: FreeModel .. js:autofunction:: FreeStream .. js:autofunction:: FreeMetadata .. js:autofunction:: Version Metadata -------- .. js:autoclass:: Metadata :members: CandidateTranscript ------------------- .. js:autoclass:: CandidateTranscript :members: TokenMetadata ------------- .. js:autoclass:: TokenMetadata :members: ================================================ FILE: doc/NodeJS-Examples.rst ================================================ .. _js-api-example: JavaScript API Usage example ============================= Examples are from `native_client/javascript/client.ts`. Creating a model instance and loading model ------------------------------------------- .. literalinclude:: ../native_client/javascript/client.ts :language: javascript :linenos: :lineno-match: :start-after: sphinx-doc: js_ref_model_start :end-before: sphinx-doc: js_ref_model_stop Performing inference -------------------- .. literalinclude:: ../native_client/javascript/client.ts :language: javascript :linenos: :lineno-match: :start-after: sphinx-doc: js_ref_inference_start :end-before: sphinx-doc: js_ref_inference_stop Full source code ---------------- See :download:`Full source code<../native_client/javascript/client.ts>`. ================================================ FILE: doc/ParallelOptimization.rst ================================================ Parallel Optimization ===================== This is how we implement optimization of the DeepSpeech model across GPUs on a single host. Parallel optimization can take on various forms. 
For example one can use asynchronous updates of the model, synchronous updates of the model, or some combination of the two. Asynchronous Parallel Optimization ---------------------------------- In asynchronous parallel optimization, for example, one places the model initially in CPU memory. Then each of the :math:`G` GPUs obtains a mini-batch of data along with the current model parameters. Using this mini-batch each GPU then computes the gradients for all model parameters and sends these gradients back to the CPU when the GPU is done with its mini-batch. The CPU then asynchronously updates the model parameters whenever it receives a set of gradients from a GPU. Asynchronous parallel optimization has several advantages and several disadvantages. One large advantage is throughput. No GPU will ever be waiting idle. When a GPU is done processing a mini-batch, it can immediately obtain the next mini-batch to process. It never has to wait on other GPUs to finish their mini-batch. However, this means that the model updates will also be asynchronous which can have problems. For example, one may have model parameters :math:`W` on the CPU and send mini-batch :math:`n` to GPU 1 and send mini-batch :math:`n+1` to GPU 2. As processing is asynchronous, GPU 2 may finish before GPU 1 and thus update the CPU's model parameters :math:`W` with its gradients :math:`\Delta W_{n+1}(W)`, where the subscript :math:`n+1` identifies the mini-batch and the argument :math:`W` the location at which the gradient was evaluated. This results in the new model parameters .. math:: W + \Delta W_{n+1}(W). Next GPU 1 could finish with its mini-batch and update the parameters to .. math:: W + \Delta W_{n+1}(W) + \Delta W_{n}(W). The problem with this is that :math:`\Delta W_{n}(W)` is evaluated at :math:`W` and not :math:`W + \Delta W_{n+1}(W)`. Hence, the direction of the gradient :math:`\Delta W_{n}(W)` is slightly incorrect as it is evaluated at the wrong location. 
This can be counteracted through synchronous updates of the model, but this is also problematic. Synchronous Optimization ------------------------ Synchronous optimization solves the problem we saw above. In synchronous optimization, one places the model initially in CPU memory. Then one of the :math:`G` GPUs is given a mini-batch of data along with the current model parameters. Using the mini-batch the GPU computes the gradients for all model parameters and sends the gradients back to the CPU. The CPU then updates the model parameters and starts the process of sending out the next mini-batch. As one can readily see, synchronous optimization does not have the problem we found in the last section, that of incorrect gradients. However, synchronous optimization can only make use of a single GPU at a time. So, when we have a multi-GPU setup, :math:`G > 1`, all but one of the GPUs will remain idle, which is unacceptable. However, there is a third alternative which combines the advantages of asynchronous and synchronous optimization. Hybrid Parallel Optimization ---------------------------- Hybrid parallel optimization combines most of the benefits of asynchronous and synchronous optimization. It allows for multiple GPUs to be used, but does not suffer from the incorrect gradient problem exhibited by asynchronous optimization. In hybrid parallel optimization one places the model initially in CPU memory. Then, as in asynchronous optimization, each of the :math:`G` GPUs obtains a mini-batch of data along with the current model parameters. Using the mini-batch each of the GPUs then computes the gradients for all model parameters and sends these gradients back to the CPU. Now, in contrast to asynchronous optimization, the CPU waits until each GPU is finished with its mini-batch then takes the mean of all the gradients from the :math:`G` GPUs and updates the model with this mean gradient. ..
image:: ../images/Parallelism.png :alt: Image shows a diagram with arrows displaying the flow of information between devices during training. A CPU device sends weights and gradients to one or more GPU devices, which run an optimization step and then return the new parameters to the CPU, which averages them and starts a new training iteration. Hybrid parallel optimization has several advantages and few disadvantages. As in asynchronous parallel optimization, hybrid parallel optimization allows for one to use multiple GPUs in parallel. Furthermore, unlike asynchronous parallel optimization, the incorrect gradient problem is not present here. In fact, hybrid parallel optimization performs as if one is working with a single mini-batch which is :math:`G` times the size of a mini-batch handled by a single GPU. However, hybrid parallel optimization is not perfect. If one GPU is slower than all the others in completing its mini-batch, all other GPUs will have to sit idle until this straggler finishes with its mini-batch. This hurts throughput. But, if all GPUs are of the same make and model, this problem should be minimized. So, relatively speaking, hybrid parallel optimization seems to have more advantages and fewer disadvantages as compared to both asynchronous and synchronous optimization. So, we will, for our work, use this hybrid model. Adam Optimization ----------------- In contrast to `Deep Speech: Scaling up end-to-end speech recognition `_, in which `Nesterov’s Accelerated Gradient Descent `_ was used, we will use the Adam method for optimization `[3] `_, because, generally, it requires less fine-tuning. ================================================ FILE: doc/Python-API.rst ================================================ Python ====== .. automodule:: native_client.python Model ----- .. autoclass:: Model :members: Stream ------ .. autoclass:: Stream :members: Metadata -------- .. autoclass:: Metadata :members: CandidateTranscript ------------------- ..
autoclass:: CandidateTranscript :members: TokenMetadata ------------- .. autoclass:: TokenMetadata :members: ================================================ FILE: doc/Python-Examples.rst ================================================ .. _py-api-example: Python API Usage example ======================== Examples are from `native_client/python/client.py`. Creating a model instance and loading model ------------------------------------------- .. literalinclude:: ../native_client/python/client.py :language: python :linenos: :lineno-match: :start-after: sphinx-doc: python_ref_model_start :end-before: sphinx-doc: python_ref_model_stop Performing inference -------------------- .. literalinclude:: ../native_client/python/client.py :language: python :linenos: :lineno-match: :start-after: sphinx-doc: python_ref_inference_start :end-before: sphinx-doc: python_ref_inference_stop Full source code ---------------- See :download:`Full source code<../native_client/python/client.py>`. ================================================ FILE: doc/SUPPORTED_PLATFORMS.rst ================================================ .. _supported-platforms-inference: Supported platforms for inference ================================= Here we maintain the list of supported platforms for running inference. 
Linux / AMD64 without GPU ^^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) * Full TensorFlow runtime (``deepspeech`` packages) * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Linux / AMD64 with GPU ^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) * CUDA 10.0 (and capable GPU) * Full TensorFlow runtime (``deepspeech`` packages) * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Linux / ARMv7 ^^^^^^^^^^^^^ * Cortex-A53 compatible ARMv7 SoC with Neon support * Raspbian Buster-compatible distribution * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Linux / Aarch64 ^^^^^^^^^^^^^^^ * Cortex-A72 compatible Aarch64 SoC * ARMbian Buster-compatible distribution * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Android / ARMv7 ^^^^^^^^^^^^^^^ * ARMv7 SoC with Neon support * Android 7.0-10.0 * NDK API level >= 21 * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Android / Aarch64 ^^^^^^^^^^^^^^^^^ * Aarch64 SoC * Android 7.0-10.0 * NDK API level >= 21 * TensorFlow Lite runtime (``deepspeech-tflite`` packages) macOS / AMD64 ^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * macOS >= 10.10 * Full TensorFlow runtime (``deepspeech`` packages) * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Windows / AMD64 without GPU ^^^^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Windows Server >= 2012 R2 ; Windows >= 8.1 * Full TensorFlow runtime (``deepspeech`` packages) * TensorFlow Lite runtime (``deepspeech-tflite`` packages) Windows / AMD64 with GPU ^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down 
inference) * Windows Server >= 2012 R2 ; Windows >= 8.1 * CUDA 10.0 (and capable GPU) * Full TensorFlow runtime (``deepspeech`` packages) * TensorFlow Lite runtime (``deepspeech-tflite`` packages) ================================================ FILE: doc/Scorer.rst ================================================ .. _scorer-scripts: External scorer scripts ======================= DeepSpeech pre-trained models include an external scorer. This document explains how to reproduce our external scorer, as well as adapt the scripts to create your own. The scorer is composed of two sub-components, a KenLM language model and a trie data structure containing all words in the vocabulary. In order to create the scorer package, first we must create a KenLM language model (using ``data/lm/generate_lm.py``), and then use ``generate_scorer_package`` to create the final package file including the trie data structure. The ``generate_scorer_package`` binary is part of the native client package that is included with official releases. You can find the appropriate archive for your platform in the `GitHub release downloads `_. The native client package is named ``native_client.{arch}.{config}.{plat}.tar.xz``, where ``{arch}`` is the architecture the binary was built for, for example ``amd64`` or ``arm64``, ``{config}`` is the build configuration, which for building decoder packages does not matter, and ``{plat}`` is the platform the binary was built for, for example ``linux`` or ``osx``. If you wanted to run the ``generate_scorer_package`` binary on a Linux desktop, you would download ``native_client.amd64.cpu.linux.tar.xz``. Reproducing our external scorer ------------------------------- Our KenLM language model was generated from the LibriSpeech normalized LM training text, available `here `_. It is created with `KenLM `_. You can download the LibriSpeech corpus with the following command: ..
code-block:: bash cd data/lm wget http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz Then use the ``generate_lm.py`` script to generate ``lm.binary`` and ``vocab-500000.txt``. As input you can use a plain text (e.g. ``file.txt``) or gzipped (e.g. ``file.txt.gz``) text file with one sentence in each line. If you are using a container created from ``Dockerfile.build``, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``. Else you have to build `KenLM `_ first and then pass the build directory to the script. .. code-block:: bash cd data/lm python3 generate_lm.py --input_txt librispeech-lm-norm.txt.gz --output_dir . \ --top_k 500000 --kenlm_bins path/to/kenlm/build/bin/ \ --arpa_order 5 --max_arpa_memory "85%" --arpa_prune "0|0|1" \ --binary_a_bits 255 --binary_q_bits 8 --binary_type trie Afterwards you can use ``generate_scorer_package`` to generate the scorer package using the ``lm.binary`` and ``vocab-500000.txt`` files: .. code-block:: bash cd data/lm # Download and extract appropriate native_client package: curl -LO http://github.com/mozilla/DeepSpeech/releases/... tar xvf native_client.*.tar.xz ./generate_scorer_package --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \ --package kenlm.scorer --default_alpha 0.931289039105002 --default_beta 1.1834137581510284 The ``generate_scorer_package`` binary is part of the released ``native_client.tar.xz``. If for some reason you need to rebuild it, please refer to how to :ref:`build-generate-scorer-package`. Building your own scorer ------------------------ Building your own scorer can be useful if you're using models in a narrow usage context, with a more limited vocabulary, for example. Building a scorer requires text data matching your intended use case, which must be formatted in a text file with one sentence per line. 
The LibriSpeech LM training text used by our scorer is around 4GB uncompressed, which should give an idea of the size of a corpus needed for a reasonable language model for general speech recognition. For more constrained use cases with smaller vocabularies, you don't need as much data, but you should still try to gather as much as you can. With a text corpus in hand, you can then re-use ``generate_lm.py`` and ``generate_scorer_package`` to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit `_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior. After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_scorer_package`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script ` which can be used to find good default values for alpha and beta. To use it, you must first generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py`` later. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_scorer_package`` again, this time with the new values. ================================================ FILE: doc/Structs.rst ================================================ Data structures =============== Metadata -------- .. doxygenstruct:: Metadata :project: deepspeech-c :members: CandidateTranscript ------------------- .. doxygenstruct:: CandidateTranscript :project: deepspeech-c :members: TokenMetadata ------------- .. 
doxygenstruct:: TokenMetadata :project: deepspeech-c :members: ================================================ FILE: doc/TRAINING.rst ================================================ .. _training-docs: Training Your Own Model ======================= .. _cuda-training-deps: Prerequisites for training a model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * `Python 3.6 `_ * Mac or Linux environment * CUDA 10.0 / CuDNN v7.6 per `Dockerfile `_. Getting the training code ^^^^^^^^^^^^^^^^^^^^^^^^^ Clone the latest released stable branch from Github (e.g. 0.9.3, check `here `_): .. code-block:: bash git clone --branch v0.9.3 https://github.com/mozilla/DeepSpeech If you plan on committing code or you want to report bugs, please use the master branch. Creating a virtual environment ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Throughout the documentation we assume you are using **virtualenv** to manage your Python environments. This setup is the one used and recommended by the project authors and is the easiest way to make sure you won't run into environment issues. If you're using **Anaconda, Miniconda or Mamba**, first read the instructions at :ref:`training-with-conda` and then continue from the installation step below. In creating a virtual environment you will create a directory containing a ``python3`` binary and everything needed to run deepspeech. You can use whatever directory you want. For the purpose of the documentation, we will rely on ``$HOME/tmp/deepspeech-train-venv``. You can create it using this command: .. code-block:: $ python3 -m venv $HOME/tmp/deepspeech-train-venv/ Once this command completes successfully, the environment will be ready to be activated. Activating the environment ^^^^^^^^^^^^^^^^^^^^^^^^^^ Each time you need to work with DeepSpeech, you have to *activate* this virtual environment. This is done with this simple command: .. 
code-block:: $ source $HOME/tmp/deepspeech-train-venv/bin/activate Installing DeepSpeech Training Code and its dependencies ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Install the required dependencies using ``pip3``\ : .. code-block:: bash cd DeepSpeech pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0 pip3 install --upgrade -e . Remember to re-run the last ``pip3 install`` command above when you update the training code (for example by pulling new changes), in order to update any dependencies. The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules: .. code-block:: bash sudo apt-get install python3-dev Recommendations ^^^^^^^^^^^^^^^ If you have a capable (NVIDIA, at least 8GB of VRAM) GPU, it is highly recommended to install TensorFlow with GPU support. Training will be significantly faster than using the CPU. To enable GPU support, you can do: .. code-block:: bash pip3 uninstall tensorflow pip3 install 'tensorflow-gpu==1.15.4' Please ensure you have the required `CUDA dependency `_ and/or :ref:`Prerequisites `. Some people have reported the following failure at training: .. code-block:: tensorflow.python.framework.errors_impl.UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [[{{node tower_0/conv1d/Conv2D}}]] Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation `. Basic Dockerfile for training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker. You need to generate the Dockerfile from the template using: This should ensure that you'll re-use the upstream Python 3 TensorFlow GPU-enabled Docker image. ..
code-block:: bash make Dockerfile.train If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: .. code-block:: bash make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch Common Voice training data ^^^^^^^^^^^^^^^^^^^^^^^^^^ The Common Voice corpus consists of voice samples that were donated through Mozilla's `Common Voice `_ Initiative. You can download individual CommonVoice v2.0 language data sets from `here `_. After extraction of such a data set, you'll find the following contents: * the ``*.tsv`` files output by CorporaCreator for the downloaded language * the mp3 audio files they reference in a ``clips`` sub-directory. For bringing this data into a form that DeepSpeech understands, you have to run the CommonVoice v2.0 importer (\ ``bin/import_cv2.py``\ ): .. code-block:: bash bin/import_cv2.py --filter_alphabet path/to/some/alphabet.txt /path/to/extracted/language/archive Providing a filter alphabet is optional. It will exclude all samples whose transcripts contain characters not in the specified alphabet. Running the importer with ``-h`` will show you some additional options. Once the import is done, the ``clips`` sub-directory will contain for each required ``.mp3`` an additional ``.wav`` file. It will also add the following ``.csv`` files: * ``clips/train.csv`` * ``clips/dev.csv`` * ``clips/test.csv`` The CSV files comprise the following fields: * ``wav_filename`` - path of the sample, either absolute or relative. Here, the importer produces relative paths. * ``wav_filesize`` - sample size given in bytes, used for sorting the data before training. Expects integer. * ``transcript`` - transcription target for the sample. To use Common Voice data during training, validation and testing, you pass (comma separated combinations of) their filenames into ``--train_files``\ , ``--dev_files``\ , ``--test_files`` parameters of ``DeepSpeech.py``.
If, for example, Common Voice language ``en`` was extracted to ``../data/CV/en/``\ , ``DeepSpeech.py`` could be called like this: .. code-block:: bash python3 DeepSpeech.py --train_files ../data/CV/en/clips/train.csv --dev_files ../data/CV/en/clips/dev.csv --test_files ../data/CV/en/clips/test.csv Training a model ^^^^^^^^^^^^^^^^ The central (Python) script is ``DeepSpeech.py`` in the project's root directory. For its list of command line options, you can call: .. code-block:: bash python3 DeepSpeech.py --helpfull To get the output of this in a slightly better-formatted way, you can also look at the flag definitions in :ref:`training-flags`. For executing pre-configured training scenarios, there is a collection of convenience scripts in the ``bin`` folder. Most of them are named after the corpora they are configured for. Keep in mind that most speech corpora are *very large*, on the order of tens of gigabytes, and some aren't free. Downloading and preprocessing them can take a very long time, and training on them without a fast GPU (GTX 10 series or newer recommended) takes even longer. **If you experience GPU OOM errors while training, try reducing the batch size with the ``--train_batch_size``\ , ``--dev_batch_size`` and ``--test_batch_size`` parameters.** As a simple first example you can open a terminal, change to the directory of the DeepSpeech checkout, activate the virtualenv created above, and run: .. code-block:: bash ./bin/run-ldc93s1.sh This script will train on a small sample dataset composed of just a single audio file, the sample file for the `TIMIT Acoustic-Phonetic Continuous Speech Corpus `_, which can be overfitted on a GPU in a few minutes for demonstration purposes. From here, you can alter any variables with regards to what dataset is used, how many training iterations are run and the default values of the network parameters. Feel also free to pass additional (or overriding) ``DeepSpeech.py`` parameters to these scripts. 
Then, just run the script to train the modified network. Each dataset has a corresponding importer script in ``bin/`` that can be used to download (if it's freely available) and preprocess the dataset. See ``bin/import_librivox.py`` for an example of how to import and preprocess a large dataset for training with DeepSpeech. Some importers might require additional code to properly handle your locale-specific requirements. Such handling is handled by the ``--validate_label_locale`` flag that allows you to source an out-of-tree Python script that defines a ``validate_label`` function. Please refer to ``util/importers.py`` for implementation example of that function. If you don't provide this argument, the default ``validate_label`` function will be used. This one is only intended for English language, so you might have consistency issues in your data for other languages. For example, in order to use a custom validation function that disallows any sample with "a" in its transcript, and lower cases everything else, you could put the following code in a file called ``my_validation.py`` and then use ``--validate_label_locale my_validation.py``: .. code-block:: python def validate_label(label): if 'a' in label: # disallow labels with 'a' return None return label.lower() # lower case valid labels If you've run the old importers (in ``util/importers/``\ ), they could have removed source files that are needed for the new importers to run. In that case, simply remove the extracted folders and let the importer extract and process the dataset from scratch, and things should work. Training with automatic mixed precision ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Automatic Mixed Precision (AMP) training on GPU for TensorFlow has been recently `introduced <https://medium.com/tensorflow/automatic-mixed-precision-in-tensorflow-for-faster-ai-training-on-nvidia-gpus-6033234b2540>`_. Mixed precision training makes use of both FP32 and FP16 precisions where appropriate.
FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput. Mixed precision training also often allows larger batch sizes. Automatic mixed precision training can be enabled by including the flag ``--automatic_mixed_precision`` at training time: .. code-block:: bash python3 DeepSpeech.py --train_files ./train.csv --dev_files ./dev.csv --test_files ./test.csv --automatic_mixed_precision On a Volta generation V100 GPU, automatic mixed precision speeds up DeepSpeech training and evaluation by ~30%-40%. Distributed training using Horovod ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you have a capable compute architecture, it is possible to distribute the training using `Horovod `_. A fast network is recommended. Horovod is capable of using MPI and NVIDIA's NCCL for highly optimized inter-process communication. It also offers `Gloo `_ as an easy-to-setup communication backend. For more information about setup or tuning of Horovod please visit `Horovod's documentation `_. Horovod is expected to run on heterogeneous systems (e.g. different number and model type of GPUs per machine). However, this can cause unpredictable problems and user interaction in training code is needed. Therefore, we only support homogeneous systems, which means same hardware and also same software configuration (OS, drivers, MPI, NCCL, TensorFlow, ...) on each machine. The only exception is different number of GPUs per machine, since this can be controlled by ``horovodrun -H``. Detailed documentation on how to run Horovod is provided `here `_. The short command to train on 4 machines using 4 GPUs each: .. code-block:: bash horovodrun -np 16 -H server1:4,server2:4,server3:4,server4:4 python3 DeepSpeech.py --train_files [...] --horovod Checkpointing ^^^^^^^^^^^^^ During training of a model so-called checkpoints will get stored on disk. This takes place at a configurable time interval.
The purpose of checkpoints is to allow interruption (also in the case of some unexpected failure) and later continuation of training without losing hours of training time. Resuming from checkpoints happens automatically by just (re)starting training with the same ``--checkpoint_dir`` of the former run. Alternatively, you can specify more fine grained options with ``--load_checkpoint_dir`` and ``--save_checkpoint_dir``, which specify separate locations to use for loading and saving checkpoints respectively. If not specified these flags use the same value as ``--checkpoint_dir``, ie. load from and save to the same directory. Be aware however that checkpoints are only valid for the same model geometry they had been generated from. In other words: If there are error messages of certain ``Tensors`` having incompatible dimensions, this is most likely due to an incompatible model change. One usual way out would be to wipe all checkpoint files in the checkpoint directory or changing it before starting the training. Exporting a model for inference ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the ``--export_dir`` parameter is provided, a model will have been exported to this directory during training. Refer to the :ref:`usage instructions ` for information on running a client that can use the exported model. Exporting a model for TFLite ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you want to experiment with the TF Lite engine, you need to export a model that is compatible with it, then use the ``--export_tflite`` flags. If you already have a trained model, you can re-export it for TFLite by running ``DeepSpeech.py`` again and specifying the same ``checkpoint_dir`` that you used for training, as well as passing ``--export_tflite --export_dir /model/export/destination``. If you changed the alphabet you also need to add the ``--alphabet_config_path my-new-language-alphabet.txt`` flag. 
Making a mmap-able model for inference ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``output_graph.pb`` model file generated in the above step will be loaded in memory to be dealt with when running inference. This will result in extra loading time and memory consumption. One way to avoid this is to directly read data from the disk. TensorFlow has tooling to achieve this: it requires building the target ``//tensorflow/contrib/util:convert_graphdef_memmapped_format``. We recommend you build it from `TensorFlow r1.15 `_. For convenience, builds for Linux and macOS are `available (look for file named convert_graphdef_memmapped_format) `_ Producing a mmap-able model is as simple as: .. code-block:: $ convert_graphdef_memmapped_format --in_graph=output_graph.pb --out_graph=output_graph.pbmm Upon a successful run, it should report about conversion of a non-zero number of nodes. If it reports converting ``0`` nodes, something is wrong: make sure your model is a frozen one, and that you have not applied any incompatible changes (this includes ``quantize_weights``\ ). Continuing training from a release model ---------------------------------------- There are currently two supported approaches to make use of a pre-trained DeepSpeech model: fine-tuning or transfer-learning. Choosing which one to use is a simple decision, and it depends on your target dataset. Does your data use the same alphabet as the release model? If "Yes": fine-tune. If "No": use transfer-learning. If your own data uses the *exact* same alphabet as the English release model (i.e. `a-z` plus `'`) then the release model's output layer will match your data, and you can just fine-tune the existing parameters. However, if you want to use a new alphabet (e.g. Cyrillic `а`, `б`, `д`), the output layer of a release DeepSpeech model will *not* match your data. In this case, you should use transfer-learning (i.e.
remove the trained model's output layer, and reinitialize a new output layer that matches your target character set). N.B. - If you have access to a pre-trained model which uses UTF-8 bytes at the output layer you can always fine-tune, because any alphabet should be encodable as UTF-8. .. _training-fine-tuning: Fine-Tuning (same alphabet) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you'd like to use one of the pre-trained models to bootstrap your training process (fine tuning), you can do so by using the ``--checkpoint_dir`` flag in ``DeepSpeech.py``. Specify the path where you downloaded the checkpoint from the release, and training will resume from the pre-trained model. For example, if you want to fine tune the entire graph using your own data in ``my-train.csv``\ , ``my-dev.csv`` and ``my-test.csv``\ , for three epochs, you can do something like the following, tuning the hyperparameters as needed: .. code-block:: bash mkdir fine_tuning_checkpoints python3 DeepSpeech.py --n_hidden 2048 --checkpoint_dir path/to/checkpoint/folder --epochs 3 --train_files my-train.csv --dev_files my-dev.csv --test_files my-test.csv --learning_rate 0.0001 Notes about the release checkpoints: the released models were trained with ``--n_hidden 2048``\ , so you need to use that same value when initializing from the release models. Since v0.6.0, the release models are also trained with ``--train_cudnn``\ , so you'll need to specify that as well. If you don't have a CUDA compatible GPU, then you can work around it by using the ``--load_cudnn`` flag. Use ``--helpfull`` to get more information on how the flags work. You also cannot use ``--automatic_mixed_precision`` when loading release checkpoints, as they do not use automatic mixed precision training. If you try to load a release model without following these steps, you'll get an error similar to this: .. code-block:: E Tried to load a CuDNN RNN checkpoint but there were more missing variables than just the Adam moment tensors.
Transfer-Learning (new alphabet) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you want to continue training an alphabet-based DeepSpeech model (i.e. not a UTF-8 model) on a new language, or if you just want to add new characters to your custom alphabet, you will probably want to use transfer-learning instead of fine-tuning. If you're starting with a pre-trained UTF-8 model -- even if your data comes from a different language or uses a different alphabet -- the model will be able to predict your new transcripts, and you should use fine-tuning instead. In a nutshell, DeepSpeech's transfer-learning allows you to remove certain layers from a pre-trained model, initialize new layers for your target data, stitch together the old and new layers, and update all layers via gradient descent. You will remove the pre-trained output layer (and optionally more layers) and reinitialize parameters to fit your target alphabet. The simplest case of transfer-learning is when you remove just the output layer. In DeepSpeech's implementation of transfer-learning, all removed layers will be contiguous, starting from the output layer. The key flag you will want to experiment with is ``--drop_source_layers``. This flag accepts an integer from ``1`` to ``5`` and allows you to specify how many layers you want to remove from the pre-trained model. For example, if you supplied ``--drop_source_layers 3``, you will drop the last three layers of the pre-trained model: the output layer, penultimate layer, and LSTM layer. All dropped layers will be reinitialized, and (crucially) the output layer will be defined to match your supplied target alphabet. You need to specify the location of the pre-trained model with ``--load_checkpoint_dir`` and define where your new model checkpoints will be saved with ``--save_checkpoint_dir``. You need to specify how many layers to remove (aka "drop") from the pre-trained model: ``--drop_source_layers``.
You also need to supply your new alphabet file using the standard ``--alphabet_config_path`` (remember, using a new alphabet is the whole reason you want to use transfer-learning). .. code-block:: bash python3 DeepSpeech.py \ --drop_source_layers 1 \ --alphabet_config_path my-new-language-alphabet.txt \ --save_checkpoint_dir path/to/output-checkpoint/folder \ --load_checkpoint_dir path/to/release-checkpoint/folder \ --train_files my-new-language-train.csv \ --dev_files my-new-language-dev.csv \ --test_files my-new-language-test.csv UTF-8 mode ^^^^^^^^^^ DeepSpeech includes a UTF-8 operating mode which can be useful to model languages with very large alphabets, such as Chinese Mandarin. For details on how it works and how to use it, see :ref:`decoder-docs`. .. _training-data-augmentation: Augmentation ^^^^^^^^^^^^ Augmentation is a useful technique for better generalization of machine learning models. Thus, a pre-processing pipeline with various augmentation techniques on raw pcm and spectrogram has been implemented and can be used while training the model. Following are the available augmentation techniques that can be enabled at training time by using the corresponding flags in the command line. Each sample of the training data will get treated by every specified augmentation in their given order. However: whether an augmentation will actually get applied to a sample is decided by chance on base of the augmentation's probability value. For example a value of ``p=0.1`` would apply the according augmentation to just 10% of all samples. This also means that augmentations are not mutually exclusive on a per-sample basis. The ``--augment`` flag uses a common syntax for all augmentation types: .. code-block:: --augment augmentation_type1[param1=value1,param2=value2,...] --augment augmentation_type2[param1=value1,param2=value2,...] ... For example, for the ``overlay`` augmentation: .. 
code-block:: python3 DeepSpeech.py --augment overlay[p=0.1,source=/path/to/audio.sdb,snr=20.0] ... In the documentation below, whenever a value is specified as ``<float-range>`` or ``<int-range>``, it supports one of the following formats: * ``<value>``: A constant (int or float) value. * ``<value>~<r>``: A center value with a randomization radius around it. E.g. ``1.2~0.4`` will result in picking of a uniformly random value between 0.8 and 1.6 on each sample augmentation. * ``<start>:<end>``: The value will range from ``<start>`` at the beginning of the training to ``<end>`` at the end of the training. E.g. ``-0.2:1.2`` (float) or ``2000:4000`` (int) * ``<start>:<end>~<r>``: Combination of the two previous cases with a ranging center value. E.g. ``4:6~2`` would at the beginning of the training pick values between 2 and 6 and at the end of the training between 4 and 8. Ranges specified with integer limits will only assume integer (rounded) values. .. warning:: When feature caching is enabled, by default the cache has no expiration limit and will be used for the entire training run. This will cause these augmentations to only be performed once during the first epoch and the result will be reused for subsequent epochs. This would not only hinder value ranges from reaching their intended final values, but could also lead to unintended over-fitting. In this case flag ``--cache_for_epochs N`` (with N > 1) should be used to periodically invalidate the cache after every N epochs and thus allow samples to be re-augmented in new ways and with current range-values. Every augmentation targets a certain representation of the sample - in this documentation these representations are referred to as *domains*. Augmentations are applied in the following order: 1. **sample** domain: The sample just got loaded and its waveform is represented as a NumPy array. For implementation reasons these augmentations are the only ones that can be "simulated" through ``bin/play.py``. 2. **signal** domain: The sample waveform is represented as a tensor. 3.
**spectrogram** domain: The sample spectrogram is represented as a tensor. 4. **features** domain: The sample's mel spectrogram features are represented as a tensor. Within a single domain, augmentations are applied in the same order as they appear in the command-line. Sample domain augmentations --------------------------- **Overlay augmentation** ``--augment overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]`` Layers another audio source (multiple times) onto augmented samples. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **source**: path to the sample collection to use for augmenting (\*.sdb or \*.csv file). It will be repeated if there are not enough samples left. * **snr**: signal to noise ratio in dB - positive values for lowering volume of the overlay in relation to the sample * **layers**: number of layers added onto the sample (e.g. 10 layers of speech to get "cocktail-party effect"). A layer is just a sample of the same duration as the sample to augment. It gets stitched together from as many source samples as required. **Reverb augmentation** ``--augment reverb[p=<float>,delay=<float-range>,decay=<float-range>]`` Adds simplified (no all-pass filters) `Schroeder reverberation <https://ccrma.stanford.edu/~jos/pasp/Schroeder_Reverberators.html>`_ to the augmented samples. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **delay**: time delay in ms for the first signal reflection - higher values are widening the perceived "room" * **decay**: sound decay in dB per reflection - higher values will result in a less reflective perceived "room" **Resample augmentation** ``--augment resample[p=<float>,rate=<int-range>]`` Resamples augmented samples to another sample rate and then resamples back to the original sample rate.
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **rate**: sample-rate to re-sample to **Codec augmentation** ``--augment codec[p=<float>,bitrate=<int-range>]`` Compresses and then decompresses augmented samples using the lossy Opus audio codec. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **bitrate**: bitrate used during compression **Volume augmentation** ``--augment volume[p=<float>,dbfs=<float-range>]`` Measures and levels augmented samples to a target dBFS value. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **dbfs**: target volume in dBFS (default value of 3.0103 will normalize min and max amplitudes to -1.0/1.0) Spectrogram domain augmentations -------------------------------- **Pitch augmentation** ``--augment pitch[p=<float>,pitch=<float-range>]`` Scales spectrogram on frequency axis and thus changes pitch. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave) **Tempo augmentation** ``--augment tempo[p=<float>,factor=<float-range>]`` Scales spectrogram on time axis and thus changes playback tempo. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo) **Warp augmentation** ``--augment warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]`` Applies a non-linear image warp to the spectrogram. This is achieved by randomly shifting a grid of equally distributed warp points along time and frequency axis.
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **nt**: number of equally distributed warp grid lines along time axis of the spectrogram (excluding the edges) * **nf**: number of equally distributed warp grid lines along frequency axis of the spectrogram (excluding the edges) * **wt**: standard deviation of the random shift applied to warp points along time axis (0.0 = no warp, 1.0 = half the distance to the neighbour point) * **wf**: standard deviation of the random shift applied to warp points along frequency axis (0.0 = no warp, 1.0 = half the distance to the neighbour point) **Frequency mask augmentation** ``--augment frequency_mask[p=<float>,n=<int-range>,size=<int-range>]`` Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779 * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **n**: number of intervals to mask * **size**: number of frequency bands to mask per interval Multi domain augmentations -------------------------- **Time mask augmentation** ``--augment time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]`` Sets time-intervals within the augmented samples to zero (silence) at random positions. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **n**: number of intervals to set to zero * **size**: duration of intervals in ms * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default) **Dropout augmentation** ``--augment dropout[p=<float>,rate=<float-range>,domain=<domain>]`` Zeros random data points of the targeted data representation.
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **rate**: dropout rate ranging from 0.0 for no dropout to 1.0 for 100% dropout * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default) **Add augmentation** ``--augment add[p=<float>,stddev=<float-range>,domain=<domain>]`` Adds random values picked from a normal distribution (with a mean of 0.0) to all data points of the targeted data representation. * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **stddev**: standard deviation of the normal distribution to pick values from * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram" **Multiply augmentation** ``--augment multiply[p=<float>,stddev=<float-range>,domain=<domain>]`` Multiplies all data points of the targeted data representation with random values picked from a normal distribution (with a mean of 1.0). * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method * **stddev**: standard deviation of the normal distribution to pick values from * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram" Example training with all augmentations: ..
code-block:: bash python -u DeepSpeech.py \ --train_files "train.sdb" \ --feature_cache ./feature.cache \ --cache_for_epochs 10 \ --epochs 100 \ --augment overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10] \ --augment reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0] \ --augment resample[p=0.1,rate=12000:8000~4000] \ --augment codec[p=0.1,bitrate=48000:16000] \ --augment volume[p=0.1,dbfs=-10:-40] \ --augment pitch[p=0.1,pitch=1~0.2] \ --augment tempo[p=0.1,factor=1~0.5] \ --augment warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2] \ --augment frequency_mask[p=0.1,n=1:3,size=1:5] \ --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \ --augment dropout[p=0.1,rate=0.05] \ --augment add[p=0.1,domain=signal,stddev=0~0.5] \ --augment multiply[p=0.1,domain=features,stddev=0~0.5] \ [...] The ``bin/play.py`` and ``bin/data_set_tool.py`` tools also support ``--augment`` parameters (for sample domain augmentations) and can be used for experimenting with different configurations or creating augmented data sets. Example of playing all samples with reverberation and maximized volume: .. code-block:: bash bin/play.py --augment reverb[p=0.1,delay=50.0,decay=2.0] --augment volume --random test.sdb Example simulation of the codec augmentation of a wav-file first at the beginning and then at the end of an epoch: .. code-block:: bash bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav Example of creating a pre-augmented test set: .. code-block:: bash bin/data_set_tool.py \ --augment overlay[source=noise.sdb,layers=1,snr=20~10] \ --augment resample[rate=12000:8000~4000] \ test.sdb test-augmented.sdb .. _training-with-conda: Training from an Anaconda or miniconda environment ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Keep in mind that none of the core authors use Anaconda or miniconda, so this setup is not guaranteed to work. 
If you experience problems, try using a non-conda setup first. We're happy to accept pull requests fixing any incompatibilities with conda setups, but we will not offer any support ourselves beyond reviewing pull requests. To prevent common problems, make sure you **always use a separate environment when setting things up for training**: .. code-block:: bash (base) $ conda create -n deepspeech python=3.7 (base) $ conda activate deepspeech ================================================ FILE: doc/USING.rst ================================================ .. _usage-docs: Using a Pre-trained Model ========================= Inference using a DeepSpeech pre-trained model can be done with a client/language binding package. We have four clients/language bindings in this repository, listed below, and also a few community-maintained clients/language bindings in other repositories, listed `further down in this README <#third-party-bindings>`_. * :ref:`The C API `. * :ref:`The Python package/language binding <py-usage>` * :ref:`The Node.JS package/language binding <nodejs-usage>` * :ref:`The command-line client <cli-usage>` * :github:`The .NET client/language binding ` .. _runtime-deps: Running ``deepspeech`` may require some runtime dependencies to be already installed on your system (see below): * ``sox`` - The Python and Node.JS clients use SoX to resample files to 16kHz. * ``libgomp1`` - libsox (statically linked into the clients) depends on OpenMP. Some people have had to install this manually. * ``libstdc++`` - Standard C++ Library implementation. Some people have had to install this manually. * ``libpthread`` - On Linux, some people have had to install libpthread manually. On Ubuntu, ``libpthread`` is part of the ``libpthread-stubs0-dev`` package. * ``Redistributable Visual C++ 2015 Update 3 (64-bits)`` - On Windows, it might be required to ensure this is installed. Please `download from Microsoft `_. Please refer to your system's documentation on how to install these dependencies. ..
_cuda-inference-deps: CUDA dependency (inference) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ The GPU capable builds (Python, NodeJS, C++, etc) depend on CUDA 10.1 and CuDNN v7.6. Getting the pre-trained model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you want to use the pre-trained English model for performing speech-to-text, you can download it (along with other important inference material) from the DeepSpeech `releases page `_. Alternatively, you can run the following command to download the model files in your current directory: .. code-block:: bash wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer There are several pre-trained model files available in official releases. Files ending in ``.pbmm`` are compatible with clients and language bindings built against the standard TensorFlow runtime. Usually these packages are simply called ``deepspeech``. These files are also compatible with CUDA enabled clients and language bindings. These packages are usually called ``deepspeech-gpu``. Files ending in ``.tflite`` are compatible with clients and language bindings built against the `TensorFlow Lite runtime `_. These models are optimized for size and performance in low power devices. On desktop platforms, the compatible packages are called ``deepspeech-tflite``. On Android and Raspberry Pi, we only publish TensorFlow Lite enabled packages, and they are simply called ``deepspeech``. You can see a full list of supported platforms and which TensorFlow runtime is supported at :ref:`supported-platforms-inference`. 
+--------------------+---------------------+---------------------+ | Package/Model type | .pbmm | .tflite | +====================+=====================+=====================+ | deepspeech | Depends on platform | Depends on platform | +--------------------+---------------------+---------------------+ | deepspeech-gpu | ✅ | ❌ | +--------------------+---------------------+---------------------+ | deepspeech-tflite | ❌ | ✅ | +--------------------+---------------------+---------------------+ Finally, the pre-trained model files also include files ending in ``.scorer``. These are external scorers (language models) that are used at inference time in conjunction with an acoustic model (``.pbmm`` or ``.tflite`` file) to produce transcriptions. We also provide further documentation on :ref:`the decoding process ` and :ref:`how scorers are generated `. Important considerations on model inputs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The release notes include detailed information on how the released models were trained/constructed. Important considerations for users include the characteristics of the training data used and whether they match your intended use case. For acoustic models, an important characteristic is the demographic distribution of speakers. For external scorers, the texts should be similar to those of the expected use case. If the data used for training the models does not align with your intended use case, it may be necessary to adapt or train new models in order to get good accuracy in your transcription results. The process for training an acoustic model is described in :ref:`training-docs`. In particular, fine tuning a release model using your own data can be a good way to leverage relatively smaller amounts of data that would not be sufficient for training a new model from scratch. See the :ref:`fine tuning and transfer learning sections ` for more information. 
:ref:`Data augmentation <training-data-augmentation>` can also be a good way to increase the value of smaller training sets. Creating your own external scorer from text data is another way that you can adapt the model to your specific needs. The process and tools used to generate an external scorer package are described in :ref:`scorer-scripts` and an overview of how the external scorer is used by DeepSpeech to perform inference is available in :ref:`decoder-docs`. Generating a smaller scorer from a single purpose text dataset is a quick process and can bring significant accuracy improvements, especially for more constrained, limited vocabulary applications. Model compatibility ^^^^^^^^^^^^^^^^^^^ DeepSpeech models are versioned to keep you from trying to use an incompatible graph with a newer client after a breaking change was made to the code. If you get an error saying your model file version is too old for the client, you should either upgrade to a newer model release, re-export your model from the checkpoint using a newer version of the code, or downgrade your client if you need to use the old model and can't re-export it. .. _py-usage: Using the Python package ^^^^^^^^^^^^^^^^^^^^^^^^ Pre-built binaries which can be used for performing inference with a trained model can be installed with ``pip3``. You can then use the ``deepspeech`` binary to do speech-to-text on an audio file: For the Python bindings, it is highly recommended that you perform the installation within a Python 3.5 or later virtual environment. You can find more information about those in `this documentation `_. We will continue under the assumption that you already have your system properly setup to create new virtual environments. Create a DeepSpeech virtual environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In creating a virtual environment you will create a directory containing a ``python3`` binary and everything needed to run deepspeech. You can use whatever directory you want.
For the purpose of the documentation, we will rely on ``$HOME/tmp/deepspeech-venv``. You can create it using this command: .. code-block:: $ virtualenv -p python3 $HOME/tmp/deepspeech-venv/ Once this command completes successfully, the environment will be ready to be activated. Activating the environment ~~~~~~~~~~~~~~~~~~~~~~~~~~ Each time you need to work with DeepSpeech, you have to *activate* this virtual environment. This is done with this simple command: .. code-block:: $ source $HOME/tmp/deepspeech-venv/bin/activate Installing DeepSpeech Python bindings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Once your environment has been set-up and loaded, you can use ``pip3`` to manage packages locally. On a fresh setup of the ``virtualenv``\ , you will have to install the DeepSpeech wheel. You can check if ``deepspeech`` is already installed with ``pip3 list``. To perform the installation, just use ``pip3`` as such: .. code-block:: $ pip3 install deepspeech If ``deepspeech`` is already installed, you can update it as such: .. code-block:: $ pip3 install --upgrade deepspeech Alternatively, if you have a supported NVIDIA GPU on Linux, you can install the GPU specific package as follows: .. code-block:: $ pip3 install deepspeech-gpu See the `release notes `_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_. You can update ``deepspeech-gpu`` as follows: .. code-block:: $ pip3 install --upgrade deepspeech-gpu In both cases, ``pip3`` should take care of installing all the required dependencies. After installation has finished, you should be able to call ``deepspeech`` from the command-line. Note: the following command assumes you `downloaded the pre-trained model <#getting-the-pre-trained-model>`_. .. 
code-block:: bash deepspeech --model deepspeech-0.9.3-models.pbmm --scorer deepspeech-0.9.3-models.scorer --audio my_audio_file.wav The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio. See :ref:`the Python client ` for an example of how to use the package programmatically. .. _nodejs-usage: Using the Node.JS / Electron.JS package ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can download the JS bindings using ``npm``\ : .. code-block:: bash npm install deepspeech Please note that as of now, we support: - Node.JS versions 4 to 13. - Electron.JS versions 1.6 to 7.1 TypeScript support is also provided. Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows: .. code-block:: bash npm install deepspeech-gpu See the `release notes `_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_. See the :ref:`TypeScript client ` for an example of how to use the bindings programmatically. .. _cli-usage: Using the command-line client ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To download the pre-built binaries for the ``deepspeech`` command-line (compiled C++) client, use one of the ``native_client.tar.xz`` files from the `releases <https://github.com/mozilla/DeepSpeech/releases>`_. Note: the following command assumes you `downloaded the pre-trained model <#getting-the-pre-trained-model>`_. .. code-block:: bash ./deepspeech --model deepspeech-0.9.3-models.pbmm --scorer deepspeech-0.9.3-models.scorer --audio audio_input.wav See the help output with ``./deepspeech -h`` for more details. Installing bindings from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow the :github:`native client build and installation instructions `.
Dockerfile for building from source ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We provide ``Dockerfile.build`` to automatically build ``libdeepspeech.so``, the C++ native client, Python bindings, and KenLM. You need to generate the Dockerfile from the template using: .. code-block:: bash make Dockerfile.build If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: .. code-block:: bash make Dockerfile.build DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch Third party bindings ^^^^^^^^^^^^^^^^^^^^ In addition to the bindings above, third party developers have started to provide bindings to other languages: * `Asticode `_ provides `Golang `_ bindings in its `go-astideepspeech `_ repo. * `RustAudio `_ provide a `Rust `_ binding, the installation and use of which is described in their `deepspeech-rs `_ repo. * `stes `_ provides preliminary `PKGBUILDs `_ to install the client and python bindings on `Arch Linux `_ in the `arch-deepspeech `_ repo. * `gst-deepspeech `_ provides a `GStreamer `_ plugin which can be used from any language with GStreamer bindings. * `thecodrr `_ provides `Vlang `_ bindings. The installation and use of which is described in their `vspeech `_ repo. * `eagledot `_ provides `NIM-lang `_ bindings. The installation and use of which is described in their `nim-deepspeech `_ repo. ================================================ FILE: doc/conf.py ================================================ # -*- coding: utf-8 -*- # # DeepSpeech documentation build configuration file, created by # sphinx-quickstart on Thu Feb 2 21:20:39 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. 
# If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # pylint: skip-file import os import sys sys.path.insert(0, os.path.abspath('../')) autodoc_mock_imports = ['deepspeech'] # This is in fact only relevant on ReadTheDocs, but we want to run the same way # on our CI as in RTD to avoid regressions on RTD that we would not catch on CI import subprocess parent = subprocess.check_output("cd ../ && pwd", shell=True).decode().strip() os.environ["PATH"] = os.path.join(parent, 'node_modules', '.bin') + ':' + os.environ["PATH"] subprocess.check_call('cd ../ && npm install typedoc@0.17.4 typescript@3.8.3 @types/node@13.9.x', shell=True) subprocess.check_call('env', shell=True) subprocess.check_call('which typedoc', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-c.conf', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-java.conf', shell=True) subprocess.check_call('cd ../ && doxygen doc/doxygen-dotnet.conf', shell=True) # -- General configuration ------------------------------------------------ import semver # -- Project information ----------------------------------------------------- project = u'Mozilla DeepSpeech' copyright = '2016-2020 Mozilla Corporation, 2020 DeepSpeech authors' author = 'DeepSpeech authors' with open('../VERSION', 'r') as ver: v = ver.read().strip() vv = semver.parse(v) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # The short X.Y version version = '{}.{}'.format(vv['major'], vv['minor']) # The full version, including alpha/beta/rc tags release = v # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.extlinks', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx_rtd_theme', 'sphinx_js', 'breathe' ] breathe_projects = { "deepspeech-c": "xml-c/", "deepspeech-java": "xml-java/", "deepspeech-dotnet": "xml-dotnet/", } js_source_path = "../native_client/javascript/index.ts" js_language = "typescript" jsdoc_config_path = "../native_client/javascript/tsconfig.json" # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['.build', 'Thumbs.db', '.DS_Store', 'node_modules'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False add_module_names = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
# html_theme_options = { 'collapse_navigation': False, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['.static'] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'DeepSpeechdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'DeepSpeech.tex', u'DeepSpeech Documentation', u'DeepSpeech authors', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'deepspeech', u'DeepSpeech Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'DeepSpeech', u'DeepSpeech Documentation', author, 'DeepSpeech', 'One line description of project.', 'Miscellaneous'), ] # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = {'https://docs.python.org/': None} extlinks = {'github': ('https://github.com/mozilla/DeepSpeech/blob/v{}/%s'.format(release), '%s')} ================================================ FILE: doc/doxygen-c.conf ================================================ # Doxyfile 1.8.13 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "My Project" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. 
PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = doc/ # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. 
ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. 
STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that Rational Rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO.
SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. 
OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the latter case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 0. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
TOC_INCLUDE_HEADINGS = 0 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match function declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES.
IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. 
INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. 
# Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. 
HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. 
If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. 
# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = NO # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. 
GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if <section_label> ... \endif and \cond <section_label> # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system).
Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. 
QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. 
If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = native_client/deepspeech.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. 
FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.pyw \ *.f90 \ *.f95 \ *.f03 \ *.f08 \ *.f \ *.for \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. 
INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. 
Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = NO # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse-libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. 
Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = NO # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. 
If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. 
For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. 
This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. 
# This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. 
The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. 
# This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). 
For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. 
# Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want the formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. 
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /