Repository: google/magika Branch: main Commit: fece437bf08d Files: 432 Total size: 1.8 MB Directory structure: gitextract_9puqy48w/ ├── .cargo/ │ └── config.toml ├── .dockerignore ├── .gemini/ │ └── config.yaml ├── .gitattributes ├── .github/ │ ├── CODEOWNERS │ ├── ISSUE_TEMPLATE/ │ │ ├── misdetection.md │ │ └── new_content_type_request.md │ ├── dependabot.yml │ ├── labeler.yml │ ├── scorecard.yml │ └── workflows/ │ ├── cli-latest.yml │ ├── cli-release.yml │ ├── codeql.yml │ ├── docs-check.yml │ ├── github-issue-labeler.yml │ ├── github-pages.yml │ ├── go-test.yml │ ├── js-check-import-scenarios.yml │ ├── js-docs-builder.yml │ ├── js-publish.yml │ ├── js-test.yml │ ├── python-build-and-release-package.yml │ ├── python-test-published-package.yml │ ├── python-test-published-rc-package.yml │ ├── python-test-suite.yml │ ├── rust-test.yml │ ├── scorecard.yml │ └── website-test.yml ├── .gitignore ├── CITATION.cff ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── assets/ │ ├── content_types_kb.min.json │ └── models/ │ ├── CHANGELOG.md │ ├── begonly_v2_1/ │ │ ├── config.min.json │ │ ├── metadata.json │ │ ├── model.keras │ │ └── model.onnx │ ├── fast_v2_1/ │ │ ├── config.min.json │ │ ├── metadata.json │ │ ├── model.keras │ │ └── model.onnx │ ├── standard_v1/ │ │ ├── README.md │ │ ├── content_types_config.json │ │ ├── magika_config.json │ │ ├── model.h5 │ │ ├── model_config.json │ │ └── thresholds.json │ ├── standard_v2_0/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ ├── model.keras │ │ └── model.onnx │ ├── standard_v2_1/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ ├── model.keras │ │ └── model.onnx │ ├── standard_v3_0/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ └── model.onnx │ ├── standard_v3_1/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ └── model.onnx │ ├── standard_v3_2/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ └── model.onnx │ └── 
standard_v3_3/ │ ├── README.md │ ├── config.min.json │ ├── metadata.json │ └── model.onnx ├── dist-workspace.toml ├── docs/ │ ├── concepts.md │ └── js.md ├── go/ │ ├── README.md │ ├── cli/ │ │ ├── cli.go │ │ ├── cli_test.go │ │ ├── main.go │ │ └── tests_data/ │ │ └── magika_test_pptx.txt │ ├── docker/ │ │ └── Dockerfile │ ├── example/ │ │ └── main.go │ ├── go.mod │ ├── go.sum │ ├── magika/ │ │ ├── config.go │ │ ├── content.go │ │ ├── features.go │ │ ├── features_test.go │ │ ├── scanner.go │ │ └── scanner_test.go │ └── onnx/ │ ├── onnx.go │ ├── onnx_runtime.go │ ├── onnx_runtime.h │ ├── onnx_runtime_test.go │ └── onnx_zero.go ├── js/ │ ├── .gitignore │ ├── CHANGELOG.md │ ├── README.md │ ├── magika-cli.ts │ ├── magika-node.ts │ ├── magika.ts │ ├── package.json │ ├── postBuild.js │ ├── simple_examples/ │ │ ├── browser-esmodule-example/ │ │ │ ├── index.html │ │ │ ├── index.js │ │ │ ├── package.json │ │ │ ├── playwright.config.ts │ │ │ └── test/ │ │ │ └── simple.spec.ts │ │ ├── node-commonjs-example/ │ │ │ ├── index.js │ │ │ └── package.json │ │ ├── node-esmodule-example/ │ │ │ ├── index.js │ │ │ └── package.json │ │ ├── run_examples.sh │ │ └── typescript-esmodule-example/ │ │ ├── index.ts │ │ └── package.json │ ├── src/ │ │ ├── .npmignore │ │ ├── content-type-info.ts │ │ ├── content-type-label.ts │ │ ├── content-types-infos.ts │ │ ├── magika-options.ts │ │ ├── magika-prediction.ts │ │ ├── magika-result.ts │ │ ├── model-config-node.ts │ │ ├── model-config.ts │ │ ├── model-features.ts │ │ ├── model-node.ts │ │ ├── model-prediction.ts │ │ ├── model.ts │ │ ├── overwrite-reason.ts │ │ ├── prediction-mode.ts │ │ └── status.ts │ ├── test/ │ │ ├── features-extraction-vs-reference.test.ts │ │ ├── inference-vs-reference.test.ts │ │ ├── magika-cli.test.ts │ │ ├── magika.test.ts │ │ ├── tfnHook.ts │ │ └── utils.ts │ ├── tsconfig.cjs.json │ ├── tsconfig.esm.json │ └── tsconfig.json ├── python/ │ ├── .gitignore │ ├── .python-version │ ├── CHANGELOG.md │ ├── README.md │ ├── mypy.ini 
│ ├── pyproject.toml │ ├── pytest.ini │ ├── scripts/ │ │ ├── check_changelog.sh │ │ ├── check_copyright.py │ │ ├── check_documentation.py │ │ ├── check_source.sh │ │ ├── generate_reference.py │ │ ├── pre_release_check.py │ │ ├── prepare_pyproject_for_pure_python_wheel.py │ │ ├── run_quick_test_magika_cli.py │ │ ├── run_quick_test_magika_module.py │ │ ├── sync.py │ │ └── test_magika_model.py │ ├── src/ │ │ └── magika/ │ │ ├── __init__.py │ │ ├── cli/ │ │ │ ├── magika_client.py │ │ │ └── magika_rust_client_not_found_warning.py │ │ ├── colors.py │ │ ├── config/ │ │ │ └── content_types_kb.min.json │ │ ├── logger.py │ │ ├── magika.py │ │ ├── models/ │ │ │ └── standard_v3_3/ │ │ │ ├── README.md │ │ │ ├── config.min.json │ │ │ ├── metadata.json │ │ │ └── model.onnx │ │ ├── py.typed │ │ └── types/ │ │ ├── __init__.py │ │ ├── content_type_info.py │ │ ├── content_type_label.py │ │ ├── magika_error.py │ │ ├── magika_prediction.py │ │ ├── magika_result.py │ │ ├── model.py │ │ ├── overwrite_reason.py │ │ ├── prediction_mode.py │ │ ├── seekable.py │ │ ├── status.py │ │ └── strenum.py │ └── tests/ │ ├── __init__.py │ ├── test_features_extraction_vs_reference.py │ ├── test_inference_vs_reference.py │ ├── test_magika_python_module.py │ ├── test_python_magika_client.py │ └── utils.py ├── rust/ │ ├── .gitignore │ ├── README.md │ ├── changelog.sh │ ├── cli/ │ │ ├── CHANGELOG.md │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── output │ │ ├── publish.sh │ │ ├── src/ │ │ │ └── main.rs │ │ └── test.sh │ ├── color.sh │ ├── gen/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── content_types │ │ ├── src/ │ │ │ └── main.rs │ │ └── test.sh │ ├── latest.sh │ ├── lib/ │ │ ├── CHANGELOG.md │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── src/ │ │ │ ├── builder.rs │ │ │ ├── config.rs │ │ │ ├── content.rs │ │ │ ├── error.rs │ │ │ ├── file.rs │ │ │ ├── future.rs │ │ │ ├── input.rs │ │ │ ├── lib.rs │ │ │ ├── model.rs │ │ │ └── session.rs │ │ └── test.sh │ ├── onnx/ │ │ ├── build.sh │ │ └── maturin.sh │ ├── 
publish.sh │ ├── rustfmt.toml │ ├── sync.sh │ ├── taplo.toml │ └── test.sh ├── tests_data/ │ ├── README.md │ ├── basic/ │ │ ├── asm/ │ │ │ └── code.asm │ │ ├── batch/ │ │ │ └── simple.bat │ │ ├── c/ │ │ │ └── code.c │ │ ├── css/ │ │ │ └── code.css │ │ ├── csv/ │ │ │ └── magika_test.csv │ │ ├── dockerfile/ │ │ │ └── Dockerfile │ │ ├── docx/ │ │ │ ├── doc.docx │ │ │ └── magika_test.docx │ │ ├── eml/ │ │ │ └── sample.eml │ │ ├── empty/ │ │ │ └── empty_file │ │ ├── epub/ │ │ │ ├── doc.epub │ │ │ └── magika_test.epub │ │ ├── flac/ │ │ │ └── test.flac │ │ ├── handlebars/ │ │ │ └── example.handlebars │ │ ├── html/ │ │ │ └── doc.html │ │ ├── ignorefile/ │ │ │ ├── example.ignorefile │ │ │ └── other.ignorefile │ │ ├── ini/ │ │ │ └── doc.ini │ │ ├── javascript/ │ │ │ └── code.js │ │ ├── jinja/ │ │ │ └── example.j2 │ │ ├── json/ │ │ │ └── doc.json │ │ ├── latex/ │ │ │ └── sample.tex │ │ ├── makefile/ │ │ │ └── simple.Makefile │ │ ├── markdown/ │ │ │ ├── README.md │ │ │ ├── magika_test.md │ │ │ └── simple.md │ │ ├── mht/ │ │ │ └── sample.mht │ │ ├── odp/ │ │ │ └── magika_test.odp │ │ ├── ods/ │ │ │ └── magika_test.ods │ │ ├── odt/ │ │ │ ├── doc.odt │ │ │ └── magika_test.odt │ │ ├── ogg/ │ │ │ └── test.ogg │ │ ├── outlook/ │ │ │ └── sample.msg │ │ ├── pem/ │ │ │ ├── doc.pem │ │ │ └── doc.pub │ │ ├── pptx/ │ │ │ └── magika_test.pptx │ │ ├── psd/ │ │ │ └── MagikaTest.psd │ │ ├── python/ │ │ │ └── code.py │ │ ├── pytorch/ │ │ │ └── example.pth │ │ ├── rtf/ │ │ │ ├── doc.rtf │ │ │ └── magika_test.rtf │ │ ├── ruby/ │ │ │ └── code.rb │ │ ├── rust/ │ │ │ ├── asm.rs │ │ │ ├── code.rs │ │ │ ├── test_case1.rs │ │ │ └── test_case2.rs │ │ ├── smali/ │ │ │ └── code.smali │ │ ├── srt/ │ │ │ └── code.srt │ │ ├── swift/ │ │ │ └── code.swift │ │ ├── toml/ │ │ │ └── doc.toml │ │ ├── tsv/ │ │ │ └── magika_test.tsv │ │ ├── twig/ │ │ │ └── example.twig │ │ ├── txt/ │ │ │ ├── complex-sentence.txt │ │ │ ├── few-words.txt │ │ │ ├── lorem-big.txt │ │ │ ├── lorem-small.txt │ │ │ ├── magika_test_pptx.txt 
│ │ │ ├── many-words.txt │ │ │ ├── one-sentence-with-newline.txt │ │ │ ├── one-sentence.txt │ │ │ └── random-ascii.txt │ │ ├── typescript/ │ │ │ └── code.ts │ │ ├── xlsx/ │ │ │ └── magika_test.xlsx │ │ ├── yaml/ │ │ │ ├── dependabot.yml │ │ │ └── python-test.yml │ │ ├── yara/ │ │ │ └── rule.yar │ │ └── zig/ │ │ └── code.zig │ ├── current_missdetections/ │ │ ├── html/ │ │ │ └── malformed-html-gh-521.html │ │ └── xls/ │ │ └── password-protected-example.xls │ ├── mitra/ │ │ ├── bzip/ │ │ │ └── bzip2.bz2 │ │ ├── cab/ │ │ │ └── cab.cab │ │ ├── elf/ │ │ │ ├── elf.elf │ │ │ └── elf64.elf │ │ ├── flac/ │ │ │ ├── flac.flac │ │ │ └── tiny.flac │ │ ├── iso/ │ │ │ └── iso.iso │ │ ├── ogg/ │ │ │ └── vorbis.ogg │ │ ├── pcap/ │ │ │ └── pcap.pcap │ │ ├── php/ │ │ │ └── php.php │ │ ├── rtf/ │ │ │ └── rich.rtf │ │ ├── tga/ │ │ │ └── footer.tga │ │ ├── tiff/ │ │ │ ├── tiff-be.tif │ │ │ └── tiff-le.tif │ │ ├── webm/ │ │ │ └── webm.webm │ │ ├── xar/ │ │ │ ├── hello-world.xar │ │ │ └── mini.xar │ │ └── xz/ │ │ └── xz.xz │ └── mitra_candidates/ │ ├── DS_Store │ ├── ace.ace │ ├── dicom.dcm │ ├── hdf5.h5 │ ├── html.htm │ ├── jp2-stream.jp2 │ ├── jp2.jp2 │ ├── lha.lzh │ ├── lzip.lz │ ├── mini.bplist │ ├── mini.plist │ ├── mini.protobuf │ ├── pcapng.pcapng │ ├── photoshop.psd │ ├── qoi.qoi │ ├── raw.tga │ ├── tiny.avro │ ├── wad.wad │ └── wasm.wasm ├── website/ │ ├── .gitignore │ ├── README.md │ ├── index.html │ ├── jsconfig.json │ ├── package.json │ ├── public/ │ │ ├── model/ │ │ │ ├── config.json │ │ │ └── model.json │ │ └── models/ │ │ ├── standard_v3_2/ │ │ │ ├── config.min.json │ │ │ ├── metadata.json │ │ │ └── model.json │ │ └── standard_v3_3/ │ │ ├── README.md │ │ ├── config.min.json │ │ ├── metadata.json │ │ └── model.json │ ├── src/ │ │ ├── App.vue │ │ └── main.js │ └── vite.config.js └── website-ng/ ├── .gcloudignore ├── .gitignore ├── README.md ├── app.yaml ├── astro.config.mjs ├── components.json ├── content.config.ts ├── jsrepo.json ├── package.json ├── public/ │ └── models/ │ 
├── standard_v3_2/ │ │ ├── config.min.json │ │ ├── metadata.json │ │ └── model.json │ └── standard_v3_3/ │ ├── README.md │ ├── config.min.json │ ├── metadata.json │ └── model.json ├── src/ │ ├── components/ │ │ └── MagikaDemo.svelte │ ├── content/ │ │ └── docs/ │ │ ├── additional-resources/ │ │ │ ├── changelog.md │ │ │ ├── disclaimer.md │ │ │ ├── faq.md │ │ │ ├── license.md │ │ │ ├── related-blog-posts.md │ │ │ └── research-papers-and-citation.md │ │ ├── cli-and-bindings/ │ │ │ ├── cli.md │ │ │ ├── js-api.md │ │ │ ├── js.md │ │ │ ├── other-bindings.md │ │ │ ├── overview.md │ │ │ ├── python.md │ │ │ └── rust.md │ │ ├── contributing/ │ │ │ ├── creating-new-bindings.md │ │ │ ├── how-to-contribute.md │ │ │ ├── known-limitations.md │ │ │ └── reporting-security-vulnerabilities.md │ │ ├── core-concepts/ │ │ │ ├── how-magika-works.md │ │ │ ├── models-and-content-types.md │ │ │ ├── prediction-modes.md │ │ │ └── understanding-the-output.md │ │ ├── demo/ │ │ │ └── magika-demo.mdx │ │ ├── getting-started/ │ │ │ ├── installation.mdx │ │ │ └── quick-start.md │ │ ├── index.mdx │ │ ├── introduction/ │ │ │ └── overview.md │ │ └── models/ │ │ └── standard_v3_3.md │ ├── content.config.ts │ ├── lib/ │ │ ├── components/ │ │ │ └── ui/ │ │ │ ├── button/ │ │ │ │ ├── button.svelte │ │ │ │ └── index.ts │ │ │ ├── card/ │ │ │ │ ├── card-action.svelte │ │ │ │ ├── card-content.svelte │ │ │ │ ├── card-description.svelte │ │ │ │ ├── card-footer.svelte │ │ │ │ ├── card-header.svelte │ │ │ │ ├── card-title.svelte │ │ │ │ ├── card.svelte │ │ │ │ └── index.ts │ │ │ ├── file-drop-zone/ │ │ │ │ ├── file-drop-zone.svelte │ │ │ │ ├── index.ts │ │ │ │ └── types.ts │ │ │ ├── input/ │ │ │ │ ├── index.ts │ │ │ │ └── input.svelte │ │ │ ├── label/ │ │ │ │ ├── index.ts │ │ │ │ └── label.svelte │ │ │ ├── progress/ │ │ │ │ ├── index.ts │ │ │ │ └── progress.svelte │ │ │ ├── tabs/ │ │ │ │ ├── index.ts │ │ │ │ ├── tabs-content.svelte │ │ │ │ ├── tabs-list.svelte │ │ │ │ ├── tabs-trigger.svelte │ │ │ │ └── 
tabs.svelte │ │ │ └── textarea/ │ │ │ ├── index.ts │ │ │ └── textarea.svelte │ │ ├── utils/ │ │ │ └── utils.ts │ │ └── utils.ts │ ├── pages/ │ │ ├── install.ps1.ts │ │ └── install.sh.ts │ └── styles/ │ └── global.css ├── svelte.config.js └── tsconfig.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .cargo/config.toml ================================================ [build] target-dir = "rust/target" ================================================ FILE: .dockerignore ================================================ # Include any files or directories that you don't want to be copied to your # container here (e.g., local build artifacts, temporary files, etc.). # # For more help, visit the .dockerignore file reference guide at # https://docs.docker.com/go/build-context-dockerignore/ **/.DS_Store **/__pycache__ **/.venv **/.classpath **/.dockerignore **/.env **/.git **/.gitignore **/.project **/.settings **/.toolstarget **/.vs **/.vscode **/*.*proj.user **/*.dbmdl **/*.jfm **/bin **/charts **/docker-compose* **/compose* **/Dockerfile* **/node_modules **/npm-debug.log **/obj **/secrets.dev.yaml **/values.dev.yaml LICENSE # Keep this if the container wants it, this is a test file. !tests_data/basic/dockerfile/Dockerfile ================================================ FILE: .gemini/config.yaml ================================================ code_review: pull_request_opened: summary: false ================================================ FILE: .gitattributes ================================================ /tests_data/** -text ================================================ FILE: .github/CODEOWNERS ================================================ # Default owners (lowest precedence). 
* @reyammer @invernizzi # Julien owns the Rust code /rust/ @ia0 # Yanick owns the Python code, all docs, and test data /python/ @reyammer *.md @reyammer /tests_data/ @reyammer # Julien still owns the Rust changelogs /rust/**/CHANGELOG.md @ia0 # Luca owns the JS code, docs, and website /js/ @invernizzi /docs/js.md @invernizzi /website/ @invernizzi ================================================ FILE: .github/ISSUE_TEMPLATE/misdetection.md ================================================ --- name: Misdetection about: Report a file, or files, that have been misdetected as something that they aren't. title: "[Misdetection] file misdetected as " labels: misdetection, needs triage assignees: '' --- **What should the file have been detected as? What has the file been misdetected as?** Ex. "HTML pages are being mistaken for generic XML files.", "C# code misdetected as Java.", or "Can't tell the difference between exe and dll files." **Please link or attach the misdetected file below** (Do NOT upload PII!) Placeholder.zip **Additional context** Add any other context or screenshots about the misdetection here. ================================================ FILE: .github/ISSUE_TEMPLATE/new_content_type_request.md ================================================ --- name: New content type request about: Suggest a new content type for Magika to detect title: "[NEW CONTENT TYPE REQUEST]" labels: missing content type, needs triage assignees: '' --- **What type of file would you like magika to detect?** Write the full name of the file format, followed by the file extension in parentheses. 
Examples: - "Nintendo Binary Revolution RESource (.brres)" - "Valve Map Format file (.vmf)" - "Blender save file (.blend)" - "RPG Maker 2000/2003 Lcf DataBase (.ldb)" - "COLLADA file (.dae)" - "Unreal Engine Asset (.uasset)" **What software can create/open these files?** Examples: - Simply state the name of the software, and where it can be obtained: - "Valve Hammer Editor, included with any Source Engine game on Steam." - If the file is common enough, write a general description: - "Many 3D modeling software." - "Any text editor." - Link to the GitHub page: - "[BrawlCrate](https://github.com/soopercool101/BrawlCrate)" - Link to the software's website main page or download page: - "[Blender](https://www.blender.org/download/)" - "[Unreal Engine](https://www.unrealengine.com/en-US)" - If more than one program can open the file type, list them: - "[RPG Maker 2003](https://www.rpgmakerweb.com/products/rpg-maker-2003), [easyRPG](https://easyrpg.org/), Wolf RPG Editor" **Where can these files be found?** Examples: - Simply state where the files can be obtained: - "Any RPGMaker 2000/2003 game." - Provide instructions on how to obtain the files: - "Dump the ISO of any of [these Wii games](https://wiki.vg-resource.com/BRRES#List_of_games_using_the_format)" - "Use [bspsrc](https://github.com/ata4/bspsrc) to decompile the BSP files of any Source Engine game. Use [GCFScape](https://nemstools.github.io/pages/GCFScape-Download.html) to extract even more BSPs from 'dir.vpk' files. Make your own with Hammer." - Link to a source of the files: - "[Unreal Marketplace](https://www.unrealengine.com/marketplace/en-US/store)" - "https://blendermarket.com/categories/models, https://www.turbosquid.com/Search/3D-Models/marketplace/blend, https://sketchfab.com/store/3d-models/blend?ref=store-home" - Provide some of your own: - "placeholder.zip" as attachment. **If possible, please provide a specification for this file type.** Link to a resource that explains how the file works. 
Examples: - A wiki page: - "https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)" - GitHub documentation: - "https://gota7.github.io/NitroStudio2/specs/sequenceArchive.html" - If you find more than one source, list them: - "https://wiki.tockdom.com/wiki/BRRES_(File_Format), https://horizon.miraheze.org/wiki/.brres" - "http://www.amnoid.de/gc/Rarc.txt, https://kuribo64.net/wiki/?page=RARC, https://wiki.tockdom.com/wiki/RARC_(File_Format)" - "https://www.3dbrew.org/wiki/CGFX, https://mk3ds.com/index.php?title=CGFX_(File_Format)" - "https://mk8.tockdom.com/wiki/BFRES_(File_Format), https://wiki.vg-resource.com/BFRES, https://wiki.oatmealdome.me/BFRES_(File_Format)" - A PDF: - "https://www.collada.org/2008/03/COLLADASchema" **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" - package-ecosystem: "docker" directory: "/" schedule: interval: "daily" - package-ecosystem: "pip" directory: "/python" schedule: interval: "daily" - package-ecosystem: "npm" directory: "/js" schedule: interval: "weekly" - package-ecosystem: "cargo" directory: "/rust" schedule: interval: "weekly" ================================================ FILE: .github/labeler.yml ================================================ needs triage: - "/.*/" ================================================ FILE: .github/scorecard.yml ================================================ # Scorecard maintainer annotations. # See https://github.com/ossf/scorecard/blob/main/config/README.md annotations: # Binary files in tests_data/ are only used for testing. 
- checks: - binary-artifacts reasons: - reason: test-data - checks: - pinned-dependencies reasons: # Test data with unpinned dependencies: # - tests_data/basic/dockerfile/Dockerfile - reason: test-data # CI/CD containers meant to run the latest version: # - .github/workflows/python-e2e-test.yml - reason: remediated ================================================ FILE: .github/workflows/cli-latest.yml ================================================ name: Update the trampoline release on: # This is called by cli-release.yml but we don't use the input. workflow_call: inputs: plan: required: true type: string # In case we want to run it manually (the workflow is idempotent). workflow_dispatch: permissions: contents: write jobs: update: runs-on: ubuntu-latest steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - run: ./latest.sh working-directory: rust env: GH_TOKEN: ${{ github.token }} ================================================ FILE: .github/workflows/cli-release.yml ================================================ # This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist # # Copyright 2022-2024, axodotdev # SPDX-License-Identifier: MIT or Apache-2.0 # # CI that: # # * checks for a Git Tag that looks like a release # * builds artifacts with dist (archives, installers, hashes) # * uploads those artifacts to temporary workflow zip # * on success, uploads the artifacts to a GitHub Release # # Note that the GitHub Release will be created with a generated # title/body based on your changelogs. name: Release permissions: "contents": "write" # This task will run whenever you push a git tag that looks like a version # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 
# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION # must be a Cargo-style SemVer Version (must have at least major.minor.patch). # # If PACKAGE_NAME is specified, then the announcement will be for that # package (erroring out if it doesn't have the given version or isn't dist-able). # # If PACKAGE_NAME isn't specified, then the announcement will be for all # (dist-able) packages in the workspace with that version (this mode is # intended for workspaces with only one dist-able package, or with all dist-able # packages versioned/released in lockstep). # # If you push multiple tags at once, separate instances of this workflow will # spin up, creating an independent announcement for each one. However, GitHub # will hard limit this to 3 tags per commit, as it will assume more tags is a # mistake. # # If there's a prerelease-style suffix to the version, then the release(s) # will be marked as a prerelease. on: pull_request: push: tags: - 'cli**[0-9]+.[0-9]+.[0-9]+*' jobs: # Run 'dist plan' (or host) to determine what tasks we need to do plan: runs-on: "ubuntu-latest" outputs: val: ${{ steps.plan.outputs.manifest }} tag: ${{ !github.event.pull_request && github.ref_name || '' }} tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }} publishing: ${{ !github.event.pull_request }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install dist # we specify bash to get pipefail; it guards against the `curl` command # failing. 
otherwise `sh` won't catch that `curl` returned non-0 shell: bash run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh" - name: Cache dist uses: actions/upload-artifact@v6 with: name: cargo-dist-cache path: ~/.cargo/bin/dist # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. # (PRs run on the *source* but secrets are usually on the *target* -- that's *good* # but also really annoying to build CI around when it needs secrets to work right.) - id: plan run: | dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json echo "dist ran successfully" cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" uses: actions/upload-artifact@v6 with: name: artifacts-plan-dist-manifest path: plan-dist-manifest.json # Build and packages all the platform-specific things build-local-artifacts: name: build-local-artifacts (${{ join(matrix.targets, ', ') }}) # Let the initial task tell us to not run (currently very blunt) needs: - plan if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} strategy: fail-fast: false # Target platforms/runners are computed by dist in create-release. 
# Each member of the matrix has the following arguments: # # - runner: the github runner # - dist-args: cli flags to pass to dist # - install-dist: expression to run to install dist on the runner # # Typically there will be: # - 1 "global" task that builds universal installers # - N "local" tasks that build each platform's binaries and platform-specific installers matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} runs-on: ${{ matrix.runner }} container: ${{ matrix.container && matrix.container.image || null }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json permissions: "attestations": "write" "contents": "read" "id-token": "write" steps: - name: enable windows longpaths run: | git config --global core.longpaths true - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install Rust non-interactively if not already installed if: ${{ matrix.container }} run: | if ! 
command -v cargo > /dev/null 2>&1; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> $GITHUB_PATH fi - name: Install dist run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - name: Install dependencies run: | ${{ matrix.packages_install }} - name: Build artifacts run: | # Actually do builds and make zips and whatnot dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json echo "dist ran successfully" - name: Attest uses: actions/attest-build-provenance@v3 with: subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" - id: cargo-dist name: Post-build # We force bash here just because github makes it really hard to get values up # to "real" actions without writing to env-vars, and writing to env-vars has # inconsistent syntax between shell and powershell. 
shell: bash run: | # Parse out what we just built and upload it to scratch storage echo "paths<<EOF" >> "$GITHUB_OUTPUT" dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" uses: actions/upload-artifact@v6 with: name: artifacts-build-local-${{ join(matrix.targets, '_') }} path: | ${{ steps.cargo-dist.outputs.paths }} ${{ env.BUILD_MANIFEST_NAME }} # Build and package all the platform-agnostic(ish) things build-global-artifacts: needs: - plan - build-local-artifacts runs-on: "ubuntu-latest" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. 
checksums) - name: Fetch local artifacts uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - id: cargo-dist shell: bash run: | dist build ${NEEDS_PLAN_OUTPUTS_TAG_FLAG} --output-format=json "--artifacts=global" > dist-manifest.json echo "dist ran successfully" # Parse out what we just built and upload it to scratch storage echo "paths<<EOF" >> "$GITHUB_OUTPUT" jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" env: NEEDS_PLAN_OUTPUTS_TAG_FLAG: ${{ needs.plan.outputs.tag-flag }} - name: "Upload artifacts" uses: actions/upload-artifact@v6 with: name: artifacts-build-global path: | ${{ steps.cargo-dist.outputs.paths }} ${{ env.BUILD_MANIFEST_NAME }} # Determines if we should publish/announce host: needs: - plan - build-local-artifacts - build-global-artifacts # Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine) if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} runs-on: "ubuntu-latest" outputs: val: ${{ steps.host.outputs.manifest }} steps: - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ merge-multiple: true - id: host shell: bash run: | dist host ${NEEDS_PLAN_OUTPUTS_TAG_FLAG} --steps=upload --steps=release --output-format=json > dist-manifest.json echo "artifacts 
uploaded and released successfully" cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" env: NEEDS_PLAN_OUTPUTS_TAG_FLAG: ${{ needs.plan.outputs.tag-flag }} - name: "Upload dist-manifest.json" uses: actions/upload-artifact@v6 with: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json # Create a GitHub Release while uploading all files to it - name: "Download GitHub Artifacts" uses: actions/download-artifact@v7 with: pattern: artifacts-* path: artifacts merge-multiple: true - name: Cleanup run: | # Remove the granular manifests rm -f artifacts/*-dist-manifest.json - name: Create GitHub Release env: PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" RELEASE_COMMIT: "${{ github.sha }}" NEEDS_PLAN_OUTPUTS_TAG: ${{ needs.plan.outputs.tag }} run: | # Write and read notes from a file to avoid quoting breaking things echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt gh release create "${NEEDS_PLAN_OUTPUTS_TAG}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* announce: needs: - plan - host # use "always() && ..." to allow us to wait for all publish jobs while # still allowing individual publish jobs to skip themselves (for prereleases). # "host" however must run to completion, no skipping allowed! 
if: ${{ always() && needs.host.result == 'success' }} runs-on: "ubuntu-latest" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive custom-cli-latest: needs: - plan - announce uses: ./.github/workflows/cli-latest.yml with: plan: ${{ needs.plan.outputs.val }} secrets: inherit ================================================ FILE: .github/workflows/codeql.yml ================================================ # For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. # # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. # # ******** NOTE ******** # We have attempted to detect the languages in your repository. Please check # the `language` matrix defined below to confirm you have the correct set of # supported CodeQL languages. # name: "CodeQL" on: push: branches: ["main"] pull_request: branches: ["main"] schedule: - cron: "42 13 * * 2" permissions: contents: read jobs: analyze: name: Analyze # Runner size impacts CodeQL analysis time. To learn more, please see: # - https://gh.io/recommended-hardware-resources-for-running-codeql # - https://gh.io/supported-runners-and-hardware-resources # - https://gh.io/using-larger-runners # Consider using larger runners for possible analysis time improvements. 
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: # required for all workflows security-events: write # only required for workflows in private repositories actions: read contents: read strategy: fail-fast: false matrix: language: ["javascript-typescript", "python"] # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - name: Checkout repository uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun # If the Autobuild fails above, remove it and uncomment the following three lines. # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. # - run: | # echo "Run, Build Application using script" # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5 with: category: "/language:${{matrix.language}}" ================================================ FILE: .github/workflows/docs-check.yml ================================================ name: Docs - Check documentation on: workflow_dispatch: push: branches: - "main" pull_request: paths: - "*.md" - "assets/**/*.md" - "docs/**/*.md" - "js/**/*.md" - "python/**/*.md" - "rust/**/*.md" - "website-ng/**/*.md" schedule: - cron: "42 7 * * 4" # Run weekly permissions: contents: read jobs: run-check-docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - name: Install uv run: curl -LsSf https://astral.sh/uv/0.5.22/install.sh | sh - name: "Run check_documentation.py script" working-directory: python run: uv run ./scripts/check_documentation.py ================================================ FILE: .github/workflows/github-issue-labeler.yml ================================================ name: New issue labeler on: # Runs on newly opened issues issues: types: [opened] # Sets permissions of the GITHUB_TOKEN permissions: issues: write contents: read jobs: triage: runs-on: ubuntu-latest steps: - uses: github/issue-labeler@c1b0f9f52a63158c4adc09425e858e87b32e9685 # pin@v3.4 with: configuration-path: .github/labeler.yml enable-versioned-regex: 0 repo-token: "${{secrets.GITHUB_TOKEN}}" ================================================ FILE: .github/workflows/github-pages.yml 
================================================
# Builds the website with yarn and publishes the result to GitHub Pages.
name: Pages - deploy

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:

  # Pushes to the default branch that touch the website sources.
  push:
    branches:
      - "main"
    paths:
      - "website/**"

# GITHUB_TOKEN scopes required by the Pages deployment.
permissions:
  contents: read
  pages: write
  id-token: write

# Deployments share a single "pages" group; a running deployment is never
# cancelled, and only the most recently queued run waits behind it.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  deploy-pages:
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    # Every `run` step executes from the website directory.
    defaults:
      run:
        working-directory: ./website
    steps:
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
        with:
          ref: main

      - name: Set up Node
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
        with:
          node-version: 18.x

      - name: Install dependencies
        run: yarn install --frozen-lockfile

      - name: Build
        run: yarn run build-github

      - name: Setup Pages
        uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # pin@v4

      # `uses` steps ignore defaults.run.working-directory, hence the full path.
      - name: Upload artifact
        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # pin@v3
        with:
          path: "./website/dist"

      # The `deployment` id is what the environment URL above refers to.
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # pin@v4

================================================
FILE: .github/workflows/go-test.yml
================================================
# Builds the Go Docker image; building the image also runs the Go tests.
name: Go - build and run tests

on:
  workflow_dispatch:
  push:
    branches: ["main"]
  # Pull requests only trigger this workflow when Go sources, the shared test
  # data, or this workflow file change.
  pull_request:
    paths:
      - "go/**"
      - "tests_data/**"
      - ".github/workflows/go-test.yml"

permissions:
  contents: read

jobs:
  unit-testing:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4

      - name: Build the Go Docker image (which also runs tests)
        run: docker build -f go/docker/Dockerfile .
================================================
FILE: .github/workflows/js-check-import-scenarios.yml
================================================
# Tests that the Magika library can be used in various scenarios (e.g., commonjs/node, esmodule/node, esmodule/browser)
name: JS - check import scenarios

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
  pull_request:
    paths:
      - 'js/simple_examples/**'
      # Must match this workflow's own file name so that edits to the workflow
      # re-run it on the pull request that makes them.
      - '.github/workflows/js-check-import-scenarios.yml'

# The job only reads the repository; keep the GITHUB_TOKEN read-only.
permissions:
  contents: read

jobs:
  run-examples:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: js/simple_examples
    steps:
      # No explicit `ref`: check out the commit that triggered the run, so that
      # pull requests are tested against their own changes rather than main.
      - name: Checkout
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # pin@v4
      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
        name: Set up Node.js
        with:
          node-version: "20.x"
          registry-url: "https://registry.npmjs.org"

      # Build the library once; node_modules is removed after the build and
      # after each example run below.
      - name: Install Magika dependencies
        run: yarn install --frozen-lockfile
        working-directory: js
      - name: Build Magika
        run: yarn build && rm -Rf node_modules
        working-directory: js

      - name: Install dependencies for the node-commonjs-example
        run: yarn install
        working-directory: js/simple_examples/node-commonjs-example
      - name: Test node-commonjs-example
        run: yarn --silent start && rm -Rf node_modules
        working-directory: js/simple_examples/node-commonjs-example

      - name: Install dependencies for the node-esmodule-example
        run: yarn install
        working-directory: js/simple_examples/node-esmodule-example
      - name: Test node-esmodule-example
        run: yarn --silent start && rm -Rf node_modules
        working-directory: js/simple_examples/node-esmodule-example

      # The browser example additionally needs a Playwright-managed Chromium.
      - name: Install dependencies for the browser-esmodule-example
        run: yarn install && yarn playwright install chromium
        working-directory: js/simple_examples/browser-esmodule-example
      - name: Test browser-esmodule-example
        run: yarn --silent start && rm -Rf node_modules
        working-directory: js/simple_examples/browser-esmodule-example

      - name: Install dependencies for the typescript-esmodule-example
        run: yarn install
        working-directory:
js/simple_examples/typescript-esmodule-example
      - name: Test typescript-esmodule-example
        run: yarn --silent start && rm -Rf node_modules
        working-directory: js/simple_examples/typescript-esmodule-example

================================================
FILE: .github/workflows/js-docs-builder.yml
================================================
# Regenerates the JS documentation and commits the result in the runner's
# local clone.
name: JS - generate docs

on:
  # Runs on pushes targeting the default branch
  push:
    branches: ["main"]
    paths:
      - "js/**"
      - ".github/workflows/**"

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

permissions:
  contents: read

jobs:
  makeDocs:
    permissions:
      contents: write
      id-token: write
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
        with:
          ref: main
      - name: Generate docs
        working-directory: js
        run: |
          yarn install --frozen-lockfile
          yarn run build
          yarn run make-docs
      # `git commit -a` exits non-zero when no tracked file changed, which would
      # fail the job whenever the generated docs are already up to date; only
      # commit when the working tree actually differs.
      # NOTE(review): the commit is never pushed by this workflow - confirm
      # whether a push step is intended.
      - name: Commit
        run: |
          git config --local user.email "invernizzi.l@gmail.com"
          git config --local user.name "Luca Invernizzi"
          if git diff --quiet; then
            echo "Generated docs are unchanged; nothing to commit."
          else
            git commit -m "Update docs" -a
          fi

================================================
FILE: .github/workflows/js-publish.yml
================================================
# Manually triggered: builds and tests the JS package, then publishes it to
# the npm registry.
name: JS - publish

on:
  workflow_dispatch:

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
      - name: Enable Corepack
        run: corepack enable
      - uses: actions/setup-node@v6
        with:
          node-version: "20.x"
          registry-url: "https://registry.npmjs.org"
      - name: Build
        working-directory: js
        run: |
          yarn install --frozen-lockfile
          yarn run build
          yarn run test
      # The registry token is exposed to the publish step only.
      - name: Publish
        working-directory: js
        run: yarn publish
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

================================================
FILE: .github/workflows/js-test.yml
================================================
name: JS - tests

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
  pull_request:
    paths:
      - "js/**"
      - "tests_data/**"
      -
".github/workflows/js-*" permissions: contents: read jobs: unit-testing: strategy: matrix: node-version: ["18", "20"] os: ["ubuntu-latest", "macos-latest"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4 with: node-version: ${{ matrix.node-version }} - name: Install dependencies working-directory: js run: yarn install --frozen-lockfile - name: Build working-directory: js run: yarn run build - name: Run tests working-directory: js run: yarn test ================================================ FILE: .github/workflows/python-build-and-release-package.yml ================================================ name: Python - build and release package on: workflow_dispatch: push: branches: - "main" tags: # Note: These need to match what specified in env.[TEST_]RELEASE_TAG_PREFIX below. - "python-v*" - "python-test-v*" pull_request: paths: - "python/**" - "rust/**" - "tests_data/**" - ".github/workflows/python-build-and-release-package.yml" schedule: - cron: "12 3 * * 4" # Run everything once per week. - cron: "12 3 * * 1" # Refresh the cache an additional time. release: types: [created] permissions: contents: read env: # Trigger for publishing to pypi and testpypi (and for pre-release checks # enforcement). RELEASE_TAG_PREFIX: "python-v" TEST_RELEASE_TAG_PREFIX: "python-test-v" UV_VERSION: "0.9.5" jobs: # This job acts as a gatekeeper for releases, which are triggered by a tag # push. It performs critical pre-release checks. These checks are skipped for # non-release pushes. 
pre-release-checks: name: Pre-release checks runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v5 - name: Setup python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "3.12" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - name: Check package for release env: IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }} run: | if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then FULL_TAG_REF="${GITHUB_REF}" TAG_NAME="${GITHUB_REF_NAME}" if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}" else PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}" fi TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}" CHECKER_OPTIONS="--expected-version ${TAG_VERSION}" else CHECKER_OPTIONS="--report-only" fi # Note: this uses the magika python package installed via uv. Also, # pip is not available here, so we skip pip show check. 
uv run ./scripts/pre_release_check.py $CHECKER_OPTIONS --no-check-pip-show-package-version working-directory: python build-wheels: needs: [pre-release-checks] runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - runner: windows-latest target: x64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v5 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "3.12" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - if: matrix.platform.runner == 'ubuntu-latest' uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: rust/onnx/runtime/build/Linux key: maturin-${{ matrix.platform.target }}-${{ hashFiles('rust/onnx/build.sh') }} - if: matrix.platform.runner == 'ubuntu-latest' name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out=../dist before-script-linux: "${{ github.workspace }}/rust/onnx/maturin.sh" manylinux: 2_28 working-directory: python - if: matrix.platform.runner != 'ubuntu-latest' name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out=../dist working-directory: python - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheel-${{ matrix.platform.runner }}-${{ matrix.platform.target }} path: dist # Download, install, and test the wheels with different versions of python test-wheels: needs: [build-wheels] runs-on: ${{ matrix.platform.runner }} if: github.event.schedule != '12 3 * * 1' strategy: # We want to know in which exact situation the tests fail fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] platform: - runner: ubuntu-latest target: x86_64 - runner: windows-latest target: x64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v5 - uses: 
actions/download-artifact@v4 with: name: wheel-${{ matrix.platform.runner }}-${{ matrix.platform.target }} path: dist - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh # Attempt "uv add magika.whl", in a temporary directory - name: Check that `uv add magika.whl` works shell: bash run: | mkdir /tmp/test-uv cp -vR dist/*.whl /tmp/test-uv cd /tmp/test-uv uv init uv add ./$(\ls -1 *.whl | head -n 1) # From now on, magika will be available in the global environment - name: Install the wheel via pip run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.whl')[0])") - run: magika --version - run: "python3 -c 'import magika; m = magika.Magika(); print(m)'" - run: magika -r tests_data/basic - run: python3 ./python/scripts/run_quick_test_magika_cli.py - run: python3 ./python/scripts/run_quick_test_magika_module.py - name: Check package for release readiness env: IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }} shell: bash run: | if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then FULL_TAG_REF="${GITHUB_REF}" TAG_NAME="${GITHUB_REF_NAME}" if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}" else PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}" fi TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}" CHECKER_OPTIONS="--expected-version ${TAG_VERSION}" else CHECKER_OPTIONS="--report-only" fi # Note: this uses the magika python package that was just built. 
python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS working-directory: python build-pure-python-wheel-and-sdist: needs: [pre-release-checks] runs-on: ubuntu-latest if: github.event.schedule != '12 3 * * 1' steps: - uses: actions/checkout@v5 - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - run: uv run ./scripts/prepare_pyproject_for_pure_python_wheel.py working-directory: python - name: Build pure python wheel and source distribution run: uv build --out-dir ../dist working-directory: python - name: Upload pure python wheel uses: actions/upload-artifact@v4 with: name: wheel-pure-python path: dist/*.whl - name: Upload sdist uses: actions/upload-artifact@v4 with: name: sdist path: dist/*.tar.gz # Download, install, and test the pure python wheel on multiple platforms test-pure-python-wheel: needs: [build-pure-python-wheel-and-sdist] runs-on: ${{ matrix.platform.runner }} if: github.event.schedule != '12 3 * * 1' strategy: # We want to know in which exact situation the tests fail fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] platform: - runner: ubuntu-latest target: x86_64 - runner: windows-latest target: x64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v5 - uses: actions/download-artifact@v4 with: name: wheel-pure-python path: dist - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh # Attempt "uv add magika.whl", in a temporary directory - name: Check that `uv add magika.whl` works shell: bash run: | mkdir /tmp/test-uv cp -vR dist/*.whl /tmp/test-uv cd /tmp/test-uv uv init uv add ./$(\ls -1 *.whl | head -n 1) # From now on, magika will be available in the global environment - name: Install the wheel run: python3 -m pip install $(python -c "import glob; 
print(glob.glob('dist/*.whl')[0])") # Check that the magika script points to the placeholder raising a warning - run: magika --version | grep -C10 WARNING | grep -C10 magika-python-client # Check that the fallback magika's python client can be run - run: magika-python-client -r tests_data/basic # Check that the results of the python's client are correct - run: python3 ./python/scripts/run_quick_test_magika_cli.py --client-path magika-python-client # Test the python module - run: "python3 -c 'import magika; m = magika.Magika(); print(m)'" - run: python3 ./python/scripts/run_quick_test_magika_module.py - name: Check package for release readiness env: IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }} shell: bash run: | if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then FULL_TAG_REF="${GITHUB_REF}" TAG_NAME="${GITHUB_REF_NAME}" if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}" else PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}" fi TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}" CHECKER_OPTIONS="--expected-version ${TAG_VERSION}" else CHECKER_OPTIONS="--report-only" fi # Note: this uses the magika python package that was just built. 
python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS --use-python-client working-directory: python test-sdist: needs: [build-pure-python-wheel-and-sdist] runs-on: ${{ matrix.platform.runner }} if: github.event.schedule != '12 3 * * 1' strategy: # We want to know in which exact situation the tests fail fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] platform: - runner: ubuntu-latest target: x86_64 - runner: windows-latest target: x64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v5 - uses: actions/download-artifact@v4 with: name: sdist path: dist - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh # Attempt "uv add magika.whl", in a temporary directory - name: Check that `uv add magika.tar.gz` works shell: bash run: | mkdir /tmp/test-uv cp -vR dist/*.tar.gz /tmp/test-uv cd /tmp/test-uv uv init uv add ./$(\ls -1 *.tar.gz | head -n 1) # From now on, magika will be available in the global environment - name: Install the sdist run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.tar.gz')[0])") # Check that the magika script points to the placeholder raising a warning - run: magika --version | grep -C10 WARNING | grep -C10 magika-python-client # Check that the fallback magika's python client can be run - run: magika-python-client -r tests_data/basic # Check that the results of the python's client are correct - run: python3 ./python/scripts/run_quick_test_magika_cli.py --client-path magika-python-client # Test the python module - run: "python3 -c 'import magika; m = magika.Magika(); print(m)'" - run: python3 ./python/scripts/run_quick_test_magika_module.py - name: Check package for release readiness env: IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && 
(startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }} shell: bash run: | if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then FULL_TAG_REF="${GITHUB_REF}" TAG_NAME="${GITHUB_REF_NAME}" if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}" else PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}" fi TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}" CHECKER_OPTIONS="--expected-version ${TAG_VERSION}" else CHECKER_OPTIONS="--report-only" fi # Note: this uses the magika python package that was just built. python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS --use-python-client working-directory: python # Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ # Note: The publishing is only done with pushes of release tags. publish-to-pypi: name: Publish to PyPI if: github.event_name == 'push' && github.ref_type == 'tag' && contains(github.ref_name, 'python') needs: [test-wheels, test-pure-python-wheel, test-sdist] runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/magika permissions: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: Download all the artifacts (binary wheels, pure python wheel, sdist) uses: actions/download-artifact@v4 with: path: artifacts/ - name: Flatten artifacts structure run: | # List all files for debugging ls -alR artifacts/ # Find all files inside the subdirectories and move them up find artifacts/ -mindepth 2 -type f -exec mv -t artifacts/ {} + # Remove the now-empty subdirectories find artifacts/ -mindepth 1 -type d -empty -delete # Check structure after flattening ls -alR artifacts/ - name: Publish distribution to PyPI if: github.event_name == 'push' && github.ref_type == 'tag' && startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) uses: pypa/gh-action-pypi-publish@release/v1 
with: packages-dir: artifacts/ # Note: The publishing is only done with pushes of test release tags. publish-to-testpypi: name: Publish to TestPyPI if: github.event_name == 'push' && github.ref_type == 'tag' && contains(github.ref_name, 'python') needs: [test-wheels, test-pure-python-wheel, test-sdist] runs-on: ubuntu-latest environment: name: testpypi url: https://test.pypi.org/p/magika permissions: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: Download all the artifacts (binary wheels, pure python wheel, sdist) uses: actions/download-artifact@v4 with: path: artifacts/ - name: Flatten artifacts structure run: | # List all files for debugging ls -alR artifacts/ # Find all files inside the subdirectories and move them up find artifacts/ -mindepth 2 -type f -exec mv -t artifacts/ {} + # Remove the now-empty subdirectories find artifacts/ -mindepth 1 -type d -empty -delete # Check structure after flattening ls -alR artifacts/ - name: Publish distribution to TestPyPI if: github.event_name == 'push' && github.ref_type == 'tag' && startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX) uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: artifacts/ repository-url: https://test.pypi.org/legacy/ ================================================ FILE: .github/workflows/python-test-published-package.yml ================================================ # This routinely checks that published packages are installable and work # properly. This makes sure that a new version of one of our dependencies is not # breaking our releases. 
# TODO: test more magika package versions # TODO: check the actual predicted content types name: Python - test published packages on: schedule: - cron: "42 4 * * *" # Run daily workflow_dispatch: pull_request: paths: - ".github/workflows/python-test-published-package.yml" permissions: contents: read env: UV_VERSION: "0.9.5" jobs: unit-testing: strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - name: Check that `uv add magika` works shell: bash run: mkdir /tmp/test-uv && cd /tmp/test-uv && uv init && uv add magika && cd - && rm -rf /tmp/test-uv - name: Install magika with pip run: python3 -m pip install magika - run: python3 -c 'import magika; m = magika.Magika(); print(m)' - run: magika --version # The latest published model does not necessarily support detection for # all types in our tests data; thus, for now we just check that the magika # CLI does not crash when scanning the files, without checking the actual # predictions. - run: magika -r tests_data/basic ================================================ FILE: .github/workflows/python-test-published-rc-package.yml ================================================ # This routinely checks that the latest published -rc packages are installable # and work properly. This makes sure that a new version of one of our # dependencies is not breaking our releases. 
# TODO: test more magika package versions # TODO: check the actual predicted content types name: Python - test published -rc packages on: schedule: - cron: "42 3 * * *" # Run daily workflow_dispatch: pull_request: paths: - ".github/workflows/python-test-published-rc-package.yml" permissions: contents: read env: UV_VERSION: "0.9.5" jobs: unit-testing: strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - name: Check that `uv add --prerelease magika` works shell: bash run: mkdir /tmp/test-uv && cd /tmp/test-uv && uv init && uv add --prerelease allow magika && cd - && rm -rf /tmp/test-uv - name: Install magika with pip (considering prereleases) run: python3 -m pip install --pre magika - run: python3 -c 'import magika; m = magika.Magika(); print(m)' - run: magika --version # The latest published model does not necessarily support detection for # all types in our tests data; thus, for now we just check that the magika # CLI does not crash when scanning the files, without checking the actual # predictions. 
- run: magika -r tests_data/basic ================================================ FILE: .github/workflows/python-test-suite.yml ================================================ name: Python - run test suite on: workflow_dispatch: push: branches: - "main" pull_request: paths: - "python/**" - "rust/**" - "tests_data/**" - ".github/workflows/python-test-suite.yml" permissions: contents: read env: UV_VERSION: "0.9.5" jobs: unit-testing: strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] os: ["ubuntu-latest", "macos-latest"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5 with: python-version: "${{ matrix.python-version }}" - name: Install uv run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh - name: Install all projects dependencies (with the requested python version) working-directory: python run: uv sync --python ${{ matrix.python-version }} --all-extras --dev - name: Print python version working-directory: python run: uv run --python ${{ matrix.python-version }} python --version - name: Run ruff check working-directory: python run: uv run --python ${{ matrix.python-version }} ruff check --verbose - name: Run ruff format --check working-directory: python run: uv run --python ${{ matrix.python-version }} ruff format --check --verbose - name: Run mypy working-directory: python run: uv run --python ${{ matrix.python-version }} mypy src/magika tests - name: Run copyright checks working-directory: python run: uv run ./scripts/check_copyright.py - name: Run the python tests suite working-directory: python run: uv run --python ${{ matrix.python-version }} pytest tests -m "not slow" - name: Run magika --version working-directory: python run: uv run --python ${{ matrix.python-version }} magika --version shell: bash # Allows for cross-platform - name: Run 
magika with tests_data working-directory: python run: uv run --python ${{ matrix.python-version }} magika -r ../tests_data/basic - name: Run "magika cli" quick tests working-directory: python run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_cli.py - name: Run "magika module" quick tests working-directory: python run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_module.py - name: Run "magika module" quick tests + perf measurement working-directory: python run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_module.py --print-inference-stats --repeat 10 changelog-check: runs-on: ubuntu-latest steps: - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # pin@v4 with: fetch-depth: 0 - name: Check for undocumented changes run: ./scripts/check_changelog.sh ${GITHUB_BASE_REF} working-directory: python ================================================ FILE: .github/workflows/rust-test.yml ================================================ name: Rust - test on: workflow_dispatch: push: branches: - "main" pull_request: paths: - ".github/workflows/rust-*" - "assets/**" - "rust/**" - "tests_data/**" permissions: contents: read jobs: changelog: runs-on: ubuntu-latest steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 with: fetch-depth: 0 - run: ./changelog.sh working-directory: rust test: runs-on: ubuntu-latest continue-on-error: ${{ matrix.toolchain == 'nightly' }} strategy: matrix: toolchain: [stable, nightly] steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - run: rustup default ${{ matrix.toolchain }} - run: rustup component add rustfmt clippy - run: ./test.sh working-directory: rust run: runs-on: ${{ matrix.os }}-latest strategy: matrix: os: [ubuntu, macos, windows] steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - run: cargo build --release working-directory: rust/cli - run: 
rust/target/release/magika -r tests_data/basic ================================================ FILE: .github/workflows/scorecard.yml ================================================ # This workflow uses actions that are not certified by GitHub. They are provided # by a third-party and are governed by separate terms of service, privacy # policy, and support documentation. name: Scorecard supply-chain security on: workflow_dispatch: # For Branch-Protection check. Only the default branch is supported. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection branch_protection_rule: # To guarantee Maintained check is occasionally updated. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained schedule: - cron: "45 21 * * 2" push: branches: ["main"] # Declare default permissions as read only. permissions: read-all jobs: analysis: name: Scorecard analysis runs-on: ubuntu-latest permissions: # Needed to upload the results to code-scanning dashboard. security-events: write # Needed to publish results and get a badge (see publish_results below). id-token: write steps: - name: "Checkout code" uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 with: persist-credentials: false - name: "Run analysis" uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 with: results_file: results.sarif results_format: sarif # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: # - you want to enable the Branch-Protection check on a *public* repository, or # - you are installing Scorecard on a *private* repository # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. # repo_token: ${{ secrets.SCORECARD_TOKEN }} # Public repositories: # - Publish results to OpenSSF REST API for easy access by consumers # - Allows the repository to include the Scorecard badge. # - See https://github.com/ossf/scorecard-action#publishing-results. 
# For private repositories: # - `publish_results` will always be set to `false`, regardless # of the value entered here. publish_results: true # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 with: name: SARIF file path: results.sarif retention-days: 5 # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" uses: github/codeql-action/upload-sarif@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5 with: sarif_file: results.sarif ================================================ FILE: .github/workflows/website-test.yml ================================================ name: Website - tests on: workflow_dispatch: push: branches: - "main" pull_request: paths: - "website/**" - "js/**" - "tests_data/**" - ".github/workflows/website-*" permissions: contents: read jobs: build-and-test: strategy: matrix: node-version: ["20"] os: ["ubuntu-latest"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4 with: node-version: ${{ matrix.node-version }} - name: Install js dependencies working-directory: js run: yarn install --frozen-lockfile - name: Build js working-directory: js run: yarn run build - name: Create magika link working-directory: js run: yarn link - name: Link to local magika working-directory: website run: yarn link magika - name: Install website dependencies working-directory: website run: yarn install --frozen-lockfile - name: Build website working-directory: website run: yarn build # TODO: add some actual testing ================================================ FILE: .gitignore ================================================ *.pyc __pycache__/ .ipynb_checkpoints venv/ tmp/ .env *.swp *.egg-info dist/* 
*.pickle .s.yml */models-data/* .vscode scratchpad/ */node_modules/* docs/dist js/dist ================================================ FILE: CITATION.cff ================================================ cff-version: 1.2.0 message: "If you use this software, please cite it as below." authors: - family-names: "Fratantonio" given-names: "Yanick" - family-names: "Invernizzi" given-names: "Luca" - family-names: "Farah" given-names: "Loua" - family-names: "Kurt" given-names: "Thomas" - family-names: "Zhang" given-names: "Marina" - family-names: "Albertini" given-names: "Ange" - family-names: "Galilee" given-names: "Francois" - family-names: "Metitieri" given-names: "Giancarlo" - family-names: "Cretin" given-names: "Julien" - family-names: "Petit-Bianco" given-names: "Alexandre" - family-names: "Tao" given-names: "David" - family-names: "Bursztein" given-names: "Elie" title: "Magika: AI-Powered Content-Type Detection" url: "https://arxiv.org/abs/2409.13768" ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We would love to accept your patches and contributions to this project! Check [open issues labeled as "help wanted"](https://github.com/google/magika/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) as a starting point. ## Before you begin ### Sign our Contributor License Agreement Contributions to this project must be accompanied by a [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. If you or your current employer have already signed the Google CLA (even if it was for a different project), you probably don't need to do it again. Visit <https://cla.developers.google.com/> to see your current agreements or to sign a new one. 
### Review our Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google/conduct/). ## Contribution process ### Code Reviews All submissions, including submissions by project members, require review. We use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) for this purpose. ================================================ FILE: Dockerfile ================================================ # syntax=docker/dockerfile:1 ARG PYTHON_VERSION=3.11 FROM python:${PYTHON_VERSION}-slim as base WORKDIR /magika # This requires buildx # RUN --mount=type=cache,target=/root/.cache/pip \ # pip install magika RUN pip install magika ENTRYPOINT ["magika"] ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Magika [![image](https://img.shields.io/pypi/v/magika.svg)](https://pypi.python.org/pypi/magika) [![NPM Version](https://img.shields.io/npm/v/magika)](https://npmjs.com/package/magika) [![image](https://img.shields.io/pypi/l/magika.svg)](https://pypi.python.org/pypi/magika) [![image](https://img.shields.io/pypi/pyversions/magika.svg)](https://pypi.python.org/pypi/magika) [![Go Version](https://img.shields.io/github/v/tag/google/magika?filter=go%2F*&label=go&sort=semver)](https://pkg.go.dev/github.com/google/magika/go) [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8706/badge)](https://www.bestpractices.dev/en/projects/8706) ![CodeQL](https://github.com/google/magika/workflows/CodeQL/badge.svg) [![Actions status](https://github.com/google/magika/actions/workflows/python-build-and-release-package.yml/badge.svg)](https://github.com/google/magika/actions) [![PyPI Monthly Downloads](https://static.pepy.tech/badge/magika/month)](https://pepy.tech/projects/magika) [![PyPI Downloads](https://static.pepy.tech/badge/magika)](https://pepy.tech/projects/magika) Magika is a novel AI-powered file type detection tool that relies on the recent advance of deep learning to provide accurate detection. 
Under the hood, Magika employs a custom, highly optimized model that weighs only a few MBs and enables precise file identification within milliseconds, even when running on a single CPU. Magika has been trained and evaluated on a dataset of ~100M samples across 200+ content types (covering both binary and textual file formats), and it achieves an average ~99% accuracy on our test set. Here is an example of what Magika's command line output looks like:

Magika is used at scale to help improve Google users' safety by routing Gmail, Drive, and Safe Browsing files to the proper security and content policy scanners, processing hundreds of billions of samples on a weekly basis. Magika has also been integrated with [VirusTotal](https://www.virustotal.com/) ([example](./assets/magika-vt.png)) and [abuse.ch](https://bazaar.abuse.ch/) ([example](./assets/magika-abusech.png)). For more context you can read our initial [announcement post on Google's OSS blog](https://opensource.googleblog.com/2024/02/magika-ai-powered-fast-and-efficient-file-type-identification.html), you can consult [Magika's website](https://securityresearch.google/magika/), and you can read more in our [research paper](https://securityresearch.google/magika/additional-resources/research-papers-and-citation/), published at the IEEE/ACM International Conference on Software Engineering (ICSE) 2025. You can try Magika without installing anything by using our [web demo](https://securityresearch.google/magika/demo/magika-demo/), which runs locally in your browser! # Highlights - Available as a command line tool written in Rust, a Python API, and additional bindings for Rust, JavaScript/TypeScript (with an experimental npm package, which powers our [web demo](https://securityresearch.google/magika/demo/magika-demo/)), and GoLang (WIP). - Trained and evaluated on a dataset of ~100M files across [200+ content types](./assets/models/standard_v3_3/README.md). - On our test set, Magika achieves ~99% average precision and recall, outperforming existing approaches -- especially on textual content types. - After the model is loaded (which is a one-off overhead), the inference time is about 5ms per file, even when run on a single CPU. - You can invoke Magika with even thousands of files at the same time. You can also use `-r` for recursively scanning a directory. 
- Near-constant inference time, independent of the file size; Magika only uses a limited subset of the file's content. - Magika uses a per-content-type threshold system that determines whether to "trust" the prediction of the model, or whether to return a generic label, such as "Generic text document" or "Unknown binary data". - The tolerance to errors can be controlled via different prediction modes, such as `high-confidence`, `medium-confidence`, and `best-guess`. - The client and the bindings are already open source, and more is coming soon! # Table of Contents 1. [Getting Started](#getting-started) 1. [Installation](#installation) 1. [Quick Start](#quick-start) 1. [Documentation](#documentation) 1. [Security Vulnerabilities](#security-vulnerabilities) 1. [License](#license) 1. [Disclaimer](#disclaimer) # Getting Started ## Installation ### Command Line Tool Magika ships a CLI written in Rust, and can be installed in several ways. Via `magika` python package: ```shell pipx install magika ``` Via brew (macOS / Linux): ```shell brew install magika ``` Via installer script: ```shell curl -LsSf https://securityresearch.google/magika/install.sh | sh ``` or: ```shell powershell -ExecutionPolicy Bypass -c "irm https://securityresearch.google/magika/install.ps1 | iex" ``` Via `magika-cli` Rust package: ```shell cargo install --locked magika-cli ``` ### Python package ```shell pip install magika ``` ### JavaScript package ```shell npm install magika ``` ## Quick Start Here you can find a number of quick examples just to get you started. To learn about Magika's inner workings, see the [Core Concepts](https://securityresearch.google/magika/core-concepts/) section of Magika's website. 
### Command Line Tool Examples ```shell % cd tests_data/basic && magika -r * | head asm/code.asm: Assembly (code) batch/simple.bat: DOS batch file (code) c/code.c: C source (code) css/code.css: CSS source (code) csv/magika_test.csv: CSV document (code) dockerfile/Dockerfile: Dockerfile (code) docx/doc.docx: Microsoft Word 2007+ document (document) docx/magika_test.docx: Microsoft Word 2007+ document (document) eml/sample.eml: RFC 822 mail (text) empty/empty_file: Empty file (inode) ``` ```shell % magika ./tests_data/basic/python/code.py --json [ { "path": "./tests_data/basic/python/code.py", "result": { "status": "ok", "value": { "dl": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "output": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "score": 0.996999979019165 } } } ] ``` ```shell % cat tests_data/basic/ini/doc.ini | magika - -: INI configuration file (text) ``` ```shell % magika --help Determines file content types using AI Usage: magika [OPTIONS] [PATH]... Arguments: [PATH]... List of paths to the files to analyze. Use a dash (-) to read from standard input (can only be used once). Options: -r, --recursive Identifies files within directories instead of identifying the directory itself --no-dereference Identifies symbolic links as is instead of identifying their content by following them --colors Prints with colors regardless of terminal support --no-colors Prints without colors regardless of terminal support -s, --output-score Prints the prediction score in addition to the content type -i, --mime-type Prints the MIME type instead of the content type description -l, --label Prints a simple label instead of the content type description --json Prints in JSON format --jsonl Prints in JSONL format --format Prints using a custom format (use --help for details). 
The following placeholders are supported: %p The file path %l The unique label identifying the content type %d The description of the content type %g The group of the content type %m The MIME type of the content type %e Possible file extensions for the content type %s The score of the content type for the file %S The score of the content type for the file in percent %b The model output if overruled (empty otherwise) %% A literal % -h, --help Print help (see a summary with '-h') -V, --version Print version ``` For more examples and documentation about the CLI, see https://crates.io/crates/magika-cli. ### Python Examples ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_bytes(b'function log(msg) {console.log(msg);}') >>> print(res.output.label) javascript ``` ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_path('./tests_data/basic/ini/doc.ini') >>> print(res.output.label) ini ``` ```python >>> from magika import Magika >>> m = Magika() >>> with open('./tests_data/basic/ini/doc.ini', 'rb') as f: >>> res = m.identify_stream(f) >>> print(res.output.label) ini ``` For more examples and documentation about the Python module, see the [Python `Magika` module](https://securityresearch.google/magika/cli-and-bindings/python/) section. # Documentation Please consult [Magika's website](https://securityresearch.google/magika) for detailed documentation about: - Core Concepts - How Magika works - Models & content types - Prediction modes - Understanding the output - CLI & Bindings (Python module, JavaScript module, ...) - Contributing - FAQ - ... # Security Vulnerabilities Please contact us directly at magika-dev@google.com. # License Apache 2.0; see [`LICENSE`](LICENSE) for details. # Disclaimer This project is not an official Google project. It is not supported by Google and Google specifically disclaims all warranties as to its quality, merchantability, or fitness for a particular purpose. 
================================================ FILE: assets/content_types_kb.min.json ================================================ {"3gp":{"mime_type":"video/3gpp","group":"video","description":"3GPP multimedia file","extensions":["3gp"],"is_text":false},"3ds":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS roms","extensions":["3ds"],"is_text":false},"3dsx":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS homebrew","extensions":["3dsx"],"is_text":false},"3dsm":{"mime_type":"application/x-3ds","group":"image","description":"3D studio Max","extensions":["3ds"],"is_text":false},"3mf":{"mime_type":"application/vnd.ms-package.3dmanufacturing-3dmodel+xml","group":"image","description":"3D Manufacturing Format","extensions":["3mf"],"is_text":false},"abnf":{"mime_type":"text/plain","group":null,"description":"augmented Backus\u2013Naur form","extensions":["abnf"],"is_text":false},"ace":{"mime_type":"application/x-ace-compressed","group":"archive","description":"ACE archive","extensions":["ace"],"is_text":false},"ada":{"mime_type":"text/x-ada","group":"code","description":"ADA source","extensions":[],"is_text":false},"aff":{"mime_type":"text/plain","group":null,"description":"Hunspell Affix","extensions":["aff"],"is_text":true},"ai":{"mime_type":"application/pdf","group":"document","description":"Adobe Illustrator Artwork","extensions":["ai"],"is_text":false},"aidl":{"mime_type":"text/plain","group":null,"description":"Android Interface Definition Language","extensions":["aidl"],"is_text":true},"algol68":{"mime_type":null,"group":null,"description":null,"extensions":["a68"],"is_text":false},"ani":{"mime_type":"application/x-navi-animation","group":null,"description":"Animated cursor","extensions":["ani"],"is_text":false},"apk":{"mime_type":"application/vnd.android.package-archive","group":"executable","description":"Android 
package","extensions":["apk"],"is_text":false},"applebplist":{"mime_type":"application/x-bplist","group":"application","description":"Apple binary property list","extensions":["bplist","plist"],"is_text":false},"appledouble":{"mime_type":"multipart/appledouble","group":"unknown","description":"AppleDouble","extensions":[],"is_text":false},"appleplist":{"mime_type":"application/x-plist","group":"application","description":"Apple property list","extensions":["plist"],"is_text":true},"applesingle":{"mime_type":"application/applefile","group":"unknown","description":"AppleSingle","extensions":[],"is_text":false},"ar":{"mime_type":"application/x-archive","group":"archive","description":"AR Archive","extensions":[],"is_text":false},"arc":{"mime_type":"application/x-arc","group":"archive","description":"Arc","extensions":["arc"],"is_text":false},"arj":{"mime_type":"application/arj","group":"archive","description":"Arj","extensions":[],"is_text":false},"arrow":{"mime_type":"vnd.apache.arrow.file","group":null,"description":null,"extensions":[],"is_text":false},"asc":{"mime_type":"application/pgp-signature","group":"text","description":"PGP","extensions":["asc"],"is_text":true},"asd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"au":{"mime_type":"audio/basic","group":"audio","description":"NeXT/Sun AU","extensions":["au"],"is_text":false},"asf":{"mime_type":"video/x-ms-wma","group":"application","description":"Microsoft Advanced Systems Format","extensions":["asf"],"is_text":false},"asm":{"mime_type":"text/x-asm","group":"code","description":"Assembly","extensions":["s","S","asm"],"is_text":true},"asp":{"mime_type":"text/html","group":"code","description":"ASP source","extensions":["aspx","asp"],"is_text":true},"autohotkey":{"mime_type":"text/plain","group":"code","description":"AutoHotKey script","extensions":[],"is_text":true},"autoit":{"mime_type":"text/plain","group":"code","description":"AutoIt 
script","extensions":["au3"],"is_text":true},"avi":{"mime_type":"video/x-msvideo","group":"video","description":"Audio Video Interleave","extensions":["avi"],"is_text":false},"avif":{"mime_type":"image/avif","group":"video","description":"AV1 Image File Format","extensions":["avif","avifs"],"is_text":false},"avro":{"mime_type":"application/x-avro-binary","group":null,"description":"Apache Avro binary","extensions":["avro"],"is_text":false},"awk":{"mime_type":"text/plain","group":"code","description":"Awk","extensions":["awk"],"is_text":true},"ax":{"mime_type":"application/x-dosexec","group":"executable","description":"Directshow filter","extensions":["ax"],"is_text":false},"batch":{"mime_type":"text/x-msdos-batch","group":"code","description":"DOS batch file","extensions":["bat"],"is_text":true},"bazel":{"mime_type":"text/plain","group":"code","description":"Bazel build file","extensions":["bzl"],"is_text":true},"bcad":{"mime_type":"application/octet-stream","group":"document","description":"bCAD Drawing","extensions":["bdf"],"is_text":false},"bib":{"mime_type":"text/x-bibtex","group":"text","description":"BibTeX","extensions":["bib"],"is_text":true},"bmp":{"mime_type":"image/bmp","group":"image","description":"BMP image data","extensions":["bmp"],"is_text":false},"bpg":{"mime_type":"image/bpg","group":"image","description":"BPG","extensions":["bpg"],"is_text":false},"bpl":{"mime_type":null,"group":"unknown","description":null,"extensions":["bpl"],"is_text":false},"brainfuck":{"mime_type":"text/x-brainfuck","group":"code","description":"Brainfuck source","extensions":["b","bf"],"is_text":true},"brf":{"mime_type":"text/plain","group":"text","description":"Braille Ready Format","extensions":["brf","bfm"],"is_text":false},"bzip":{"mime_type":"application/x-bzip2","group":"archive","description":"bzip2 compressed data","extensions":["bz2","tbz2","tar.bz2"],"is_text":false},"bzip3":{"mime_type":"application/x-bzip3","group":"archive","description":"bzip3 compressed 
data","extensions":["bz3"],"is_text":false},"c":{"mime_type":"text/x-c","group":"code","description":"C source","extensions":["c"],"is_text":true},"cab":{"mime_type":"application/vnd.ms-cab-compressed","group":"archive","description":"Microsoft Cabinet archive data","extensions":["cab"],"is_text":false},"cad":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"cat":{"mime_type":"application/octet-stream","group":"application","description":"Windows Catalog file","extensions":["cat"],"is_text":false},"cdf":{"mime_type":null,"group":"archive","description":null,"extensions":[],"is_text":false},"chm":{"mime_type":"application/chm","group":"application","description":"MS Windows HtmlHelp Data","extensions":["chm"],"is_text":false},"clojure":{"mime_type":"text/x-clojure","group":"code","description":"Clojure","extensions":["clj","cljs","cljc","cljr"],"is_text":true},"cmake":{"mime_type":"text/x-cmake","group":"code","description":"CMake build file","extensions":["cmake"],"is_text":true},"cobol":{"mime_type":"text/x-cobol","group":"code","description":"Cobol","extensions":["cbl","cob","cpy","CBL","COB","CPY"],"is_text":true},"coff":{"mime_type":"application/x-coff","group":"executable","description":"Intel 80386 COFF","extensions":["obj","o"],"is_text":false},"coffeescript":{"mime_type":"text/coffeescript","group":"code","description":"CoffeeScript","extensions":["coffee"],"is_text":true},"com":{"mime_type":"application/x-dosexec","group":"executable","description":null,"extensions":[],"is_text":false},"cpl":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["cpl"],"is_text":false},"cpp":{"mime_type":"text/x-c","group":"code","description":"C++ source","extensions":["cc","cpp","cxx","c++","cppm","ixx"],"is_text":true},"crt":{"mime_type":"application/x-x509-ca-cert","group":"text","description":"Certificates (binary 
format)","extensions":["der","cer","crt"],"is_text":false},"crx":{"mime_type":"application/x-chrome-extension","group":"executable","description":"Google Chrome extension","extensions":["crx"],"is_text":false},"cs":{"mime_type":"text/plain","group":"code","description":"C# source","extensions":["cs","csx"],"is_text":true},"csproj":{"mime_type":"text/plain","group":"code","description":".NET project config","extensions":["csproj"],"is_text":true},"css":{"mime_type":"text/css","group":"code","description":"CSS source","extensions":["css"],"is_text":true},"csv":{"mime_type":"text/csv","group":"code","description":"CSV document","extensions":["csv"],"is_text":true},"ctl":{"mime_type":"application/octet-stream","group":null,"description":null,"extensions":[],"is_text":false},"dart":{"mime_type":"text/plain","group":"code","description":"Dart source","extensions":["dart"],"is_text":true},"deb":{"mime_type":"application/vnd.debian.binary-package","group":"archive","description":"Debian binary package","extensions":["deb"],"is_text":false},"dex":{"mime_type":"application/x-android-dex","group":"executable","description":"Dalvik dex file","extensions":["dex"],"is_text":false},"dey":{"mime_type":"application/x-android-dey","group":"executable","description":"Dalvik dex file","extensions":[],"is_text":false},"dicom":{"mime_type":"application/dicom","group":"image","description":"DICOM","extensions":["dcm"],"is_text":false},"diff":{"mime_type":"text/plain","group":"text","description":"Diff file","extensions":["diff","patch"],"is_text":true},"directory":{"mime_type":"inode/directory","group":"inode","description":"A directory","extensions":[],"is_text":false},"django":{"mime_type":"text/x-django","group":"code","description":"Django source","extensions":[],"is_text":false},"dll":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["dll"],"is_text":false},"dm":{"mime_type":"text/plain","group":"code","description":"Dream 
Maker","extensions":["dm"],"is_text":true},"dmigd":{"mime_type":"text/plain","group":"text","description":"Dominion Mods","extensions":["dm"],"is_text":true},"dmg":{"mime_type":"application/x-apple-diskimage","group":"archive","description":"Apple disk image","extensions":["dmg"],"is_text":false},"dmscript":{"mime_type":"text/plain","group":"code","description":"Digital Micrograph Script","extensions":["s"],"is_text":true},"doc":{"mime_type":"application/msword","group":"document","description":"Microsoft Word CDF document","extensions":["doc"],"is_text":false},"dockerfile":{"mime_type":"text/x-dockerfile","group":"code","description":"Dockerfile","extensions":[],"is_text":true},"docx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","group":"document","description":"Microsoft Word 2007+ document","extensions":["docx","docm"],"is_text":false},"dosmbr":{"mime_type":"application/octet-stream","group":null,"description":"Master boot record","extensions":[],"is_text":false},"dotx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.template","group":"document","description":"Office Word 2007 template","extensions":["dotx"],"is_text":false},"dsstore":{"mime_type":"application/octet-stream","group":"unknown","description":"Application Desktop Services Store","extensions":[],"is_text":false},"dwg":{"mime_type":"image/x-dwg","group":"image","description":"Autocad Drawing","extensions":["dwg"],"is_text":false},"dxf":{"mime_type":"image/vnd.dxf","group":"image","description":"Audocad Drawing Exchange Format","extensions":["dxf"],"is_text":true},"dylib":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":["dylib"],"is_text":false},"ebml":{"mime_type":"application/octet-stream","group":"unknown","description":"Extensible Binary Meta Language","extensions":[],"is_text":false},"elf":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF 
executable","extensions":["elf"],"is_text":false},"elixir":{"mime_type":"text/plain","group":"code","description":"Elixir script","extensions":["exs"],"is_text":true},"emf":{"mime_type":"application/octet-stream","group":"application","description":"Windows Enhanced Metafile image data","extensions":["emf"],"is_text":false},"eml":{"mime_type":"message/rfc822","group":"text","description":"RFC 822 mail","extensions":["eml"],"is_text":true},"empty":{"mime_type":"inode/x-empty","group":"inode","description":"Empty file","extensions":[],"is_text":false},"epub":{"mime_type":"application/epub+zip","group":"document","description":"EPUB document","extensions":["epub"],"is_text":false},"erb":{"mime_type":"text/x-ruby","group":"code","description":"Embedded Ruby source","extensions":["erb"],"is_text":true},"erlang":{"mime_type":"text/x-erlang","group":"code","description":"Erlang source","extensions":["erl","hrl"],"is_text":true},"ese":{"mime_type":"application/x-ms-ese","group":null,"description":"ESE Db","extensions":["dat"],"is_text":false},"exe":{"mime_type":"application/x-dosexec","group":"executable","description":"PE executable","extensions":["exe"],"is_text":false},"exp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flac":{"mime_type":"audio/flac","group":"audio","description":"FLAC audio bitstream data","extensions":["flac"],"is_text":false},"flutter":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flv":{"mime_type":"video/x-flv","group":"video","description":"Flash Video","extensions":["flv"],"is_text":false},"fortran":{"mime_type":"text/x-fortran","group":"document","description":"Fortran","extensions":["f90","f95","f03","F90"],"is_text":true},"fpx":{"mime_type":null,"group":"image","description":"Flashpix","extensions":["fpx"],"is_text":false},"gemfile":{"mime_type":"text/plain","group":"code","description":"Gemfile 
file","extensions":[],"is_text":true},"gemspec":{"mime_type":"text/plain","group":"code","description":"Gemspec file","extensions":["gemspec"],"is_text":true},"gif":{"mime_type":"image/gif","group":"image","description":"GIF image data","extensions":["gif"],"is_text":false},"gitattributes":{"mime_type":"text/plain","group":"code","description":"Gitattributes file","extensions":[],"is_text":true},"gitmodules":{"mime_type":"text/plain","group":"code","description":"Gitmodules file","extensions":[],"is_text":true},"gleam":{"mime_type":null,"group":"code","description":"Gleam source","extensions":["gleam"],"is_text":true},"go":{"mime_type":"text/x-golang","group":"code","description":"Golang source","extensions":["go"],"is_text":true},"gpx":{"mime_type":null,"group":null,"description":"XML document","extensions":["gpx"],"is_text":false},"gradle":{"mime_type":"text/x-groovy","group":"code","description":"Gradle source","extensions":["gradle"],"is_text":true},"groovy":{"mime_type":"text/x-groovy","group":"code","description":"Groovy source","extensions":["groovy"],"is_text":true},"gzip":{"mime_type":"application/gzip","group":"archive","description":"gzip compressed data","extensions":["gz","gzip","tgz","tar.gz"],"is_text":false},"h":{"mime_type":"text/x-c","group":"code","description":"C header source","extensions":["h"],"is_text":true},"h5":{"mime_type":"application/x-hdf5","group":"archive","description":"Hierarchical Data Format v5","extensions":["h5","hdf5"],"is_text":false},"handlebars":{"mime_type":"text/x-handlebars-template","group":"code","description":"Handlebars source","extensions":["hbs","handlebars"],"is_text":true},"haskell":{"mime_type":"text/plain","group":"code","description":"Haskell source","extensions":["hs","lhs"],"is_text":true},"hcl":{"mime_type":"text/x-hcl","group":"code","description":"HashiCorp configuration language","extensions":["hcl"],"is_text":true},"heif":{"mime_type":"image/heic","group":"image","description":"High Efficiency Image 
File","extensions":["heif","heifs","heic","heics"],"is_text":false},"hfs":{"mime_type":"application/x-hfs","group":null,"description":null,"extensions":["hfs"],"is_text":false},"hlp":{"mime_type":"application/winhlp","group":"application","description":"MS Windows help","extensions":["hlp"],"is_text":false},"hpp":{"mime_type":"text/x-h","group":"code","description":null,"extensions":["hh","hpp","hxx","h++"],"is_text":true},"hta":{"mime_type":"application/hta","group":"code","description":"HTML Application","extensions":["hta"],"is_text":false},"htaccess":{"mime_type":"text/x-apache-conf","group":"code","description":"Apache access configuration","extensions":[],"is_text":true},"html":{"mime_type":"text/html","group":"code","description":"HTML document","extensions":["html","htm","xhtml","xht"],"is_text":true},"hve":{"mime_type":null,"group":"unknown","description":null,"extensions":[],"is_text":false},"hwp":{"mime_type":"application/x-hwp","group":"document","description":"Hangul Word Processor","extensions":["hwp"],"is_text":false},"icc":{"mime_type":"application/vnd.iccprofile","group":null,"description":"ICC profile","extensions":["icc"],"is_text":false},"icns":{"mime_type":"image/x-icns","group":"image","description":"Mac OS X icon","extensions":["icns"],"is_text":false},"ico":{"mime_type":"image/vnd.microsoft.icon","group":"image","description":"MS Windows icon resource","extensions":["ico"],"is_text":false},"ics":{"mime_type":"text/calendar","group":"application","description":"Internet Calendaring and Scheduling","extensions":["ics"],"is_text":true},"ignorefile":{"mime_type":"text/plain","group":"code","description":"Ignorefile","extensions":[],"is_text":true},"img":{"mime_type":null,"group":null,"description":null,"extensions":["img"],"is_text":false},"ini":{"mime_type":"text/plain","group":"text","description":"INI configuration 
file","extensions":["ini"],"is_text":true},"internetshortcut":{"mime_type":"application/x-mswinurl","group":"application","description":"MS Windows Internet shortcut","extensions":["url"],"is_text":true},"iosapp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"ipynb":{"mime_type":"application/json","group":"code","description":"Jupyter notebook","extensions":["ipynb"],"is_text":true},"iso":{"mime_type":"application/x-iso9660-image","group":"archive","description":"ISO 9660 CD-ROM filesystem data","extensions":["iso"],"is_text":false},"jar":{"mime_type":"application/java-archive","group":"archive","description":"Java archive data (JAR)","extensions":["jar","klib"],"is_text":false},"java":{"mime_type":"text/x-java","group":"code","description":"Java source","extensions":["java"],"is_text":true},"javabytecode":{"mime_type":"application/x-java-applet","group":"executable","description":"Java compiled bytecode","extensions":["class"],"is_text":false},"javascript":{"mime_type":"application/javascript","group":"code","description":"JavaScript source","extensions":["js","mjs","cjs"],"is_text":true},"jinja":{"mime_type":"text/x-jinja2-template","group":"code","description":"Jinja template","extensions":["jinja","jinja2","j2"],"is_text":true},"jng":{"mime_type":"image/jng","group":"image","description":"JPEG network graphics","extensions":["jng"],"is_text":false},"jnlp":{"mime_type":"application/x-java-jnlp-file","group":"code","description":"Java Network Launch Protocol","extensions":["jnlp"],"is_text":true},"jp2":{"mime_type":"image/jpeg2000","group":"image","description":"jpeg2000","extensions":["jp2"],"is_text":false},"jpeg":{"mime_type":"image/jpeg","group":"image","description":"JPEG image data","extensions":["jpg","jpeg"],"is_text":false},"json":{"mime_type":"application/json","group":"code","description":"JSON 
document","extensions":["json"],"is_text":true},"jsonc":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"jsonl":{"mime_type":"application/json","group":"code","description":"JSONL document","extensions":["jsonl","jsonld"],"is_text":true},"jsx":{"mime_type":"application/javascript","group":"code","description":"JSX source","extensions":["jsx","mjsx","cjsx"],"is_text":true},"julia":{"mime_type":"text/x-julia","group":"code","description":"Julia source","extensions":["jl"],"is_text":true},"jxl":{"mime_type":"image/jxl","group":"image","description":"JPEG XL","extensions":["jxl"],"is_text":false},"ko":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, kernel object","extensions":["ko"],"is_text":false},"kotlin":{"mime_type":"text/plain","group":"code","description":"Kotlin source","extensions":["kt","kts"],"is_text":true},"ks":{"mime_type":null,"group":null,"description":"Tyrano","extensions":["ks"],"is_text":true},"latex":{"mime_type":"text/x-tex","group":"text","description":"LaTeX document","extensions":["tex","sty"],"is_text":true},"latexaux":{"mime_type":null,"group":null,"description":null,"extensions":["aux"],"is_text":false},"less":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"lha":{"mime_type":"application/x-lha","group":"archive","description":"LHarc archive","extensions":["lha","lzh"],"is_text":false},"license":{"mime_type":"text/plain","group":"text","description":"License file","extensions":[],"is_text":true},"lisp":{"mime_type":"text/x-lisp","group":"code","description":"Lisp source","extensions":["lisp","lsp","l","cl"],"is_text":true},"litcs":{"mime_type":null,"group":null,"description":"Literate CS","extensions":["litcoffee"],"is_text":false},"lnk":{"mime_type":"application/x-ms-shortcut","group":"application","description":"MS Windows 
shortcut","extensions":["lnk"],"is_text":false},"lock":{"mime_type":"text/plain","group":"application","description":"Lock file","extensions":["lock"],"is_text":true},"lrz":{"mime_type":"application/x-lrzip","group":null,"description":"LRZip","extensions":["lrz"],"is_text":false},"lua":{"mime_type":"text/plain","group":"code","description":"Lua","extensions":["lua"],"is_text":true},"lz":{"mime_type":"application/x-lzip","group":"archive","description":"LZip","extensions":["lz"],"is_text":false},"lz4":{"mime_type":"application/x-lz4","group":"archive","description":"LZ4","extensions":["lz4"],"is_text":false},"lzx":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"m3u":{"mime_type":"text/plain","group":"application","description":"M3U playlist","extensions":["m3u8","m3u"],"is_text":true},"m4":{"mime_type":"text/plain","group":"code","description":"GNU Macro","extensions":["m4"],"is_text":true},"macho":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":[],"is_text":false},"maff":{"mime_type":"application/x-maff","group":null,"description":null,"extensions":["maff"],"is_text":false},"makefile":{"mime_type":"text/x-makefile","group":"code","description":"Makefile source","extensions":[],"is_text":true},"markdown":{"mime_type":"text/markdown","group":"text","description":"Markdown document","extensions":["md","markdown"],"is_text":true},"matlab":{"mime_type":"text/x-matlab","group":"code","description":"Matlab Source","extensions":["m","matlab"],"is_text":true},"mht":{"mime_type":"application/x-mimearchive","group":"code","description":"MHTML document","extensions":["mht"],"is_text":true},"midi":{"mime_type":"audio/midi","group":"audio","description":"Midi","extensions":["mid"],"is_text":false},"mkv":{"mime_type":"video/x-matroska","group":"video","description":"Matroska","extensions":["mkv"],"is_text":false},"mp2":{"mime_type":null,"group":null,"description":"MP2 
stream","extensions":["mp2"],"is_text":false},"mp3":{"mime_type":"audio/mpeg","group":"audio","description":"MP3 media file","extensions":["mp3"],"is_text":false},"mp4":{"mime_type":"video/mp4","group":"video","description":"MP4 media file","extensions":["mp4"],"is_text":false},"mpegts":{"mime_type":"video/MP2T","group":"video","description":"MPEG Transport stream","extensions":["ts","tsv","tsa","m2t"],"is_text":false},"mscompress":{"mime_type":"application/x-ms-compress-szdd","group":"archive","description":"MS Compress archive data","extensions":[],"is_text":false},"msi":{"mime_type":"application/x-msi","group":"archive","description":"Microsoft Installer file","extensions":["msi"],"is_text":false},"msix":{"mime_type":"application/msix","group":"application","description":"Windows app package","extensions":["msix"],"is_text":false},"mst":{"mime_type":null,"group":null,"description":null,"extensions":["mst"],"is_text":false},"mui":{"mime_type":"application/x-dosexec","group":"application","description":"PE Windows executable","extensions":["mui"],"is_text":false},"mum":{"mime_type":"text/xml","group":"application","description":"Windows Update Package file","extensions":["mum"],"is_text":true},"mun":{"mime_type":null,"group":null,"description":null,"extensions":["mun"],"is_text":false},"nim":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"npy":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Array","extensions":["npy"],"is_text":false},"npz":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Arrays Archive","extensions":["npz"],"is_text":false},"null":{"mime_type":null,"group":null,"description":null,"extensions":["null"],"is_text":false},"nupkg":{"mime_type":"application/octet-stream","group":null,"description":"NuGet 
Package","extensions":["nupkg"],"is_text":false},"object":{"mime_type":null,"group":null,"description":null,"extensions":["o"],"is_text":false},"objectivec":{"mime_type":"text/x-objcsrc","group":"code","description":"ObjectiveC source","extensions":["m","mm"],"is_text":true},"ocaml":{"mime_type":"text-ocaml","group":"code","description":"OCaml","extensions":["ml","mli"],"is_text":true},"ocx":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["ocx"],"is_text":false},"odex":{"mime_type":"application/x-executable-elf","group":"executable","description":"ODEX ELF executable","extensions":["odex"],"is_text":false},"odin":{"mime_type":null,"group":"code","description":"Odin","extensions":["odin"],"is_text":true},"odp":{"mime_type":"application/vnd.oasis.opendocument.presentation","group":"document","description":"OpenDocument Presentation","extensions":["odp"],"is_text":false},"ods":{"mime_type":"application/vnd.oasis.opendocument.spreadsheet","group":"document","description":"OpenDocument Spreadsheet","extensions":["ods"],"is_text":false},"odt":{"mime_type":"application/vnd.oasis.opendocument.text","group":"document","description":"OpenDocument Text","extensions":["odt"],"is_text":false},"ogg":{"mime_type":"audio/ogg","group":"audio","description":"Ogg data","extensions":["ogg"],"is_text":false},"ole":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"one":{"mime_type":"application/msonenote","group":"document","description":"One Note","extensions":["one"],"is_text":false},"onnx":{"mime_type":"application/octet-stream","group":"archive","description":"Open Neural Network Exchange","extensions":["onnx"],"is_text":false},"ooxml":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"otf":{"mime_type":"font/otf","group":"font","description":"OpenType 
font","extensions":["otf"],"is_text":false},"outlook":{"mime_type":"application/vnd.ms-outlook","group":"application","description":"MS Outlook Message","extensions":[],"is_text":false},"palmos":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"parquet":{"mime_type":"application/vnd.apache.parquet","group":"unknown","description":"Apache Parquet","extensions":["pqt","parquet"],"is_text":false},"pascal":{"mime_type":"text/x-pascal","group":"code","description":"Pascal source","extensions":["pas","pp"],"is_text":true},"pbm":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"pcap":{"mime_type":"application/vnd.tcpdump.pcap","group":"application","description":"pcap capture file","extensions":["pcap","pcapng"],"is_text":false},"pdb":{"mime_type":"application/octet-stream","group":"application","description":"Windows Program Database","extensions":["pdb"],"is_text":false},"pdf":{"mime_type":"application/pdf","group":"document","description":"PDF document","extensions":["pdf"],"is_text":false},"pebin":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["exe","dll"],"is_text":false},"pem":{"mime_type":"application/x-pem-file","group":"application","description":"PEM certificate","extensions":["pem","pub","gpg"],"is_text":true},"perl":{"mime_type":"text/x-perl","group":"code","description":"Perl source","extensions":["pl"],"is_text":true},"pgp":{"mime_type":"application/pgp-keys","group":null,"description":"PGP","extensions":["gpg","pgp"],"is_text":false},"php":{"mime_type":"text/x-php","group":"code","description":"PHP source","extensions":["php"],"is_text":true},"pickle":{"mime_type":"application/octet-stream","group":"application","description":"Python pickle","extensions":["pickle","pkl"],"is_text":false},"png":{"mime_type":"image/png","group":"image","description":"PNG 
image","extensions":["png"],"is_text":false},"po":{"mime_type":"text/gettext-translation","group":"application","description":"Portable Object (PO) for i18n","extensions":["po"],"is_text":true},"postscript":{"mime_type":"application/postscript","group":"document","description":"PostScript document","extensions":["ps"],"is_text":false},"powershell":{"mime_type":"application/x-powershell","group":"code","description":"Powershell source","extensions":["ps1"],"is_text":true},"ppt":{"mime_type":"application/vnd.ms-powerpoint","group":"document","description":"Microsoft PowerPoint CDF document","extensions":["ppt"],"is_text":false},"pptx":{"mime_type":"application/vnd.openxmlformats-officedocument.presentationml.presentation","group":"document","description":"Microsoft PowerPoint 2007+ document","extensions":["pptx","pptm"],"is_text":false},"printfox":{"mime_type":null,"group":null,"description":"c64","extensions":[],"is_text":false},"prolog":{"mime_type":"text/x-prolog","group":"code","description":"Prolog source","extensions":["pl","pro","P"],"is_text":true},"proteindb":{"mime_type":"application/octet-stream","group":"application","description":"Protein DB","extensions":["pdb"],"is_text":true},"proto":{"mime_type":"text/x-proto","group":"code","description":"Protocol buffer definition","extensions":["proto"],"is_text":true},"protobuf":{"mime_type":"application/protobuf","group":"unknown","description":"Protocol buffers","extensions":["protobuf","pb"],"is_text":false},"psd":{"mime_type":"image/vnd.adobe.photoshop","group":"image","description":"Adobe Photoshop","extensions":["psd"],"is_text":false},"pytorch":{"mime_type":"application/octet-stream","group":"application","description":"Pytorch storage file","extensions":["pt","pth"],"is_text":false},"pub":{"mime_type":"application/x-mspublisher","group":null,"description":null,"extensions":["pub"],"is_text":false},"python":{"mime_type":"text/x-python","group":"code","description":"Python 
source","extensions":["py","pyi"],"is_text":true},"pythonbytecode":{"mime_type":"application/x-bytecode.python","group":"executable","description":"Python compiled bytecode","extensions":["pyc","pyo"],"is_text":false},"pythonpar":{"mime_type":null,"group":null,"description":null,"extensions":["par"],"is_text":false},"qoi":{"mime_type":"image/x-qoi","group":"image","description":"Quite Ok Image","extensions":["qoi"],"is_text":false},"qt":{"mime_type":"video/quicktime","group":"video","description":"QuickTime","extensions":["mov"],"is_text":false},"r":{"mime_type":"text/x-R","group":"code","description":"R (language)","extensions":["R"],"is_text":true},"randomascii":{"mime_type":"text/plain","group":"text","description":"Random ASCII characters","extensions":[],"is_text":true},"randombytes":{"mime_type":"application/octet-stream","group":"unknown","description":"Random bytes","extensions":[],"is_text":false},"randomtxt":{"mime_type":"text/plain","group":"text","description":"Random text","extensions":[],"is_text":true},"rar":{"mime_type":"application/x-rar","group":"archive","description":"RAR archive data","extensions":["rar"],"is_text":false},"rdf":{"mime_type":"application/rdf+xml","group":"text","description":"Resource Description Framework document (RDF)","extensions":["rdf"],"is_text":true},"rdp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"riff":{"mime_type":"application/x-riff","group":null,"description":null,"extensions":[],"is_text":false},"rlib":{"mime_type":"application/x-archive","group":"archive","description":"rust library","extensions":["rlib"],"is_text":false},"rll":{"mime_type":null,"group":"executable","description":"Resource Library","extensions":["rll"],"is_text":false},"rpm":{"mime_type":"application/x-rpm","group":"archive","description":"RedHat Package Manager archive (RPM)","extensions":["rpm"],"is_text":false},"rst":{"mime_type":"text/x-rst","group":"text","description":"ReStructuredText 
document","extensions":["rst"],"is_text":true},"rtf":{"mime_type":"text/rtf","group":"text","description":"Rich Text Format document","extensions":["rtf"],"is_text":true},"ruby":{"mime_type":"application/x-ruby","group":"code","description":"Ruby source","extensions":["rb"],"is_text":true},"rust":{"mime_type":"application/x-rust","group":"code","description":"Rust source","extensions":["rs"],"is_text":true},"rzip":{"mime_type":null,"group":null,"description":"Rzip","extensions":["rz"],"is_text":false},"scala":{"mime_type":"application/x-scala","group":"code","description":"Scala source","extensions":["scala"],"is_text":true},"scheme":{"mime_type":"text/x-scheme","group":"code","description":null,"extensions":["scm","ss"],"is_text":false},"scr":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["scr"],"is_text":false},"scriptwsf":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"scss":{"mime_type":"text/x-scss","group":"code","description":"SCSS source","extensions":["scss"],"is_text":true},"sevenzip":{"mime_type":"application/x-7z-compressed","group":"archive","description":"7-zip archive data","extensions":["7z"],"is_text":false},"sgml":{"mime_type":"application/sgml","group":"text","description":"sgml","extensions":["sgml"],"is_text":true},"sh3d":{"mime_type":null,"group":null,"description":null,"extensions":["sh3d"],"is_text":false},"shell":{"mime_type":"text/x-shellscript","group":"code","description":"Shell script","extensions":["sh"],"is_text":true},"smali":{"mime_type":"application/x-smali","group":"code","description":"Smali source","extensions":["smali"],"is_text":true},"snap":{"mime_type":"application/octet-stream","group":"archive","description":"Snap archive","extensions":["snap"],"is_text":false},"so":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, shared 
library","extensions":["so"],"is_text":false},"solidity":{"mime_type":null,"group":"code","description":"Solidity source","extensions":["sol"],"is_text":true},"sql":{"mime_type":"application/x-sql","group":"code","description":"SQL source","extensions":["sql"],"is_text":true},"sqlite":{"mime_type":null,"group":"application","description":"SQLITE database","extensions":["sqlite","sqlite3"],"is_text":false},"squashfs":{"mime_type":"application/octet-stream","group":"archive","description":"Squash filesystem","extensions":[],"is_text":false},"srt":{"mime_type":"text/srt","group":"application","description":"SubRip Text Format","extensions":["srt"],"is_text":true},"stlbinary":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (binary)","extensions":["stl"],"is_text":false},"stltext":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (text)","extensions":["stl"],"is_text":true},"sum":{"mime_type":null,"group":"application","description":"Checksum file","extensions":["sum"],"is_text":true},"svd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"svg":{"mime_type":"image/svg+xml","group":"image","description":"SVG Scalable Vector Graphics image data","extensions":["svg"],"is_text":true},"swf":{"mime_type":"application/x-shockwave-flash","group":"executable","description":"Small Web File","extensions":["swf"],"is_text":false},"swift":{"mime_type":"text/x-swift","group":"code","description":"Swift","extensions":["swift"],"is_text":true},"symlink":{"mime_type":"inode/symlink","group":"inode","description":"Symbolic link","extensions":[],"is_text":false},"symlinktext":{"mime_type":"text/plain","group":"application","description":"Symbolic link (textual representation)","extensions":[],"is_text":true},"sys":{"mime_type":"application/x-windows-driver","group":"executable","description":"PE Windows 
executable","extensions":["sys"],"is_text":false},"tar":{"mime_type":"application/x-tar","group":"archive","description":"POSIX tar archive","extensions":["tar"],"is_text":false},"tcl":{"mime_type":"application/x-tcl","group":"code","description":"Tickle","extensions":["tcl"],"is_text":true},"textproto":{"mime_type":"text/plain","group":"code","description":"Text protocol buffer","extensions":["textproto","textpb","pbtxt"],"is_text":true},"tga":{"mime_type":"image/x-tga","group":"image","description":"Targa image data","extensions":["tga"],"is_text":false},"thumbsdb":{"mime_type":"image/vnd.ms-thumb","group":"application","description":"Windows thumbnail cache","extensions":[],"is_text":false},"tiff":{"mime_type":"image/tiff","group":"image","description":"TIFF image data","extensions":["tiff","tif"],"is_text":false},"tmdx":{"mime_type":null,"group":null,"description":null,"extensions":["tmdx","tmvx"],"is_text":false},"toml":{"mime_type":"application/toml","group":"text","description":"Tom's obvious, minimal language","extensions":["toml"],"is_text":true},"torrent":{"mime_type":"application/x-bittorrent","group":"application","description":"BitTorrent file","extensions":["torrent"],"is_text":false},"troff":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"tsv":{"mime_type":"text/tsv","group":"code","description":"TSV document","extensions":["tsv"],"is_text":true},"tsx":{"mime_type":"text/x-typescript","group":"code","description":"TSX source","extensions":["tsx","mtsx","ctsx"],"is_text":true},"ttf":{"mime_type":"font/sfnt","group":"font","description":"TrueType Font data","extensions":["ttf","ttc"],"is_text":false},"twig":{"mime_type":"text/x-twig","group":"code","description":"Twig template","extensions":["twig"],"is_text":true},"txt":{"mime_type":"text/plain","group":"text","description":"Generic text document","extensions":["txt"],"is_text":true},"txtascii":{"mime_type":"text/plain","group":"text","description":"Generic text 
document encoded in ASCII","extensions":["txt"],"is_text":true},"txtutf16":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-16","extensions":["txt"],"is_text":true},"txtutf8":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-8","extensions":["txt"],"is_text":true},"typescript":{"mime_type":"application/typescript","group":"code","description":"TypeScript source","extensions":["ts","mts","cts"],"is_text":true},"udf":{"mime_type":"application/x-udf-image","group":null,"description":"Universal Disc Format","extensions":[],"is_text":false},"undefined":{"mime_type":"application/undefined","group":"undefined","description":"Undefined","extensions":[],"is_text":false},"unixcompress":{"mime_type":"application/x-compress","group":null,"description":null,"extensions":["z"],"is_text":false},"unknown":{"mime_type":"application/octet-stream","group":"unknown","description":"Unknown binary data","extensions":[],"is_text":false},"vba":{"mime_type":"text/vbscript","group":"code","description":"MS Visual Basic source (VBA)","extensions":["vbs","vba","vb"],"is_text":true},"vbe":{"mime_type":null,"group":"code","description":"EncryptedVBS","extensions":["vbe"],"is_text":false},"vcard":{"mime_type":"text/vcard","group":null,"description":null,"extensions":["vcard"],"is_text":false},"vcs":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"vcxproj":{"mime_type":"application/xml","group":"code","description":"Visual Studio MSBuild project","extensions":["vcxproj"],"is_text":true},"verilog":{"mime_type":"text/x-verilog","group":"code","description":"Verilog source","extensions":["v","verilog","vlg","vh"],"is_text":true},"vhd":{"mime_type":"application/x-vhd","group":null,"description":"Virtual Hard Disk","extensions":[],"is_text":false},"vhdl":{"mime_type":"text/x-vhdl","group":"code","description":"VHDL 
source","extensions":["vhd"],"is_text":true},"visio":{"mime_type":"application/vnd.ms-visio.drawing.main+xml","group":"document","description":"Microsoft Visio","extensions":["vsd","vsdm","vsdx","vdw"],"is_text":false},"vtt":{"mime_type":"text/vtt","group":"text","description":"Web Video Text Tracks","extensions":["vtt","webvtt"],"is_text":true},"vue":{"mime_type":"application/javascript","group":"code","description":"Vue source","extensions":["vue"],"is_text":true},"wad":{"mime_type":"application/wad","group":"archive","description":"WAD","extensions":["wad"],"is_text":false},"wasm":{"mime_type":"application/wasm","group":"executable","description":"Web Assembly","extensions":["wasm"],"is_text":false},"wav":{"mime_type":"audio/x-wav","group":"audio","description":"Waveform Audio file (WAV)","extensions":["wav"],"is_text":false},"webm":{"mime_type":"video/webm","group":"video","description":"WebM media file","extensions":["webm"],"is_text":false},"webp":{"mime_type":"image/webp","group":"image","description":"WebP media file","extensions":["webp"],"is_text":false},"webtemplate":{"mime_type":"text/plain","group":"code","description":"Web templating language","extensions":[],"is_text":true},"wim":{"mime_type":"application/x-ms-wim","group":"unknown","description":"Windows Imaging Format","extensions":["wim","swm","esd"],"is_text":false},"winregistry":{"mime_type":"text/x-ms-regedit","group":"application","description":"Windows Registry text","extensions":["reg"],"is_text":true},"wma":{"mime_type":"audio/x-ms-wma","group":"audio","description":"Windows Media Audio","extensions":["wma"],"is_text":false},"wmf":{"mime_type":"image/wmf","group":"image","description":"Windows metafile","extensions":["wmf"],"is_text":false},"wmv":{"mime_type":"video/x-ms-wmv","group":"video","description":"Windows Media Video","extensions":["wmv"],"is_text":false},"woff":{"mime_type":"font/woff","group":"font","description":"Web Open Font 
Format","extensions":["woff"],"is_text":false},"woff2":{"mime_type":"font/woff2","group":"font","description":"Web Open Font Format v2","extensions":["woff2"],"is_text":false},"xar":{"mime_type":"application/x-xar","group":"archive","description":"XAR archive compressed data","extensions":["pkg","xar"],"is_text":false},"xcf":{"mime_type":"image/x-xcf","group":"image","description":"Gimp image","extensions":["xcf"],"is_text":false},"xls":{"mime_type":"application/vnd.ms-excel","group":"document","description":"Microsoft Excel CDF document","extensions":["xls"],"is_text":false},"xlsb":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document (binary format)","extensions":["xlsb"],"is_text":false},"xlsx":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document","extensions":["xlsx","xlsm"],"is_text":false},"xml":{"mime_type":"text/xml","group":"code","description":"XML document","extensions":["xml"],"is_text":true},"xpi":{"mime_type":"application/zip","group":"archive","description":"Compressed installation archive (XPI)","extensions":["xpi"],"is_text":false},"xsd":{"mime_type":null,"group":null,"description":null,"extensions":["xsd"],"is_text":false},"xz":{"mime_type":"application/x-xz","group":"archive","description":"XZ compressed data","extensions":["xz"],"is_text":false},"yaml":{"mime_type":"application/x-yaml","group":"code","description":"YAML source","extensions":["yml","yaml"],"is_text":true},"yara":{"mime_type":"text/x-yara","group":"code","description":"YARA rule","extensions":["yar","yara"],"is_text":true},"zig":{"mime_type":"text/zig","group":"code","description":"Zig source","extensions":["zig"],"is_text":true},"zip":{"mime_type":"application/zip","group":"archive","description":"Zip archive 
data","extensions":["zip"],"is_text":false},"zlibstream":{"mime_type":"application/zlib","group":"application","description":"zlib compressed data","extensions":[],"is_text":false},"zst":{"mime_type":"application/zstd","group":"archive","description":"Zstandard","extensions":["zst"],"is_text":false}} ================================================ FILE: assets/models/CHANGELOG.md ================================================ # Changelog Here we document the main changes of the various models. The indicated inference speeds are calculated by averaging 100 inferences (within one invocation) on an AMD Ryzen 9 7950X 16-Core Processor CPU. ## `standard_v3_3` - 2025-04-11 - [216 possible tool's outputs](./standard_v3_3/README.md), ~99% average accuracy, ~2ms inference speed. - Better dataset balance between javascript and typescript (leading to an increased accuracy for typescript, 85% => 95%). - New synthetic datasets with utf8-encoded, non-ascii characters for simple text and JSON. - More threshold tuning. ## `standard_v3_2` - 2025-03-17 - [216 possible tool's outputs](./standard_v3_2/README.md), ~99% average accuracy, ~2ms inference speed. - Difference with respect to `standard_v3_1`: trained on a new (synthetic) dataset of CSV files to address a regression with CSV files (https://github.com/google/magika/issues/983); model selection now uses minimal test loss instead of other heuristics. ## `standard_v3_1` - [216 possible tool's outputs](./standard_v3_1/README.md). - Overall same average accuracy as `standard_v3_0`, ~99%, but more robust detections of short textual input and improved detection of JavaScript. - Inference speed: ~2ms (similar to `standard_v3_0`). - Augmentation techniques used during training: CutMix, which was used for `v1` but not for `v2_1`; and "Random Snippet Selection", with which we train the model with random snippets extracted from samples in our dataset (this is only enabled for key textual content types).
- Tweaked balance among content types in the training dataset. ## `standard_v3_0` - [216 possible tool's outputs](./standard_v3_0/README.md). - Overall same average accuracy as `standard_v2_1`, ~99%. - Inference speed: ~2ms (~3x faster than `standard_v2_1`, ~20% faster than `standard_v1`). ## `standard_v2_1` - [Support for 200+ content types](./standard_v2_1/README.md), almost double what is supported in `standard_v1`. - Overall average accuracy of ~99%. - Inference speed: ~6.2ms, which is slower than `standard_v1`; see `fast_v2_1` in case you need something faster (at the price of less accuracy). ## `fast_v2_1` - Similar to `standard_v2_1`, but significantly faster (about 4x faster). - Overall average accuracy of ~98.5%. ## `standard_v1` - Initial release. - Support for about 100 content types. - Average accuracy 99%+. - Inference speed: ~2.6ms. ================================================ FILE: assets/models/begonly_v2_1/config.min.json ================================================ {"beg_size": 2048, "mid_size": 0, "end_size": 0, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl",
"julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}} ================================================ FILE: assets/models/begonly_v2_1/metadata.json ================================================ {"model_name_hash": "e66844a04ae7a03bd9f228d9b778ec8429d361d0dca09b951b327ffad5beb07a", "namespace_hash": "ce3c9130af6416f40d71c5934f927acbd174f904a550fca2185aa3cd3528ca35"} ================================================ FILE: assets/models/fast_v2_1/config.min.json ================================================ {"beg_size": 512, "mid_size": 0, "end_size": 512, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", 
"cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}} ================================================ FILE: assets/models/fast_v2_1/metadata.json ================================================ {"model_name_hash": "83b9d2bd0c450deffc70624554c99fa63e1830db852cdce860b7e215fa176f9f", "namespace_hash": 
"ce3c9130af6416f40d71c5934f927acbd174f904a550fca2185aa3cd3528ca35"} ================================================ FILE: assets/models/standard_v1/README.md ================================================ # Model documentation The list of supported content types is indicated by the `target_labels_space` list in the model config, which you can find [here](https://github.com/google/magika/blob/7f947319f1ebe09626368a3f989a0863fcd7c52a/assets/models/standard_v1/model_config.json#L440-L554). Note: This model will be deprecated soon, in favor of more recent models (e.g., `standard_v3_x`). ================================================ FILE: assets/models/standard_v1/content_types_config.json ================================================ { "3gp": { "name": "3gp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ace": { "name": "ace", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "aff": { "name": "aff", "extensions": [ "aff" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ai": { "name": "ai", "extensions": [ "ai" ], "mime_type": "application/pdf", "group": "document", "magic": "PDF document", "description": "Adobe Illustrator Artwork", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": 
"ai", "target_label": "ai", "correct_labels": [ "ai", "pdf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "algol68": { "name": "algol68", "extensions": [ "a68" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "apk": { "name": "apk", "extensions": [ "apk" ], "mime_type": "application/vnd.android.package-archive", "group": "executable", "magic": "Java archive data", "description": "Android package", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "apk", "correct_labels": [ "apk" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "appleplist": { "name": "appleplist", "extensions": [ "plist" ], "mime_type": "application/x-plist", "group": "application", "magic": "Apple binary property list", "description": "Android property list", "vt_type": "appleplist", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "text" ], "model_target_label": "appleplist", "target_label": "appleplist", "correct_labels": [ "appleplist" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "arj": { "name": "arj", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "asm": { "name": "asm", "extensions": [ "S", "asm" ], "mime_type": "text/x-asm", "group": "code", "magic": "assembler source", "description": "Assembly", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": 
"asm", "target_label": "asm", "correct_labels": [ "asm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "asp": { "name": "asp", "extensions": [ "aspx", "asp" ], "mime_type": "text/html", "group": "code", "magic": "HTML document", "description": "ASP source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "asp", "target_label": "asp", "correct_labels": [ "asp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "avi": { "name": "avi", "extensions": [ "avi" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ax": { "name": "ax", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "batch": { "name": "batch", "extensions": [ "bat" ], "mime_type": "text/x-msdos-batch", "group": "code", "magic": "DOS batch file", "description": "DOS batch file", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "batch", "target_label": "batch", "correct_labels": [ "batch" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "bcad": { "name": "bcad", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bib": { "name": "bib", "extensions": [], "mime_type": null, "group": 
null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bmp": { "name": "bmp", "extensions": [ "bmp" ], "mime_type": "image/bmp", "group": "image", "magic": "PC bitmap", "description": "BMP image data", "vt_type": "bmp", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "bmp", "target_label": "bmp", "correct_labels": [ "bmp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "bpl": { "name": "bpl", "extensions": [ "bpl" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "brainfuck": { "name": "brainfuck", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bzip": { "name": "bzip", "extensions": [ "bz2", "tbz2", "tar.bz2" ], "mime_type": "application/x-bzip2", "group": "archive", "magic": "bzip2 compressed data", "description": "bzip2 compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "bzip", "target_label": "bzip", "correct_labels": [ "bzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "c": { "name": "c", "extensions": [ "c", "cpp", "h", "hpp", "cc" ], "mime_type": "text/x-c", "group": "code", "magic": "C source", "description": "C source", "vt_type": "c,cpp", "datasets": [ "github" ], "parent": null, "tags": [ "text", 
"dl_target" ], "model_target_label": "c", "target_label": "c", "correct_labels": [ "c", "cpp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cab": { "name": "cab", "extensions": [ "cab" ], "mime_type": "application/vnd.ms-cab-compressed", "group": "archive", "magic": "Microsoft Cabinet archive data", "description": "Microsoft Cabinet archive data", "vt_type": "cab", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "cab", "target_label": "cab", "correct_labels": [ "cab" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cad": { "name": "cad", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cat": { "name": "cat", "extensions": [ "cat" ], "mime_type": "application/octet-stream", "group": "application", "magic": "data", "description": "Windows Catalog file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "cat", "target_label": "cat", "correct_labels": [ "cat", "ctl" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cdf": { "name": "cdf", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "chm": { "name": "chm", "extensions": [ "chm" ], "mime_type": "application/chm", "group": "application", "magic": "MS Windows HtmlHelp Data", "description": "MS Windows HtmlHelp Data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "chm", "target_label": "chm", 
"correct_labels": [ "chm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "clojure": { "name": "clojure", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cmake": { "name": "cmake", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cobol": { "name": "cobol", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "coff": { "name": "coff", "extensions": [], "mime_type": "application/x-coff", "group": "executable", "magic": "Intel 80386 COFF", "description": "Intel 80386 COFF", "vt_type": "coff", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "coff", "target_label": "coff", "correct_labels": [ "coff", "exp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "coffee": { "name": "coffee", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "com": { "name": "com", "extensions": [], "mime_type": "application/x-dosexec", "group": null, "magic": null, "description": null, "vt_type": "com", "datasets": [], 
"parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cpl": { "name": "cpl", "extensions": [ "cpl" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE32 executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "cpl" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cpp": { "name": "cpp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "crx": { "name": "crx", "extensions": [ "crx" ], "mime_type": "application/x-chrome-extension", "group": "executable", "magic": "Google Chrome extension", "description": "Google Chrome extension", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "crx", "target_label": "crx", "correct_labels": [ "crx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cs": { "name": "cs", "extensions": [ "cs" ], "mime_type": "text/plain", "group": "code", "magic": "ASCII text", "description": "C# source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "cs", "target_label": "cs", "correct_labels": [ "cs" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "css": { "name": "css", "extensions": [ "css" ], "mime_type": "text/css", "group": "code", "magic": "ASCII text", "description": "CSS source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ 
"text", "dl_target" ], "model_target_label": "css", "target_label": "css", "correct_labels": [ "css" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "csv": { "name": "csv", "extensions": [ "csv" ], "mime_type": "text/csv", "group": "code", "magic": "CSV text", "description": "CSV document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "csv", "target_label": "csv", "correct_labels": [ "csv" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ctl": { "name": "ctl", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dart": { "name": "dart", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "deb": { "name": "deb", "extensions": [ "deb" ], "mime_type": "application/vnd.debian.binary-package", "group": "archive", "magic": "Debian binary package", "description": "Debian binary package", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "deb", "target_label": "deb", "correct_labels": [ "deb" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dex": { "name": "dex", "extensions": [ "dex" ], "mime_type": "application/x-android-dex", "group": "executable", "magic": "Dalvik dex file", "description": "Dalvik dex file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "dex", "target_label": "dex", "correct_labels": [ "dex" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dey": { "name": "dey", "extensions": [], "mime_type": "application/x-android-dey", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "diff": { "name": "diff", "extensions": [ "diff" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "directory": { "name": "directory", "extensions": [], "mime_type": "inode/directory", "group": "inode", "magic": "directory", "description": "A directory", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "directory", "correct_labels": [ "directory" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "dll": { "name": "dll", "extensions": [ "dll" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": "pedll", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "dll" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dm": { "name": "dm", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dmg": { "name": "dmg", "extensions": [ "dmg" ], "mime_type": "application/x-apple-diskimage", 
"group": "archive", "magic": "Apple disk image", "description": "Apple disk image", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "dmg", "target_label": "dmg", "correct_labels": [ "dmg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "doc": { "name": "doc", "extensions": [ "doc" ], "mime_type": "application/msword", "group": "document", "magic": "Composite Document File", "description": "Microsoft Word CDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "doc", "correct_labels": [ "doc" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dockerfile": { "name": "dockerfile", "extensions": [ "=Dockerfile" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "docx": { "name": "docx", "extensions": [ "docx", "docm" ], "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "group": "document", "magic": "Microsoft Word 2007+", "description": "Microsoft Word 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "docx", "correct_labels": [ "docx", "tmdx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dosmbr": { "name": "dosmbr", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dylib": { "name": "dylib", 
"extensions": [ "dylib" ], "mime_type": "application/x-mach-o", "group": "executable", "magic": "Mach-O executable", "description": "Mach-O executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "macho" ], "model_target_label": "macho", "target_label": "macho", "correct_labels": [ "macho", "dylib" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "elf": { "name": "elf", "extensions": [ "elf", "so" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ELF executable", "vt_type": "elf", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "so" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "elixir": { "name": "elixir", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "emf": { "name": "emf", "extensions": [ "emf" ], "mime_type": "application/octet-stream", "group": "application", "magic": "Windows Enhanced Metafile", "description": "Windows Enhanced Metafile image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "emf", "target_label": "emf", "correct_labels": [ "emf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "eml": { "name": "eml", "extensions": [ "eml" ], "mime_type": "message/rfc822", "group": "text", "magic": "RFC 822 mail", "description": "RFC 822 mail", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "eml", "target_label": "eml", "correct_labels": [ "eml" ], "in_scope_for_output_content_type": true, 
"in_scope_for_training": true }, "empty": { "name": "empty", "extensions": [], "mime_type": "inode/x-empty", "group": "inode", "magic": "empty", "description": "Empty file", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "empty", "correct_labels": [ "empty" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "epub": { "name": "epub", "extensions": [ "epub" ], "mime_type": "application/epub+zip", "group": "document", "magic": "EPUB document", "description": "EPUB document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "epub", "correct_labels": [ "epub" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "erlang": { "name": "erlang", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ese": { "name": "ese", "extensions": [], "mime_type": "application/x-ms-ese", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "exe": { "name": "exe", "extensions": [ "exe" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": "peexe", "datasets": [ "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "exe" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "exp": { "name": "exp", "extensions": [], 
"mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "flac": { "name": "flac", "extensions": [ "flac" ], "mime_type": "audio/flac", "group": "audio", "magic": "FLAC audio bitstream data", "description": "FLAC audio bitstream data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "flac", "target_label": "flac", "correct_labels": [ "flac" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "fortran": { "name": "fortran", "extensions": [ "f90", "f95", "f03" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "fpx": { "name": "fpx", "extensions": [ "fpx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": "fpx", "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "gif": { "name": "gif", "extensions": [ "gif" ], "mime_type": "image/gif", "group": "image", "magic": "GIF image data", "description": "GIF image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "gif", "target_label": "gif", "correct_labels": [ "gif" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "go": { "name": "go", "extensions": [ "go" ], "mime_type": "text/x-golang", "group": "code", "magic": "ASCII text", "description": "Golang source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ 
"text", "dl_target" ], "model_target_label": "go", "target_label": "go", "correct_labels": [ "go" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "gpx": { "name": "gpx", "extensions": [ "gpx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "groovy": { "name": "groovy", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "gzip": { "name": "gzip", "extensions": [ "gz", "gzip", "tgz", "tar.gz" ], "mime_type": "application/gzip", "group": "archive", "magic": "gzip compressed data", "description": "gzip compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "gzip", "target_label": "gzip", "correct_labels": [ "gzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "haskell": { "name": "haskell", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "hfs": { "name": "hfs", "extensions": [ "hfs" ], "mime_type": "application/x-hfs", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "hlp": { "name": "hlp", "extensions": [ 
"hlp" ], "mime_type": "application/winhlp", "group": "application", "magic": "MS Windows help", "description": "MS Windows help", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "hlp", "target_label": "hlp", "correct_labels": [ "hlp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "hta": { "name": "hta", "extensions": [ "hta" ], "mime_type": "application/hta", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "html": { "name": "html", "extensions": [ "html", "htm", "xhtml", "xht" ], "mime_type": "text/html", "group": "code", "magic": "HTML document", "description": "HTML document", "vt_type": "html", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "html", "target_label": "html", "correct_labels": [ "html" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "hve": { "name": "hve", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ico": { "name": "ico", "extensions": [ "ico" ], "mime_type": "image/vnd.microsoft.icon", "group": "image", "magic": "MS Windows icon resource", "description": "MS Windows icon resource", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "ico", "target_label": "ico", "correct_labels": [ "ico" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "img": { "name": "img", "extensions": [ "img" ], "mime_type": null, "group": null, "magic": null, "description": 
null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ini": { "name": "ini", "extensions": [ "ini" ], "mime_type": "text/plain", "group": "text", "magic": "Generic INItialization configuration", "description": "INI configuration file", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "ini", "target_label": "ini", "correct_labels": [ "ini" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "internetshortcut": { "name": "internetshortcut", "extensions": [ "url" ], "mime_type": "application/x-mswinurl", "group": "application", "magic": "MS Windows 95 Internet shortcut", "description": "MS Windows Internet shortcut", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "internetshortcut", "target_label": "internetshortcut", "correct_labels": [ "internetshortcut" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "iosapp": { "name": "iosapp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "iso": { "name": "iso", "extensions": [ "iso" ], "mime_type": "application/x-iso9660-image", "group": "archive", "magic": "ISO 9660 CD-ROM filesystem data", "description": "ISO 9660 CD-ROM filesystem data", "vt_type": "isoimage", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "iso", "target_label": "iso", "correct_labels": [ "iso", "udf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "jar": { "name": "jar", "extensions": [ "jar" ], "mime_type": 
"application/java-archive", "group": "archive", "magic": "Java archive data (JAR)", "description": "Java archive data (JAR)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "jar", "target_label": "jar", "correct_labels": [ "jar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "java": { "name": "java", "extensions": [ "java" ], "mime_type": "text/x-java", "group": "code", "magic": "Java source", "description": "Java source", "vt_type": "java", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "java", "target_label": "java", "correct_labels": [ "java" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "javabytecode": { "name": "javabytecode", "extensions": [ "class" ], "mime_type": "application/x-java-applet", "group": "executable", "magic": "compiled Java class data", "description": "Java compiled bytecode", "vt_type": "class", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "javabytecode", "target_label": "javabytecode", "correct_labels": [ "javabytecode" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "javascript": { "name": "javascript", "extensions": [ "js" ], "mime_type": "application/javascript", "group": "code", "magic": "JavaScript source", "description": "JavaScript source", "vt_type": "javascript", "datasets": [ "github", "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "javascript", "target_label": "javascript", "correct_labels": [ "javascript", "typescript" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "jpeg": { "name": "jpeg", "extensions": [ "jpg", "jpeg" ], "mime_type": "image/jpeg", "group": "image", "magic": "JPEG image data", "description": "JPEG image data", "vt_type": "jpeg", "datasets": [ "vt-ext" ], "parent": 
null, "tags": [ "binary", "media" ], "model_target_label": "jpeg", "target_label": "jpeg", "correct_labels": [ "jpeg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "json": { "name": "json", "extensions": [ "json" ], "mime_type": "application/json", "group": "code", "magic": "JSON data", "description": "JSON document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "json", "target_label": "json", "correct_labels": [ "json" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "julia": { "name": "julia", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ko": { "name": "ko", "extensions": [ "ko" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "ko" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "kotlin": { "name": "kotlin", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "latex": { "name": "latex", "extensions": [ "tex" ], "mime_type": "text/x-tex", "group": "text", "magic": "LaTeX document", "description": "LaTeX document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "latex", "target_label": "latex", 
"correct_labels": [ "latex" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lisp": { "name": "lisp", "extensions": [ "lisp" ], "mime_type": "text/x-lisp", "group": "code", "magic": "Lisp/Scheme program", "description": "Lisp source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "lisp", "target_label": "lisp", "correct_labels": [ "lisp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lnk": { "name": "lnk", "extensions": [ "lnk" ], "mime_type": "application/x-ms-shortcut", "group": "application", "magic": "MS Windows shortcut", "description": "MS Windows shortcut", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "lnk", "target_label": "lnk", "correct_labels": [ "lnk" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lua": { "name": "lua", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "m3u": { "name": "m3u", "extensions": [ "m3u8", "m3u" ], "mime_type": "text/plain", "group": "application", "magic": "M3U playlist", "description": "M3U playlist", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "m3u", "target_label": "m3u", "correct_labels": [ "m3u" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "macho": { "name": "macho", "extensions": [], "mime_type": "application/x-mach-o", "group": "executable", "magic": "Mach-O executable", "description": "Mach-O executable", "vt_type": "macho", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "macho" ], "model_target_label": "macho", "target_label": "macho", 
"correct_labels": [ "macho", "dylib" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "maff": { "name": "maff", "extensions": [ "maff" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "makefile": { "name": "makefile", "extensions": [ "=Makefile" ], "mime_type": "text/x-makefile", "group": "code", "magic": "makefile script", "description": "Makefile source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "makefile", "target_label": "makefile", "correct_labels": [ "makefile" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "markdown": { "name": "markdown", "extensions": [ "md" ], "mime_type": "text/markdown", "group": "text", "magic": "ASCII text", "description": "Markdown document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "markdown", "target_label": "markdown", "correct_labels": [ "markdown" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "matlab": { "name": "matlab", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mht": { "name": "mht", "extensions": [ "mht" ], "mime_type": "application/x-mimearchive", "group": "code", "magic": "HTML document", "description": "MHTML document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "mht", "target_label": "mht", "correct_labels": [ "mht" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mkv": { "name": "mkv", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mov": { "name": "mov", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mp3": { "name": "mp3", "extensions": [ "mp3" ], "mime_type": "audio/mpeg", "group": "audio", "magic": "Audio file with ID3", "description": "MP3 media file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "mp3", "target_label": "mp3", "correct_labels": [ "mp3" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mp4": { "name": "mp4", "extensions": [ "mp4" ], "mime_type": "video/mp4", "group": "video", "magic": "ISO Media", "description": "MP4 media file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "mp4", "target_label": "mp4", "correct_labels": [ "mp4" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mscompress": { "name": "mscompress", "extensions": [], "mime_type": "application/x-ms-compress-szdd", "group": "archive", "magic": "MS Compress archive data", "description": "MS Compress archive data", "vt_type": "mscompress", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "mscompress", "target_label": "mscompress", "correct_labels": [ "mscompress" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "msi": { 
"name": "msi", "extensions": [ "msi" ], "mime_type": "application/x-msi", "group": "archive", "magic": "Composite Document File", "description": "Microsoft Installer file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "msi", "target_label": "msi", "correct_labels": [ "msi" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mst": { "name": "mst", "extensions": [ "mst" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "msvisio": { "name": "msvisio", "extensions": [], "mime_type": "application/vnd.ms-visio.drawing.main+xml", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mui": { "name": "mui", "extensions": [ "mui" ], "mime_type": "application/x-dosexec", "group": "application", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "mui" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mum": { "name": "mum", "extensions": [ "mum" ], "mime_type": "text/xml", "group": "application", "magic": "XML document", "description": "Windows Update Package file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "mum", "target_label": "mum", "correct_labels": [ "mum" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mun": { "name": "mun", 
"extensions": [ "mun" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "nim": { "name": "nim", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "null": { "name": "null", "extensions": [ "null" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "object": { "name": "object", "extensions": [ "o" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "objectivec": { "name": "objectivec", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ocaml": { "name": "ocaml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ocx": { "name": "ocx", "extensions": [ "ocx" 
], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "ax", "ocx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odex": { "name": "odex", "extensions": [ "odex" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ODEX ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "odex", "target_label": "odex", "correct_labels": [ "odex", "elf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odp": { "name": "odp", "extensions": [ "odp" ], "mime_type": "application/vnd.oasis.opendocument.presentation", "group": "document", "magic": "OpenDocument Presentation", "description": "OpenDocument Presentation", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "odp", "target_label": "odp", "correct_labels": [ "odp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ods": { "name": "ods", "extensions": [ "ods" ], "mime_type": "application/vnd.oasis.opendocument.spreadsheet", "group": "document", "magic": "OpenDocument Spreadsheet", "description": "OpenDocument Spreadsheet", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "ods", "target_label": "ods", "correct_labels": [ "ods" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odt": { "name": "odt", "extensions": [ "odt" ], "mime_type": "application/vnd.oasis.opendocument.text", "group": "document", "magic": "OpenDocument Text", "description": "OpenDocument Text", "vt_type": null, "datasets": [ 
"vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "odt", "target_label": "odt", "correct_labels": [ "odt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ogg": { "name": "ogg", "extensions": [ "ogg" ], "mime_type": "audio/ogg", "group": "audio", "magic": "Ogg data", "description": "Ogg data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "ogg", "target_label": "ogg", "correct_labels": [ "ogg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ole": { "name": "ole", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ooxml": { "name": "ooxml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "outlook": { "name": "outlook", "extensions": [], "mime_type": "application/vnd.ms-outlook", "group": "application", "magic": "CDFV2 Microsoft Outlook Message", "description": "MS Outlook Message", "vt_type": "outlook", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "outlook", "correct_labels": [ "outlook" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "palmos": { "name": "palmos", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": "palmos", "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], 
"in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pascal": { "name": "pascal", "extensions": [ "pascal" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pbm": { "name": "pbm", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pcap": { "name": "pcap", "extensions": [ "pcap", "pcapng" ], "mime_type": "application/vnd.tcpdump.pcap", "group": "application", "magic": "pcap capture file", "description": "pcap capture file", "vt_type": "pcap", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pcap", "target_label": "pcap", "correct_labels": [ "pcap" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pdf": { "name": "pdf", "extensions": [ "pdf" ], "mime_type": "application/pdf", "group": "document", "magic": "PDF document", "description": "PDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pdf", "target_label": "pdf", "correct_labels": [ "pdf", "ai" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pebin": { "name": "pebin", "extensions": [ "exe", "dll", "sys" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE executable", "description": "PE executable", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "pem": { "name": "pem", 
"extensions": [ "pem", "pub" ], "mime_type": "application/x-pem-file", "group": "application", "magic": "PEM certificate", "description": "PEM certificate", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "pem", "target_label": "pem", "correct_labels": [ "pem", "pgpkey" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "perl": { "name": "perl", "extensions": [ "pl" ], "mime_type": "text/x-perl", "group": "code", "magic": "Perl script text executable", "description": "Perl source", "vt_type": "perl", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "perl", "target_label": "perl", "correct_labels": [ "perl" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pgpkey": { "name": "pgpkey", "extensions": [], "mime_type": "application/pgp-keys", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "php": { "name": "php", "extensions": [ "php" ], "mime_type": "text/x-php", "group": "code", "magic": "PHP script", "description": "PHP source", "vt_type": "php", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "php", "target_label": "php", "correct_labels": [ "php" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "png": { "name": "png", "extensions": [ "png" ], "mime_type": "image/png", "group": "image", "magic": "PNG image data", "description": "PNG image data", "vt_type": "png", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "png", "target_label": "png", "correct_labels": [ "png" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "postscript": { "name": "postscript", 
"extensions": [ "ps" ], "mime_type": "application/postscript", "group": "document", "magic": "PostScript document text", "description": "PostScript document", "vt_type": "postscript", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "postscript", "target_label": "postscript", "correct_labels": [ "postscript" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "powershell": { "name": "powershell", "extensions": [ "ps1" ], "mime_type": "application/x-powershell", "group": "code", "magic": "a powershell script", "description": "Powershell source", "vt_type": "ps", "datasets": [ "github", "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "powershell", "target_label": "powershell", "correct_labels": [ "powershell" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ppt": { "name": "ppt", "extensions": [ "ppt" ], "mime_type": "application/vnd.ms-powerpoint", "group": "document", "magic": "Composite Document File", "description": "Microsoft PowerPoint CDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "ppt", "correct_labels": [ "ppt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pptx": { "name": "pptx", "extensions": [ "pptx", "pptm" ], "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "group": "document", "magic": "Microsoft PowerPoint 2007+", "description": "Microsoft PowerPoint 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "pptx", "correct_labels": [ "pptx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "printfox": { "name": "printfox", "extensions": [], "mime_type": null, "group": null, "magic": null, 
"description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "prolog": { "name": "prolog", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pub": { "name": "pub", "extensions": [ "pub" ], "mime_type": "application/x-mspublisher", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "python": { "name": "python", "extensions": [ "py" ], "mime_type": "text/x-python", "group": "code", "magic": "Python script", "description": "Python source", "vt_type": "python", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "python", "target_label": "python", "correct_labels": [ "python" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pythonbytecode": { "name": "pythonbytecode", "extensions": [ "pyc", "pyo" ], "mime_type": "application/x-bytecode.python", "group": "executable", "magic": "python byte-compiled", "description": "Python compiled bytecode", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pythonbytecode", "target_label": "pythonbytecode", "correct_labels": [ "pythonbytecode" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pythonpar": { "name": "pythonpar", "extensions": [ "par" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, 
"datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "r": { "name": "r", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "randombytes": { "name": "randombytes", "extensions": [], "mime_type": "application/octet-stream", "group": "unknown", "magic": "data", "description": "Random bytes", "vt_type": null, "datasets": [ "synthetic" ], "parent": null, "tags": [], "model_target_label": "unknown", "target_label": "unknown", "correct_labels": [ "unknown" ], "in_scope_for_output_content_type": false, "in_scope_for_training": true }, "rar": { "name": "rar", "extensions": [ "rar" ], "mime_type": "application/x-rar", "group": "archive", "magic": "RAR archive data", "description": "RAR archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "rar", "target_label": "rar", "correct_labels": [ "rar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rdf": { "name": "rdf", "extensions": [ "rdf" ], "mime_type": "application/rdf+xml", "group": "text", "magic": "XML document", "description": "Resource Description Framework document (RDF)", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "rdf", "target_label": "rdf", "correct_labels": [ "rdf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rll": { "name": "rll", "extensions": [ "rll" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, 
"correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "rpm": { "name": "rpm", "extensions": [ "rpm" ], "mime_type": "application/x-rpm", "group": "archive", "magic": "RPM", "description": "RedHat Package Manager archive (RPM)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "rpm", "target_label": "rpm", "correct_labels": [ "rpm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rst": { "name": "rst", "extensions": [ "rst" ], "mime_type": "text/x-rst", "group": "text", "magic": "ReStructuredText file", "description": "ReStructuredText document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "rst", "target_label": "rst", "correct_labels": [ "rst" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rtf": { "name": "rtf", "extensions": [ "rtf" ], "mime_type": "text/rtf", "group": "text", "magic": "Rich Text Format data", "description": "Rich Text Format document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "rtf", "target_label": "rtf", "correct_labels": [ "rtf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ruby": { "name": "ruby", "extensions": [ "rb" ], "mime_type": "application/x-ruby", "group": "code", "magic": "Ruby script", "description": "Ruby source", "vt_type": "ruby", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "ruby", "target_label": "ruby", "correct_labels": [ "ruby" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rust": { "name": "rust", "extensions": [ "rs" ], "mime_type": "application/x-rust", "group": "code", "magic": "ASCII text", "description": "Rust source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], 
"model_target_label": "rust", "target_label": "rust", "correct_labels": [ "rust" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "s": { "name": "s", "extensions": [ "s" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "scala": { "name": "scala", "extensions": [ "scala" ], "mime_type": "application/x-scala", "group": "code", "magic": "ASCII text", "description": "Scala source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "scala", "target_label": "scala", "correct_labels": [ "scala" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "scr": { "name": "scr", "extensions": [ "scr" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "scr" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "scriptwsf": { "name": "scriptwsf", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "sevenzip": { "name": "sevenzip", "extensions": [ "7z" ], "mime_type": "application/x-7z-compressed", "group": "archive", "magic": "7-zip archive data", "description": "7-zip archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "sevenzip", 
"target_label": "sevenzip", "correct_labels": [ "sevenzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "sgml": { "name": "sgml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "sh3d": { "name": "sh3d", "extensions": [ "sh3d" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "shell": { "name": "shell", "extensions": [ "sh" ], "mime_type": "text/x-shellscript", "group": "code", "magic": "shell script", "description": "Shell script", "vt_type": "shell", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "shell", "target_label": "shell", "correct_labels": [ "shell" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "smali": { "name": "smali", "extensions": [ "smali" ], "mime_type": "application/x-smali", "group": "code", "magic": "ASCII text", "description": "Smali source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "smali", "target_label": "smali", "correct_labels": [ "smali" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "so": { "name": "so", "extensions": [ "so" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "so" ], "in_scope_for_output_content_type": 
true, "in_scope_for_training": true }, "sql": { "name": "sql", "extensions": [ "sql" ], "mime_type": "application/x-sql", "group": "code", "magic": "ASCII text", "description": "SQL source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "sql", "target_label": "sql", "correct_labels": [ "sql" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "squashfs": { "name": "squashfs", "extensions": [], "mime_type": "application/octet-stream", "group": "archive", "magic": "Squashfs filesystem", "description": "Squash filesystem", "vt_type": "squashfs", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "squashfs", "target_label": "squashfs", "correct_labels": [ "squashfs" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "svd": { "name": "svd", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "svg": { "name": "svg", "extensions": [ "svg" ], "mime_type": "image/svg+xml", "group": "image", "magic": "SVG Scalable Vector Graphics image", "description": "SVG Scalable Vector Graphics image data", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "svg", "target_label": "svg", "correct_labels": [ "svg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "swf": { "name": "swf", "extensions": [ "swf" ], "mime_type": "application/x-shockwave-flash", "group": "executable", "magic": "Macromedia Flash data", "description": "Macromedia Flash data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "swf", "target_label": "swf", "correct_labels": [ "swf" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "swift": { "name": "swift", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "symlinktext": { "name": "symlinktext", "extensions": [], "mime_type": "text/plain", "group": "application", "magic": "ASCII text", "description": "Symbolic link (textual representation)", "vt_type": null, "datasets": [ "synthetic" ], "parent": null, "tags": [ "text" ], "model_target_label": "symlinktext", "target_label": "symlinktext", "correct_labels": [ "symlinktext", "txt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "symlink": { "name": "symlink", "extensions": [], "mime_type": "inode/symlink", "group": "inode", "magic": "symbolic link to ", "description": "Symbolic link to ", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "symlink", "correct_labels": [ "symlink" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "sys": { "name": "sys", "extensions": [ "sys" ], "mime_type": "application/x-windows-driver", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "sys" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tar": { "name": "tar", "extensions": [ "tar" ], "mime_type": "application/x-tar", "group": "archive", "magic": "POSIX tar archive", "description": "POSIX tar archive", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "tar", 
"target_label": "tar", "correct_labels": [ "tar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tga": { "name": "tga", "extensions": [ "tga" ], "mime_type": "image/x-tga", "group": "image", "magic": "Targa image data", "description": "Targa image data", "vt_type": "targa", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "tga", "target_label": "tga", "correct_labels": [ "tga" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tiff": { "name": "tiff", "extensions": [ "tiff", "tif" ], "mime_type": "image/tiff", "group": "image", "magic": "TIFF image data", "description": "TIFF image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "tiff", "target_label": "tiff", "correct_labels": [ "tiff" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tmdx": { "name": "tmdx", "extensions": [ "tmdx", "tmvx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "toml": { "name": "toml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "torrent": { "name": "torrent", "extensions": [ "torrent" ], "mime_type": "application/x-bittorrent", "group": "application", "magic": "BitTorrent file", "description": "BitTorrent file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "torrent", "target_label": "torrent", "correct_labels": [ "torrent" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "troff": { "name": "troff", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ttf": { "name": "ttf", "extensions": [ "ttf" ], "mime_type": "font/sfnt", "group": "font", "magic": "TrueType Font data", "description": "TrueType Font data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "ttf", "target_label": "ttf", "correct_labels": [ "ttf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "txt": { "name": "txt", "extensions": [ "txt" ], "mime_type": "text/plain", "group": "text", "magic": "ASCII text", "description": "Generic text document", "vt_type": null, "datasets": [ "github", "synthetic" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "txt", "target_label": "txt", "correct_labels": [ "txt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "typescript": { "name": "typescript", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "udf": { "name": "udf", "extensions": [], "mime_type": "application/x-udf-image", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "unixcompress": { "name": "unixcompress", "extensions": [ "z" ], "mime_type": "application/x-compress", 
"group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "unknown": { "name": "unknown", "extensions": [], "mime_type": "application/octet-stream", "group": "unknown", "magic": "data", "description": "Unknown binary data", "vt_type": null, "datasets": [], "parent": null, "tags": [ "binary" ], "model_target_label": "unknown", "target_label": "unknown", "correct_labels": [ "unknown" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "vba": { "name": "vba", "extensions": [ "vbs" ], "mime_type": "text/vbscript", "group": "code", "magic": "ASCII text", "description": "MS Visual Basic source (VBA)", "vt_type": "vba", "datasets": [ "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text" ], "model_target_label": "vba", "target_label": "vba", "correct_labels": [ "vba" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "verilog": { "name": "verilog", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "vhd": { "name": "vhd", "extensions": [], "mime_type": "application/x-vhd", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "wasm": { "name": "wasm", "extensions": [ "wasm" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, 
"correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "wav": { "name": "wav", "extensions": [ "wav" ], "mime_type": "audio/x-wav", "group": "audio", "magic": "RIFF data", "description": "Waveform Audio file (WAV)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "wav", "target_label": "wav", "correct_labels": [ "wav" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "webm": { "name": "webm", "extensions": [ "webm" ], "mime_type": "video/webm", "group": "video", "magic": "WebM", "description": "WebM data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "webm", "target_label": "webm", "correct_labels": [ "webm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "webp": { "name": "webp", "extensions": [ "webp" ], "mime_type": "image/webp", "group": "image", "magic": "RIFF data", "description": "WebP data", "vt_type": "webp", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "webp", "target_label": "webp", "correct_labels": [ "webp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "winregistry": { "name": "winregistry", "extensions": [ "reg" ], "mime_type": "text/x-ms-regedit", "group": "application", "magic": "Windows Registry text", "description": "Windows Registry text", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "winregistry", "target_label": "winregistry", "correct_labels": [ "winregistry" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "wmf": { "name": "wmf", "extensions": [ "wmf" ], "mime_type": "image/wmf", "group": "image", "magic": "Windows metafile", "description": "Windows metafile", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" 
], "model_target_label": "wmf", "target_label": "wmf", "correct_labels": [ "wmf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "woff": { "name": "woff", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "xar": { "name": "xar", "extensions": [ "pkg" ], "mime_type": "application/x-xar", "group": "archive", "magic": "xar archive compressed", "description": "XAR archive compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "xar", "target_label": "xar", "correct_labels": [ "xar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xls": { "name": "xls", "extensions": [ "xls" ], "mime_type": "application/vnd.ms-excel", "group": "document", "magic": "Composite Document File", "description": "Microsoft Excel CDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "xls", "correct_labels": [ "xls" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xlsb": { "name": "xlsb", "extensions": [ "xlsb" ], "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "magic": "Microsoft Excel 2007+", "description": "Microsoft Excel 2007+ document (binary format)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xlsb", "correct_labels": [ "xlsb", "xlsx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xlsx": { "name": "xlsx", "extensions": [ "xlsx", "xlsm" ], "mime_type": 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "magic": "Microsoft Excel 2007+", "description": "Microsoft Excel 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xlsx", "correct_labels": [ "xlsx", "xlsb" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xml": { "name": "xml", "extensions": [ "xml" ], "mime_type": "text/xml", "group": "code", "magic": "XML document", "description": "XML document", "vt_type": "xml", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "xml", "target_label": "xml", "correct_labels": [ "xml" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xpi": { "name": "xpi", "extensions": [ "xpi" ], "mime_type": "application/zip", "group": "archive", "magic": "Zip archive data", "description": "Compressed installation archive (XPI)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xpi", "correct_labels": [ "xpi" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xz": { "name": "xz", "extensions": [ "xz" ], "mime_type": "application/x-xz", "group": "archive", "magic": "XZ compressed data", "description": "XZ compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "xz", "target_label": "xz", "correct_labels": [ "xz" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "yaml": { "name": "yaml", "extensions": [ "yml", "yaml" ], "mime_type": "application/x-yaml", "group": "code", "magic": "ASCII text", "description": "YAML source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "yaml", 
"target_label": "yaml", "correct_labels": [ "yaml" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "zip": { "name": "zip", "extensions": [ "zip" ], "mime_type": "application/zip", "group": "archive", "magic": "Zip archive data", "description": "Zip archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "zip", "correct_labels": [ "zip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "zlibstream": { "name": "zlibstream", "extensions": [], "mime_type": "application/zlib", "group": "application", "magic": "zlib compressed data", "description": "zlib compressed data", "vt_type": "zlib", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "zlibstream", "target_label": "zlibstream", "correct_labels": [ "zlibstream" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true } } ================================================ FILE: assets/models/standard_v1/magika_config.json ================================================ { "default_model_name": "standard_v1", "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 16, "padding_token": 256 } ================================================ FILE: assets/models/standard_v1/model_config.json ================================================ { "cfg": { "model_class": "dense_v4", "shuffle_size": 10000, "input_sizes": { "beg": 512, "mid": 512, "end": 512 }, "batch_size": 256, "namespace": "2023-08-dense_v4", "max_train_samples_num_per_ds_per_ct": 1000000, "epochs": 30, "dense_v4.input_embedding_dim": 128, "dense_v4.step_size": 4, "dense_v4.use_first_layernorm": true, "dense_v4.premaxpool_dense_dim": 256, "dense_v4.premaxpool_dense2_dim": 256, "dense_v4.use_second_layernorm": true, "dense_v4.spatial_dropout_rate": 0.1, "dense_v4.dropout_rate": 0.1, "dataset_format": "int-concat/one-hot", 
"dense_v4.dense_activation": "gelu", "cutmix_t": 0.05, "dense_v4.augmentation_ratio": 0.0, "wandb_mode": "online", "max_test_samples_num_per_ds_per_ct": 10000, "target_labels_spec": "target-label", "ds_query": "all", "ct_query": "all", "tags": [ "namespace:2023-08-dense_v4" ], "metrics": [ "acc" ], "loss_function": "categorical_crossentropy", "optimizer": "adam", "augmentations_num": 0, "return_after_summary": false, "xgboost_model_dir": null, "dense_v4.use_meanpool": false, "dense_v4.use_file_size": false, "dense_v4.padding_byte": 256 }, "train_dataset_info": { "split_name": "train", "ds_query": "all", "ct_query": "all", "batch_size": 256, "shuffle": 10000, "block_length": 1000, "repeat_and_randomize": true, "include_sha256s": false, "input_sizes": { "beg": 512, "mid": 512, "end": 512 }, "max_samples_num_per_ds_per_ct": 1000000, "dataset_format": "int-concat/one-hot", "cutmix_t": 0.05, "target_labels_spec": "target-label", "total_samples_num_upper_bound": 129000000, "target_labels_info": { "target_labels_spec": "target-label", "content_types": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "cpl", "crx", "cs", "css", "csv", "deb", "dex", "dll", "dmg", "doc", "docx", "dylib", "elf", "emf", "eml", "epub", "exe", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "ko", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "mui", "mum", "ocx", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "randombytes", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scr", "sevenzip", "shell", "smali", "so", "sql", "squashfs", "svg", "swf", "symlinktext", "sys", "tar", "tga", "tiff", "torrent", "ttf", "txt", "vba", "wav", "webm", "webp", "winregistry", "wmf", 
"xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "content_types_to_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "cpl": 13, "crx": 14, "cs": 15, "css": 16, "csv": 17, "deb": 18, "dex": 19, "dll": 20, "dmg": 21, "doc": 22, "docx": 23, "dylib": 24, "elf": 25, "emf": 26, "eml": 27, "epub": 28, "exe": 29, "flac": 30, "gif": 31, "go": 32, "gzip": 33, "hlp": 34, "html": 35, "ico": 36, "ini": 37, "internetshortcut": 38, "iso": 39, "jar": 40, "java": 41, "javabytecode": 42, "javascript": 43, "jpeg": 44, "json": 45, "ko": 46, "latex": 47, "lisp": 48, "lnk": 49, "m3u": 50, "macho": 51, "makefile": 52, "markdown": 53, "mht": 54, "mp3": 55, "mp4": 56, "mscompress": 57, "msi": 58, "mui": 59, "mum": 60, "ocx": 61, "odex": 62, "odp": 63, "ods": 64, "odt": 65, "ogg": 66, "outlook": 67, "pcap": 68, "pdf": 69, "pem": 70, "perl": 71, "php": 72, "png": 73, "postscript": 74, "powershell": 75, "ppt": 76, "pptx": 77, "python": 78, "pythonbytecode": 79, "randombytes": 80, "rar": 81, "rdf": 82, "rpm": 83, "rst": 84, "rtf": 85, "ruby": 86, "rust": 87, "scala": 88, "scr": 89, "sevenzip": 90, "shell": 91, "smali": 92, "so": 93, "sql": 94, "squashfs": 95, "svg": 96, "swf": 97, "symlinktext": 98, "sys": 99, "tar": 100, "tga": 101, "tiff": 102, "torrent": 103, "ttf": 104, "txt": 105, "vba": 106, "wav": 107, "webm": 108, "webp": 109, "winregistry": 110, "wmf": 111, "xar": 112, "xls": 113, "xlsb": 114, "xlsx": 115, "xml": 116, "xpi": 117, "xz": 118, "yaml": 119, "zip": 120, "zlibstream": 121 }, "content_types_num": 122, "target_labels": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "pebin", "crx", "cs", "css", "csv", "deb", "dex", "pebin", "dmg", "doc", "docx", "macho", "elf", "emf", "eml", "epub", "pebin", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", 
"javabytecode", "javascript", "jpeg", "json", "elf", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "pebin", "mum", "pebin", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "unknown", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "pebin", "sevenzip", "shell", "smali", "elf", "sql", "squashfs", "svg", "swf", "symlinktext", "pebin", "tar", "tga", "tiff", "torrent", "ttf", "txt", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "target_labels_space": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "crx", "cs", "css", "csv", "deb", "dex", "dmg", "doc", "docx", "elf", "emf", "eml", "epub", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "mum", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pebin", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "sevenzip", "shell", "smali", "sql", "squashfs", "svg", "swf", "symlinktext", "tar", "tga", "tiff", "torrent", "ttf", "txt", "unknown", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "target_labels_to_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "crx": 13, "cs": 14, "css": 15, "csv": 16, "deb": 17, "dex": 18, "dmg": 19, "doc": 20, "docx": 21, "elf": 22, "emf": 23, "eml": 24, "epub": 25, "flac": 26, 
"gif": 27, "go": 28, "gzip": 29, "hlp": 30, "html": 31, "ico": 32, "ini": 33, "internetshortcut": 34, "iso": 35, "jar": 36, "java": 37, "javabytecode": 38, "javascript": 39, "jpeg": 40, "json": 41, "latex": 42, "lisp": 43, "lnk": 44, "m3u": 45, "macho": 46, "makefile": 47, "markdown": 48, "mht": 49, "mp3": 50, "mp4": 51, "mscompress": 52, "msi": 53, "mum": 54, "odex": 55, "odp": 56, "ods": 57, "odt": 58, "ogg": 59, "outlook": 60, "pcap": 61, "pdf": 62, "pebin": 63, "pem": 64, "perl": 65, "php": 66, "png": 67, "postscript": 68, "powershell": 69, "ppt": 70, "pptx": 71, "python": 72, "pythonbytecode": 73, "rar": 74, "rdf": 75, "rpm": 76, "rst": 77, "rtf": 78, "ruby": 79, "rust": 80, "scala": 81, "sevenzip": 82, "shell": 83, "smali": 84, "sql": 85, "squashfs": 86, "svg": 87, "swf": 88, "symlinktext": 89, "tar": 90, "tga": 91, "tiff": 92, "torrent": 93, "ttf": 94, "txt": 95, "unknown": 96, "vba": 97, "wav": 98, "webm": 99, "webp": 100, "winregistry": 101, "wmf": 102, "xar": 103, "xls": 104, "xlsb": 105, "xlsx": 106, "xml": 107, "xpi": 108, "xz": 109, "yaml": 110, "zip": 111, "zlibstream": 112 }, "target_labels_num": 113, "content_types_to_target_labels_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "cpl": 63, "crx": 13, "cs": 14, "css": 15, "csv": 16, "deb": 17, "dex": 18, "dll": 63, "dmg": 19, "doc": 20, "docx": 21, "dylib": 46, "elf": 22, "emf": 23, "eml": 24, "epub": 25, "exe": 63, "flac": 26, "gif": 27, "go": 28, "gzip": 29, "hlp": 30, "html": 31, "ico": 32, "ini": 33, "internetshortcut": 34, "iso": 35, "jar": 36, "java": 37, "javabytecode": 38, "javascript": 39, "jpeg": 40, "json": 41, "ko": 22, "latex": 42, "lisp": 43, "lnk": 44, "m3u": 45, "macho": 46, "makefile": 47, "markdown": 48, "mht": 49, "mp3": 50, "mp4": 51, "mscompress": 52, "msi": 53, "mui": 63, "mum": 54, "ocx": 63, "odex": 55, "odp": 56, "ods": 57, "odt": 58, "ogg": 59, "outlook": 60, "pcap": 61, 
"pdf": 62, "pem": 64, "perl": 65, "php": 66, "png": 67, "postscript": 68, "powershell": 69, "ppt": 70, "pptx": 71, "python": 72, "pythonbytecode": 73, "randombytes": 96, "rar": 74, "rdf": 75, "rpm": 76, "rst": 77, "rtf": 78, "ruby": 79, "rust": 80, "scala": 81, "scr": 63, "sevenzip": 82, "shell": 83, "smali": 84, "so": 22, "sql": 85, "squashfs": 86, "svg": 87, "swf": 88, "symlinktext": 89, "sys": 63, "tar": 90, "tga": 91, "tiff": 92, "torrent": 93, "ttf": 94, "txt": 95, "vba": 97, "wav": 98, "webm": 99, "webp": 100, "winregistry": 101, "wmf": 102, "xar": 103, "xls": 104, "xlsb": 105, "xlsx": 106, "xml": 107, "xpi": 108, "xz": 109, "yaml": 110, "zip": 111, "zlibstream": 112 } } }, "test_dataset_info": { "split_name": "test", "ds_query": "all", "ct_query": "all", "batch_size": 256, "shuffle": 0, "block_length": 1000, "repeat_and_randomize": false, "include_sha256s": false, "input_sizes": { "beg": 512, "mid": 512, "end": 512 }, "max_samples_num_per_ds_per_ct": 10000, "dataset_format": "int-concat/one-hot", "cutmix_t": null, "target_labels_spec": "target-label", "total_samples_num_upper_bound": 1290000, "target_labels_info": { "target_labels_spec": "target-label", "content_types": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "cpl", "crx", "cs", "css", "csv", "deb", "dex", "dll", "dmg", "doc", "docx", "dylib", "elf", "emf", "eml", "epub", "exe", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "ko", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "mui", "mum", "ocx", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "randombytes", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scr", "sevenzip", "shell", "smali", "so", "sql", "squashfs", 
"svg", "swf", "symlinktext", "sys", "tar", "tga", "tiff", "torrent", "ttf", "txt", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "content_types_to_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "cpl": 13, "crx": 14, "cs": 15, "css": 16, "csv": 17, "deb": 18, "dex": 19, "dll": 20, "dmg": 21, "doc": 22, "docx": 23, "dylib": 24, "elf": 25, "emf": 26, "eml": 27, "epub": 28, "exe": 29, "flac": 30, "gif": 31, "go": 32, "gzip": 33, "hlp": 34, "html": 35, "ico": 36, "ini": 37, "internetshortcut": 38, "iso": 39, "jar": 40, "java": 41, "javabytecode": 42, "javascript": 43, "jpeg": 44, "json": 45, "ko": 46, "latex": 47, "lisp": 48, "lnk": 49, "m3u": 50, "macho": 51, "makefile": 52, "markdown": 53, "mht": 54, "mp3": 55, "mp4": 56, "mscompress": 57, "msi": 58, "mui": 59, "mum": 60, "ocx": 61, "odex": 62, "odp": 63, "ods": 64, "odt": 65, "ogg": 66, "outlook": 67, "pcap": 68, "pdf": 69, "pem": 70, "perl": 71, "php": 72, "png": 73, "postscript": 74, "powershell": 75, "ppt": 76, "pptx": 77, "python": 78, "pythonbytecode": 79, "randombytes": 80, "rar": 81, "rdf": 82, "rpm": 83, "rst": 84, "rtf": 85, "ruby": 86, "rust": 87, "scala": 88, "scr": 89, "sevenzip": 90, "shell": 91, "smali": 92, "so": 93, "sql": 94, "squashfs": 95, "svg": 96, "swf": 97, "symlinktext": 98, "sys": 99, "tar": 100, "tga": 101, "tiff": 102, "torrent": 103, "ttf": 104, "txt": 105, "vba": 106, "wav": 107, "webm": 108, "webp": 109, "winregistry": 110, "wmf": 111, "xar": 112, "xls": 113, "xlsb": 114, "xlsx": 115, "xml": 116, "xpi": 117, "xz": 118, "yaml": 119, "zip": 120, "zlibstream": 121 }, "content_types_num": 122, "target_labels": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "pebin", "crx", "cs", "css", "csv", "deb", "dex", "pebin", "dmg", "doc", "docx", "macho", "elf", 
"emf", "eml", "epub", "pebin", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "elf", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "pebin", "mum", "pebin", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "unknown", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "pebin", "sevenzip", "shell", "smali", "elf", "sql", "squashfs", "svg", "swf", "symlinktext", "pebin", "tar", "tga", "tiff", "torrent", "ttf", "txt", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "target_labels_space": [ "ai", "apk", "appleplist", "asm", "asp", "batch", "bmp", "bzip", "c", "cab", "cat", "chm", "coff", "crx", "cs", "css", "csv", "deb", "dex", "dmg", "doc", "docx", "elf", "emf", "eml", "epub", "flac", "gif", "go", "gzip", "hlp", "html", "ico", "ini", "internetshortcut", "iso", "jar", "java", "javabytecode", "javascript", "jpeg", "json", "latex", "lisp", "lnk", "m3u", "macho", "makefile", "markdown", "mht", "mp3", "mp4", "mscompress", "msi", "mum", "odex", "odp", "ods", "odt", "ogg", "outlook", "pcap", "pdf", "pebin", "pem", "perl", "php", "png", "postscript", "powershell", "ppt", "pptx", "python", "pythonbytecode", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "sevenzip", "shell", "smali", "sql", "squashfs", "svg", "swf", "symlinktext", "tar", "tga", "tiff", "torrent", "ttf", "txt", "unknown", "vba", "wav", "webm", "webp", "winregistry", "wmf", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "zip", "zlibstream" ], "target_labels_to_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "crx": 13, "cs": 14, 
"css": 15, "csv": 16, "deb": 17, "dex": 18, "dmg": 19, "doc": 20, "docx": 21, "elf": 22, "emf": 23, "eml": 24, "epub": 25, "flac": 26, "gif": 27, "go": 28, "gzip": 29, "hlp": 30, "html": 31, "ico": 32, "ini": 33, "internetshortcut": 34, "iso": 35, "jar": 36, "java": 37, "javabytecode": 38, "javascript": 39, "jpeg": 40, "json": 41, "latex": 42, "lisp": 43, "lnk": 44, "m3u": 45, "macho": 46, "makefile": 47, "markdown": 48, "mht": 49, "mp3": 50, "mp4": 51, "mscompress": 52, "msi": 53, "mum": 54, "odex": 55, "odp": 56, "ods": 57, "odt": 58, "ogg": 59, "outlook": 60, "pcap": 61, "pdf": 62, "pebin": 63, "pem": 64, "perl": 65, "php": 66, "png": 67, "postscript": 68, "powershell": 69, "ppt": 70, "pptx": 71, "python": 72, "pythonbytecode": 73, "rar": 74, "rdf": 75, "rpm": 76, "rst": 77, "rtf": 78, "ruby": 79, "rust": 80, "scala": 81, "sevenzip": 82, "shell": 83, "smali": 84, "sql": 85, "squashfs": 86, "svg": 87, "swf": 88, "symlinktext": 89, "tar": 90, "tga": 91, "tiff": 92, "torrent": 93, "ttf": 94, "txt": 95, "unknown": 96, "vba": 97, "wav": 98, "webm": 99, "webp": 100, "winregistry": 101, "wmf": 102, "xar": 103, "xls": 104, "xlsb": 105, "xlsx": 106, "xml": 107, "xpi": 108, "xz": 109, "yaml": 110, "zip": 111, "zlibstream": 112 }, "target_labels_num": 113, "content_types_to_target_labels_ids": { "ai": 0, "apk": 1, "appleplist": 2, "asm": 3, "asp": 4, "batch": 5, "bmp": 6, "bzip": 7, "c": 8, "cab": 9, "cat": 10, "chm": 11, "coff": 12, "cpl": 63, "crx": 13, "cs": 14, "css": 15, "csv": 16, "deb": 17, "dex": 18, "dll": 63, "dmg": 19, "doc": 20, "docx": 21, "dylib": 46, "elf": 22, "emf": 23, "eml": 24, "epub": 25, "exe": 63, "flac": 26, "gif": 27, "go": 28, "gzip": 29, "hlp": 30, "html": 31, "ico": 32, "ini": 33, "internetshortcut": 34, "iso": 35, "jar": 36, "java": 37, "javabytecode": 38, "javascript": 39, "jpeg": 40, "json": 41, "ko": 22, "latex": 42, "lisp": 43, "lnk": 44, "m3u": 45, "macho": 46, "makefile": 47, "markdown": 48, "mht": 49, "mp3": 50, "mp4": 51, "mscompress": 
52, "msi": 53, "mui": 63, "mum": 54, "ocx": 63, "odex": 55, "odp": 56, "ods": 57, "odt": 58, "ogg": 59, "outlook": 60, "pcap": 61, "pdf": 62, "pem": 64, "perl": 65, "php": 66, "png": 67, "postscript": 68, "powershell": 69, "ppt": 70, "pptx": 71, "python": 72, "pythonbytecode": 73, "randombytes": 96, "rar": 74, "rdf": 75, "rpm": 76, "rst": 77, "rtf": 78, "ruby": 79, "rust": 80, "scala": 81, "scr": 63, "sevenzip": 82, "shell": 83, "smali": 84, "so": 22, "sql": 85, "squashfs": 86, "svg": 87, "swf": 88, "symlinktext": 89, "sys": 63, "tar": 90, "tga": 91, "tiff": 92, "torrent": 93, "ttf": 94, "txt": 95, "vba": 97, "wav": 98, "webm": 99, "webp": 100, "winregistry": 101, "wmf": 102, "xar": 103, "xls": 104, "xlsb": 105, "xlsx": 106, "xml": 107, "xpi": 108, "xz": 109, "yaml": 110, "zip": 111, "zlibstream": 112 } } }, "host_id": "zrh-rtx4090x1-d4", "git_last_commit": "a67d2c64cfbfc0e03898901899947a16b87b4dac" } ================================================ FILE: assets/models/standard_v1/thresholds.json ================================================ { "metadata": { "min_precision": 0.995, "min_recall": 0.99 }, "thresholds": { "ai": 0.95, "apk": 0.95, "appleplist": 0.95, "asm": 0.85, "asp": 0.5, "batch": 0.95, "bmp": 0.95, "bzip": 0.95, "c": 0.7, "cab": 0.95, "cat": 0.95, "chm": 0.95, "coff": 0.95, "crx": 0.95, "cs": 0.95, "css": 0.5, "csv": 0.85, "deb": 0.95, "dex": 0.95, "dmg": 0.95, "doc": 0.5, "docx": 0.91, "elf": 0.95, "emf": 0.95, "eml": 0.95, "epub": 0.95, "flac": 0.95, "gif": 0.95, "go": 0.95, "gzip": 0.95, "hlp": 0.95, "html": 0.95, "ico": 0.95, "ini": 0.85, "internetshortcut": 0.95, "iso": 0.95, "jar": 0.95, "java": 0.91, "javabytecode": 0.95, "javascript": 0.8, "jpeg": 0.95, "json": 0.75, "latex": 0.5, "lisp": 0.95, "lnk": 0.95, "m3u": 0.95, "macho": 0.95, "makefile": 0.95, "markdown": 0.95, "mht": 0.95, "mp3": 0.95, "mp4": 0.95, "mscompress": 0.95, "msi": 0.7, "mum": 0.95, "odex": 0.95, "odp": 0.95, "ods": 0.95, "odt": 0.95, "ogg": 0.95, "outlook": 0.95, 
"pcap": 0.95, "pdf": 0.95, "pebin": 0.95, "pem": 0.95, "perl": 0.85, "php": 0.95, "png": 0.95, "postscript": 0.95, "powershell": 0.95, "ppt": 0.95, "pptx": 0.95, "python": 0.85, "pythonbytecode": 0.95, "rar": 0.95, "rdf": 0.95, "rpm": 0.95, "rst": 0.95, "rtf": 0.95, "ruby": 0.93, "rust": 0.95, "scala": 0.95, "sevenzip": 0.95, "shell": 0.95, "smali": 0.95, "sql": 0.7, "squashfs": 0.95, "svg": 0.95, "swf": 0.95, "symlinktext": 0.95, "tar": 0.95, "tga": 0.95, "tiff": 0.95, "torrent": 0.95, "ttf": 0.95, "txt": 0.95, "unknown": 0.95, "vba": 0.5, "wav": 0.95, "webm": 0.95, "webp": 0.95, "winregistry": 0.95, "wmf": 0.95, "xar": 0.95, "xls": 0.8, "xlsb": 0.55, "xlsx": 0.95, "xml": 0.9, "xpi": 0.93, "xz": 0.95, "yaml": 0.75, "zip": 0.95, "zlibstream": 0.95 } } ================================================ FILE: assets/models/standard_v2_0/README.md ================================================ # Content types supported by model "standard_v2_0" | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome 
extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41 | doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Autocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 | elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 | ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | 
Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua | | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | OneNote | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | 
python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | qt | QuickTime | | 147 | r | R (language) | | 148 | rar | RAR archive data | | 149 | rdf | Resource Description Framework document (RDF) | | 150 | rpm | RedHat Package Manager archive (RPM) | | 151 | rst | ReStructuredText document | | 152 | rtf | Rich Text Format document | | 153 | ruby | Ruby source | | 154 | rust | Rust source | | 155 | scala | Scala source | | 156 | scss | SCSS source | | 157 | sevenzip | 7-zip archive data | | 158 | sgml | sgml | | 159 | shell | Shell script | | 160 | smali | Smali source | | 161 | snap | Snap archive | | 162 | solidity | Solidity source | | 163 | sql | SQL source | | 164 | sqlite | SQLITE database | | 165 | squashfs | Squash filesystem | | 166 | srt | SubRip Text Format | | 167 | stlbinary | Stereolithography CAD (binary) | | 168 | stltext | Stereolithography CAD (text) | | 169 | sum | Checksum file | | 170 | svg | SVG Scalable Vector Graphics image data | | 171 | swf | Small Web Format | | 172 | swift | Swift | | 173 | tar | POSIX tar archive | | 174 | tcl | Tcl script | | 175 | textproto | Text protocol buffer | | 176 | tga | Targa image data | | 177 | thumbsdb | Windows thumbnail cache | | 178 | tiff | TIFF image data | | 179 | toml | Tom's obvious, minimal language | | 180 | torrent | BitTorrent file | | 181 | tsv | TSV document | | 182 | ttf | TrueType Font data | | 183 | twig | Twig template | | 184 | txt | Generic text document | | 185 | typescript | Typescript | | 186 | unknown | Unknown binary data | | 187 | vba | MS Visual Basic source (VBA) | | 188 | vcxproj | Visual Studio MSBuild project | | 189 | verilog | Verilog source | | 190 | vhdl | VHDL source | | 191 | vtt | Web Video Text Tracks | | 192 | vue | Vue source | | 193 | wasm | Web Assembly | | 194 | wav | Waveform Audio file (WAV) | | 195 | webm | WebM media file | | 196 | webp | WebP media file | | 197 | winregistry | Windows Registry text | | 198 | wmf | Windows metafile | | 199 
| woff | Web Open Font Format | | 200 | woff2 | Web Open Font Format v2 | | 201 | xar | XAR archive compressed data | | 202 | xls | Microsoft Excel CDF document | | 203 | xlsb | Microsoft Excel 2007+ document (binary format) | | 204 | xlsx | Microsoft Excel 2007+ document | | 205 | xml | XML document | | 206 | xpi | Compressed installation archive (XPI) | | 207 | xz | XZ compressed data | | 208 | yaml | YAML source | | 209 | yara | YARA rule | | 210 | zig | Zig source | | 211 | zip | Zip archive data | | 212 | zlibstream | zlib compressed data | ================================================ FILE: assets/models/standard_v2_0/config.min.json ================================================ {"beg_size": 2048, "mid_size": 0, "end_size": 2048, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", 
"pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95}, "overwrite_map": {}} ================================================ FILE: assets/models/standard_v2_0/metadata.json ================================================ {"model_name_hash": "5ae665b58305628b173e97edf9d3043a6021dd40a434b1f72bd88087713d8209", "namespace_hash": "c3a09b7885a7151502927c0380d2bf9a19c2999e54e8ffd282b21ad7b2aeec62"} ================================================ FILE: assets/models/standard_v2_1/README.md ================================================ # Content types supported by model "standard_v2_1" | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source 
| | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41 | doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Autocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 | elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 
| ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua | | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | OneNote | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | 
Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | pytorch | PyTorch storage file | | 147 | qt | QuickTime | | 148 | r | R (language) | | 149 | rar | RAR archive data | | 150 | rdf | Resource Description Framework document (RDF) | | 151 | rpm | RedHat Package Manager archive (RPM) | | 152 | rst | ReStructuredText document | | 153 | rtf | Rich Text Format document | | 154 | ruby | Ruby source | | 155 | rust | Rust source | | 156 | scala | Scala source | | 157 | scss | SCSS source | | 158 | sevenzip | 7-zip archive data | | 159 | sgml | sgml | | 160 | shell | Shell script | | 161 | smali | Smali source | | 162 | snap | Snap archive | | 163 | solidity | Solidity source | | 164 | sql | SQL source | | 165 | sqlite | SQLITE database | | 166 | squashfs | Squash filesystem | | 167 | srt | SubRip Text Format | | 168 | stlbinary | Stereolithography CAD (binary) | | 169 | stltext | Stereolithography CAD (text) | | 170 | sum | Checksum file | | 171 | svg | SVG Scalable Vector Graphics image data | | 172 | swf | Small Web Format | | 173 | swift | Swift | | 174 | tar | POSIX tar archive | | 175 | tcl | Tcl script | | 176 | textproto | Text protocol buffer | | 177 | tga | Targa image data | | 178 | thumbsdb | Windows thumbnail cache | | 179 | tiff | TIFF image data | | 180 | toml | Tom's obvious, minimal language | | 181 | torrent | BitTorrent file | | 182 | tsv | TSV document | | 183 | ttf | TrueType Font data | | 184 | twig | Twig template | | 185 | txt | Generic text document | | 186 | typescript | Typescript | | 187 | unknown | Unknown binary data | | 188 | vba | MS Visual Basic source 
(VBA) | | 189 | vcxproj | Visual Studio MSBuild project | | 190 | verilog | Verilog source | | 191 | vhdl | VHDL source | | 192 | vtt | Web Video Text Tracks | | 193 | vue | Vue source | | 194 | wasm | Web Assembly | | 195 | wav | Waveform Audio file (WAV) | | 196 | webm | WebM media file | | 197 | webp | WebP media file | | 198 | winregistry | Windows Registry text | | 199 | wmf | Windows metafile | | 200 | woff | Web Open Font Format | | 201 | woff2 | Web Open Font Format v2 | | 202 | xar | XAR archive compressed data | | 203 | xls | Microsoft Excel CDF document | | 204 | xlsb | Microsoft Excel 2007+ document (binary format) | | 205 | xlsx | Microsoft Excel 2007+ document | | 206 | xml | XML document | | 207 | xpi | Compressed installation archive (XPI) | | 208 | xz | XZ compressed data | | 209 | yaml | YAML source | | 210 | yara | YARA rule | | 211 | zig | Zig source | | 212 | zip | Zip archive data | | 213 | zlibstream | zlib compressed data | ================================================ FILE: assets/models/standard_v2_1/config.min.json ================================================ {"beg_size": 2048, "mid_size": 0, "end_size": 2048, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", 
"internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}} ================================================ FILE: assets/models/standard_v2_1/metadata.json ================================================ {"model_name_hash": "5ae665b58305628b173e97edf9d3043a6021dd40a434b1f72bd88087713d8209", "namespace_hash": "ce3c9130af6416f40d71c5934f927acbd174f904a550fca2185aa3cd3528ca35"} ================================================ FILE: assets/models/standard_v3_0/README.md ================================================ # Model documentation ## Table of Contents 1. [List of possible outputs](#list-of-possible-outputs) 1. 
[List of possible model's outputs](#list-of-possible-models-outputs) ## List of possible outputs This is the full list of all possible tool's outputs (which are different than the possible raw output of the model, see table below). E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.output.label`. | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | directory | A directory | | 40 | dm | Dream Maker | | 41 | dmg | Apple disk image | | 42 | doc | Microsoft Word CDF document | | 43 | dockerfile | Dockerfile | | 44 | docx | Microsoft Word 2007+ document | | 45 | dsstore | Application Desktop Services Store | | 46 | dwg | Autocad Drawing | | 47 | dxf | Autocad Drawing Exchange
Format | | 48 | elf | ELF executable | | 49 | elixir | Elixir script | | 50 | emf | Windows Enhanced Metafile image data | | 51 | eml | RFC 822 mail | | 52 | empty | Empty file | | 53 | epub | EPUB document | | 54 | erb | Embedded Ruby source | | 55 | erlang | Erlang source | | 56 | flac | FLAC audio bitstream data | | 57 | flv | Flash Video | | 58 | fortran | Fortran | | 59 | gemfile | Gemfile file | | 60 | gemspec | Gemspec file | | 61 | gif | GIF image data | | 62 | gitattributes | Gitattributes file | | 63 | gitmodules | Gitmodules file | | 64 | go | Golang source | | 65 | gradle | Gradle source | | 66 | groovy | Groovy source | | 67 | gzip | gzip compressed data | | 68 | h5 | Hierarchical Data Format v5 | | 69 | handlebars | Handlebars source | | 70 | haskell | Haskell source | | 71 | hcl | HashiCorp configuration language | | 72 | hlp | MS Windows help | | 73 | htaccess | Apache access configuration | | 74 | html | HTML document | | 75 | icns | Mac OS X icon | | 76 | ico | MS Windows icon resource | | 77 | ics | Internet Calendaring and Scheduling | | 78 | ignorefile | Ignorefile | | 79 | ini | INI configuration file | | 80 | internetshortcut | MS Windows Internet shortcut | | 81 | ipynb | Jupyter notebook | | 82 | iso | ISO 9660 CD-ROM filesystem data | | 83 | jar | Java archive data (JAR) | | 84 | java | Java source | | 85 | javabytecode | Java compiled bytecode | | 86 | javascript | JavaScript source | | 87 | jinja | Jinja template | | 88 | jp2 | jpeg2000 | | 89 | jpeg | JPEG image data | | 90 | json | JSON document | | 91 | jsonl | JSONL document | | 92 | julia | Julia source | | 93 | kotlin | Kotlin source | | 94 | latex | LaTeX document | | 95 | lha | LHarc archive | | 96 | lisp | Lisp source | | 97 | lnk | MS Windows shortcut | | 98 | lua | Lua | | 99 | m3u | M3U playlist | | 100 | m4 | GNU Macro | | 101 | macho | Mach-O executable | | 102 | makefile | Makefile source | | 103 | markdown | Markdown document | | 104 | matlab | Matlab Source | | 105 | mht 
| MHTML document | | 106 | midi | Midi | | 107 | mkv | Matroska | | 108 | mp3 | MP3 media file | | 109 | mp4 | MP4 media file | | 110 | mscompress | MS Compress archive data | | 111 | msi | Microsoft Installer file | | 112 | mum | Windows Update Package file | | 113 | npy | Numpy Array | | 114 | npz | Numpy Arrays Archive | | 115 | nupkg | NuGet Package | | 116 | objectivec | ObjectiveC source | | 117 | ocaml | OCaml | | 118 | odp | OpenDocument Presentation | | 119 | ods | OpenDocument Spreadsheet | | 120 | odt | OpenDocument Text | | 121 | ogg | Ogg data | | 122 | one | One Note | | 123 | onnx | Open Neural Network Exchange | | 124 | otf | OpenType font | | 125 | outlook | MS Outlook Message | | 126 | parquet | Apache Parquet | | 127 | pascal | Pascal source | | 128 | pcap | pcap capture file | | 129 | pdb | Windows Program Database | | 130 | pdf | PDF document | | 131 | pebin | PE Windows executable | | 132 | pem | PEM certificate | | 133 | perl | Perl source | | 134 | php | PHP source | | 135 | pickle | Python pickle | | 136 | png | PNG image | | 137 | po | Portable Object (PO) for i18n | | 138 | postscript | PostScript document | | 139 | powershell | Powershell source | | 140 | ppt | Microsoft PowerPoint CDF document | | 141 | pptx | Microsoft PowerPoint 2007+ document | | 142 | prolog | Prolog source | | 143 | proteindb | Protein DB | | 144 | proto | Protocol buffer definition | | 145 | psd | Adobe Photoshop | | 146 | python | Python source | | 147 | pythonbytecode | Python compiled bytecode | | 148 | pytorch | Pytorch storage file | | 149 | qt | QuickTime | | 150 | r | R (language) | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip 
archive data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD (binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | symlink | Symbolic link | | 177 | tar | POSIX tar archive | | 178 | tcl | Tickle | | 179 | textproto | Text protocol buffer | | 180 | tga | Targa image data | | 181 | thumbsdb | Windows thumbnail cache | | 182 | tiff | TIFF image data | | 183 | toml | Tom's obvious, minimal language | | 184 | torrent | BitTorrent file | | 185 | tsv | TSV document | | 186 | ttf | TrueType Font data | | 187 | twig | Twig template | | 188 | txt | Generic text document | | 189 | typescript | Typescript | | 190 | unknown | Unknown binary data | | 191 | vba | MS Visual Basic source (VBA) | | 192 | vcxproj | Visual Studio MSBuild project | | 193 | verilog | Verilog source | | 194 | vhdl | VHDL source | | 195 | vtt | Web Video Text Tracks | | 196 | vue | Vue source | | 197 | wasm | Web Assembly | | 198 | wav | Waveform Audio file (WAV) | | 199 | webm | WebM media file | | 200 | webp | WebP media file | | 201 | winregistry | Windows Registry text | | 202 | wmf | Windows metafile | | 203 | woff | Web Open Font Format | | 204 | woff2 | Web Open Font Format v2 | | 205 | xar | XAR archive compressed data | | 206 | xls | Microsoft Excel CDF document | | 207 | xlsb | Microsoft Excel 2007+ document (binary format) | | 208 | xlsx | Microsoft Excel 2007+ document | | 209 | xml | XML document | | 210 | xpi | Compressed installation archive (XPI) | | 211 | xz | XZ compressed data | | 212 | yaml | YAML source | | 213 | yara | YARA rule | | 214 | zig | Zig 
source | | 215 | zip | Zip archive data | | 216 | zlibstream | zlib compressed data | ## List of possible model's outputs This is the full list of all possible model's outputs. E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.dl.label`. Note that, in general, the list of "model outputs" is different than the "tool outputs" as in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score or other reasons. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table above. | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41
| doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Autocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 | elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 | ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua
| | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | One Note | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | pytorch | Pytorch storage file | | 147 | qt | QuickTime | | 148 | r | R (language) | | 149 | randombytes | Random bytes | | 150 | randomtxt | Random text | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 
| rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD (binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | tar | POSIX tar archive | | 177 | tcl | Tickle | | 178 | textproto | Text protocol buffer | | 179 | tga | Targa image data | | 180 | thumbsdb | Windows thumbnail cache | | 181 | tiff | TIFF image data | | 182 | toml | Tom's obvious, minimal language | | 183 | torrent | BitTorrent file | | 184 | tsv | TSV document | | 185 | ttf | TrueType Font data | | 186 | twig | Twig template | | 187 | txt | Generic text document | | 188 | typescript | Typescript | | 189 | undefined | Undefined | | 190 | vba | MS Visual Basic source (VBA) | | 191 | vcxproj | Visual Studio MSBuild project | | 192 | verilog | Verilog source | | 193 | vhdl | VHDL source | | 194 | vtt | Web Video Text Tracks | | 195 | vue | Vue source | | 196 | wasm | Web Assembly | | 197 | wav | Waveform Audio file (WAV) | | 198 | webm | WebM media file | | 199 | webp | WebP media file | | 200 | winregistry | Windows Registry text | | 201 | wmf | Windows metafile | | 202 | woff | Web Open Font Format | | 203 | woff2 | Web Open Font Format v2 | | 204 | xar | XAR archive compressed data | | 205 | xls | Microsoft Excel CDF document | | 206 | xlsb | Microsoft Excel 2007+ document (binary format) | | 207 | 
xlsx | Microsoft Excel 2007+ document | | 208 | xml | XML document | | 209 | xpi | Compressed installation archive (XPI) | | 210 | xz | XZ compressed data | | 211 | yaml | YAML source | | 212 | yara | YARA rule | | 213 | zig | Zig source | | 214 | zip | Zip archive data | | 215 | zlibstream | zlib compressed data | ================================================ FILE: assets/models/standard_v3_0/config.min.json ================================================ {"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","
stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3} ================================================ FILE: assets/models/standard_v3_0/metadata.json ================================================ {"namespace_hash":"7ca577b96738951c36df428f8435c81780f92c6f9ef3a73d796a792ffc817703","model_name_hash":"e5368af178b89eb94cb1927b8481d5a9dedf5d1ee810d335b018d19eb5195b45","epoch_num":"100"} ================================================ FILE: assets/models/standard_v3_1/README.md ================================================ # Model documentation ## Table of Contents 1. [List of possible outputs](#list-of-possible-outputs) 1. [List of possible model's outputs](#list-of-possible-models-outputs) ## List of possible outputs This is the full list of all possible tool's outputs (which are different than the possible raw output of the model, see table below). E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.output.label`. 
| Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | directory | A directory | | 40 | dm | Dream Maker | | 41 | dmg | Apple disk image | | 42 | doc | Microsoft Word CDF document | | 43 | dockerfile | Dockerfile | | 44 | docx | Microsoft Word 2007+ document | | 45 | dsstore | Application Desktop Services Store | | 46 | dwg | Autocad Drawing | | 47 | dxf | Autocad Drawing Exchange Format | | 48 | elf | ELF executable | | 49 | elixir | Elixir script | | 50 | emf | Windows Enhanced Metafile image data | | 51 | eml | RFC 822 mail | | 52 | empty | Empty file | | 53 | epub | EPUB document | | 54 | erb | Embedded Ruby source | | 55 | erlang | Erlang source | | 56 | flac | FLAC audio bitstream data | | 57 | flv | Flash Video |
| 58 | fortran | Fortran | | 59 | gemfile | Gemfile file | | 60 | gemspec | Gemspec file | | 61 | gif | GIF image data | | 62 | gitattributes | Gitattributes file | | 63 | gitmodules | Gitmodules file | | 64 | go | Golang source | | 65 | gradle | Gradle source | | 66 | groovy | Groovy source | | 67 | gzip | gzip compressed data | | 68 | h5 | Hierarchical Data Format v5 | | 69 | handlebars | Handlebars source | | 70 | haskell | Haskell source | | 71 | hcl | HashiCorp configuration language | | 72 | hlp | MS Windows help | | 73 | htaccess | Apache access configuration | | 74 | html | HTML document | | 75 | icns | Mac OS X icon | | 76 | ico | MS Windows icon resource | | 77 | ics | Internet Calendaring and Scheduling | | 78 | ignorefile | Ignorefile | | 79 | ini | INI configuration file | | 80 | internetshortcut | MS Windows Internet shortcut | | 81 | ipynb | Jupyter notebook | | 82 | iso | ISO 9660 CD-ROM filesystem data | | 83 | jar | Java archive data (JAR) | | 84 | java | Java source | | 85 | javabytecode | Java compiled bytecode | | 86 | javascript | JavaScript source | | 87 | jinja | Jinja template | | 88 | jp2 | jpeg2000 | | 89 | jpeg | JPEG image data | | 90 | json | JSON document | | 91 | jsonl | JSONL document | | 92 | julia | Julia source | | 93 | kotlin | Kotlin source | | 94 | latex | LaTeX document | | 95 | lha | LHarc archive | | 96 | lisp | Lisp source | | 97 | lnk | MS Windows shortcut | | 98 | lua | Lua | | 99 | m3u | M3U playlist | | 100 | m4 | GNU Macro | | 101 | macho | Mach-O executable | | 102 | makefile | Makefile source | | 103 | markdown | Markdown document | | 104 | matlab | Matlab Source | | 105 | mht | MHTML document | | 106 | midi | Midi | | 107 | mkv | Matroska | | 108 | mp3 | MP3 media file | | 109 | mp4 | MP4 media file | | 110 | mscompress | MS Compress archive data | | 111 | msi | Microsoft Installer file | | 112 | mum | Windows Update Package file | | 113 | npy | Numpy Array | | 114 | npz | Numpy Arrays Archive | | 115 | nupkg | 
NuGet Package | | 116 | objectivec | ObjectiveC source | | 117 | ocaml | OCaml | | 118 | odp | OpenDocument Presentation | | 119 | ods | OpenDocument Spreadsheet | | 120 | odt | OpenDocument Text | | 121 | ogg | Ogg data | | 122 | one | One Note | | 123 | onnx | Open Neural Network Exchange | | 124 | otf | OpenType font | | 125 | outlook | MS Outlook Message | | 126 | parquet | Apache Parquet | | 127 | pascal | Pascal source | | 128 | pcap | pcap capture file | | 129 | pdb | Windows Program Database | | 130 | pdf | PDF document | | 131 | pebin | PE Windows executable | | 132 | pem | PEM certificate | | 133 | perl | Perl source | | 134 | php | PHP source | | 135 | pickle | Python pickle | | 136 | png | PNG image | | 137 | po | Portable Object (PO) for i18n | | 138 | postscript | PostScript document | | 139 | powershell | Powershell source | | 140 | ppt | Microsoft PowerPoint CDF document | | 141 | pptx | Microsoft PowerPoint 2007+ document | | 142 | prolog | Prolog source | | 143 | proteindb | Protein DB | | 144 | proto | Protocol buffer definition | | 145 | psd | Adobe Photoshop | | 146 | python | Python source | | 147 | pythonbytecode | Python compiled bytecode | | 148 | pytorch | Pytorch storage file | | 149 | qt | QuickTime | | 150 | r | R (language) | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD 
(binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | symlink | Symbolic link | | 177 | tar | POSIX tar archive | | 178 | tcl | Tickle | | 179 | textproto | Text protocol buffer | | 180 | tga | Targa image data | | 181 | thumbsdb | Windows thumbnail cache | | 182 | tiff | TIFF image data | | 183 | toml | Tom's obvious, minimal language | | 184 | torrent | BitTorrent file | | 185 | tsv | TSV document | | 186 | ttf | TrueType Font data | | 187 | twig | Twig template | | 188 | txt | Generic text document | | 189 | typescript | Typescript | | 190 | unknown | Unknown binary data | | 191 | vba | MS Visual Basic source (VBA) | | 192 | vcxproj | Visual Studio MSBuild project | | 193 | verilog | Verilog source | | 194 | vhdl | VHDL source | | 195 | vtt | Web Video Text Tracks | | 196 | vue | Vue source | | 197 | wasm | Web Assembly | | 198 | wav | Waveform Audio file (WAV) | | 199 | webm | WebM media file | | 200 | webp | WebP media file | | 201 | winregistry | Windows Registry text | | 202 | wmf | Windows metafile | | 203 | woff | Web Open Font Format | | 204 | woff2 | Web Open Font Format v2 | | 205 | xar | XAR archive compressed data | | 206 | xls | Microsoft Excel CDF document | | 207 | xlsb | Microsoft Excel 2007+ document (binary format) | | 208 | xlsx | Microsoft Excel 2007+ document | | 209 | xml | XML document | | 210 | xpi | Compressed installation archive (XPI) | | 211 | xz | XZ compressed data | | 212 | yaml | YAML source | | 213 | yara | YARA rule | | 214 | zig | Zig source | | 215 | zip | Zip archive data | | 216 | zlibstream | zlib compressed data | ## List of possible model's outputs This is the full list of all possible model's outputs. E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.dl.label`.
Note that, in general, the list of "model outputs" is different than the "tool outputs" as in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score or other reasons. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table above. | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41 | doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Autocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 |
elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 | ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua | | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | 
mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | One Note | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | pytorch | Pytorch storage file | | 147 | qt | QuickTime | | 148 | r | R (language) | | 149 | randombytes | Random bytes | | 150 | randomtxt | Random text | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive 
data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD (binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | tar | POSIX tar archive | | 177 | tcl | Tickle | | 178 | textproto | Text protocol buffer | | 179 | tga | Targa image data | | 180 | thumbsdb | Windows thumbnail cache | | 181 | tiff | TIFF image data | | 182 | toml | Tom's obvious, minimal language | | 183 | torrent | BitTorrent file | | 184 | tsv | TSV document | | 185 | ttf | TrueType Font data | | 186 | twig | Twig template | | 187 | txt | Generic text document | | 188 | typescript | Typescript | | 189 | undefined | Undefined | | 190 | vba | MS Visual Basic source (VBA) | | 191 | vcxproj | Visual Studio MSBuild project | | 192 | verilog | Verilog source | | 193 | vhdl | VHDL source | | 194 | vtt | Web Video Text Tracks | | 195 | vue | Vue source | | 196 | wasm | Web Assembly | | 197 | wav | Waveform Audio file (WAV) | | 198 | webm | WebM media file | | 199 | webp | WebP media file | | 200 | winregistry | Windows Registry text | | 201 | wmf | Windows metafile | | 202 | woff | Web Open Font Format | | 203 | woff2 | Web Open Font Format v2 | | 204 | xar | XAR archive compressed data | | 205 | xls | Microsoft Excel CDF document | | 206 | xlsb | Microsoft Excel 2007+ document (binary format) | | 207 | xlsx | Microsoft Excel 2007+ document | | 208 | xml | XML document | | 209 | xpi | Compressed installation archive (XPI) | | 210 | xz | XZ compressed data | | 211 | yaml | YAML source | | 212 | yara | YARA rule | | 213 | zig | Zig source | | 214 | zip | Zip archive data | | 215 | 
zlibstream | zlib compressed data |> ================================================ FILE: assets/models/standard_v3_1/config.min.json ================================================ {"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi
","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"crt":0.9,"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"protection":"none","aes_key_hex":"","version_major":3} ================================================ FILE: assets/models/standard_v3_1/metadata.json ================================================ {"epoch_num":"200"} ================================================ FILE: assets/models/standard_v3_2/README.md ================================================ # Model documentation ## Table of Contents 1. [List of possible outputs](#list-of-possible-outputs) 1. [List of possible model's outputs](#list-of-possible-models-outputs) ## List of possible outputs This is the full list of all possible tool's outputs (which are different than the possible raw output of the model, see table below). E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.output.label`. 
| Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | directory | A directory | | 40 | dm | Dream Maker | | 41 | dmg | Apple disk image | | 42 | doc | Microsoft Word CDF document | | 43 | dockerfile | Dockerfile | | 44 | docx | Microsoft Word 2007+ document | | 45 | dsstore | Application Desktop Services Store | | 46 | dwg | Autocad Drawing | | 47 | dxf | Autocad Drawing Exchange Format | | 48 | elf | ELF executable | | 49 | elixir | Elixir script | | 50 | emf | Windows Enhanced Metafile image data | | 51 | eml | RFC 822 mail | | 52 | empty | Empty file | | 53 | epub | EPUB document | | 54 | erb | Embedded Ruby source | | 55 | erlang | Erlang source | | 56 | flac | FLAC audio bitstream data | | 57 | flv | Flash Video | 
| 58 | fortran | Fortran | | 59 | gemfile | Gemfile file | | 60 | gemspec | Gemspec file | | 61 | gif | GIF image data | | 62 | gitattributes | Gitattributes file | | 63 | gitmodules | Gitmodules file | | 64 | go | Golang source | | 65 | gradle | Gradle source | | 66 | groovy | Groovy source | | 67 | gzip | gzip compressed data | | 68 | h5 | Hierarchical Data Format v5 | | 69 | handlebars | Handlebars source | | 70 | haskell | Haskell source | | 71 | hcl | HashiCorp configuration language | | 72 | hlp | MS Windows help | | 73 | htaccess | Apache access configuration | | 74 | html | HTML document | | 75 | icns | Mac OS X icon | | 76 | ico | MS Windows icon resource | | 77 | ics | Internet Calendaring and Scheduling | | 78 | ignorefile | Ignorefile | | 79 | ini | INI configuration file | | 80 | internetshortcut | MS Windows Internet shortcut | | 81 | ipynb | Jupyter notebook | | 82 | iso | ISO 9660 CD-ROM filesystem data | | 83 | jar | Java archive data (JAR) | | 84 | java | Java source | | 85 | javabytecode | Java compiled bytecode | | 86 | javascript | JavaScript source | | 87 | jinja | Jinja template | | 88 | jp2 | jpeg2000 | | 89 | jpeg | JPEG image data | | 90 | json | JSON document | | 91 | jsonl | JSONL document | | 92 | julia | Julia source | | 93 | kotlin | Kotlin source | | 94 | latex | LaTeX document | | 95 | lha | LHarc archive | | 96 | lisp | Lisp source | | 97 | lnk | MS Windows shortcut | | 98 | lua | Lua | | 99 | m3u | M3U playlist | | 100 | m4 | GNU Macro | | 101 | macho | Mach-O executable | | 102 | makefile | Makefile source | | 103 | markdown | Markdown document | | 104 | matlab | Matlab Source | | 105 | mht | MHTML document | | 106 | midi | Midi | | 107 | mkv | Matroska | | 108 | mp3 | MP3 media file | | 109 | mp4 | MP4 media file | | 110 | mscompress | MS Compress archive data | | 111 | msi | Microsoft Installer file | | 112 | mum | Windows Update Package file | | 113 | npy | Numpy Array | | 114 | npz | Numpy Arrays Archive | | 115 | nupkg | 
NuGet Package | | 116 | objectivec | ObjectiveC source | | 117 | ocaml | OCaml | | 118 | odp | OpenDocument Presentation | | 119 | ods | OpenDocument Spreadsheet | | 120 | odt | OpenDocument Text | | 121 | ogg | Ogg data | | 122 | one | One Note | | 123 | onnx | Open Neural Network Exchange | | 124 | otf | OpenType font | | 125 | outlook | MS Outlook Message | | 126 | parquet | Apache Parquet | | 127 | pascal | Pascal source | | 128 | pcap | pcap capture file | | 129 | pdb | Windows Program Database | | 130 | pdf | PDF document | | 131 | pebin | PE Windows executable | | 132 | pem | PEM certificate | | 133 | perl | Perl source | | 134 | php | PHP source | | 135 | pickle | Python pickle | | 136 | png | PNG image | | 137 | po | Portable Object (PO) for i18n | | 138 | postscript | PostScript document | | 139 | powershell | Powershell source | | 140 | ppt | Microsoft PowerPoint CDF document | | 141 | pptx | Microsoft PowerPoint 2007+ document | | 142 | prolog | Prolog source | | 143 | proteindb | Protein DB | | 144 | proto | Protocol buffer definition | | 145 | psd | Adobe Photoshop | | 146 | python | Python source | | 147 | pythonbytecode | Python compiled bytecode | | 148 | pytorch | Pytorch storage file | | 149 | qt | QuickTime | | 150 | r | R (language) | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD 
(binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | symlink | Symbolic link | | 177 | tar | POSIX tar archive | | 178 | tcl | Tickle | | 179 | textproto | Text protocol buffer | | 180 | tga | Targa image data | | 181 | thumbsdb | Windows thumbnail cache | | 182 | tiff | TIFF image data | | 183 | toml | Tom's obvious, minimal language | | 184 | torrent | BitTorrent file | | 185 | tsv | TSV document | | 186 | ttf | TrueType Font data | | 187 | twig | Twig template | | 188 | txt | Generic text document | | 189 | typescript | TypeScript source | | 190 | unknown | Unknown binary data | | 191 | vba | MS Visual Basic source (VBA) | | 192 | vcxproj | Visual Studio MSBuild project | | 193 | verilog | Verilog source | | 194 | vhdl | VHDL source | | 195 | vtt | Web Video Text Tracks | | 196 | vue | Vue source | | 197 | wasm | Web Assembly | | 198 | wav | Waveform Audio file (WAV) | | 199 | webm | WebM media file | | 200 | webp | WebP media file | | 201 | winregistry | Windows Registry text | | 202 | wmf | Windows metafile | | 203 | woff | Web Open Font Format | | 204 | woff2 | Web Open Font Format v2 | | 205 | xar | XAR archive compressed data | | 206 | xls | Microsoft Excel CDF document | | 207 | xlsb | Microsoft Excel 2007+ document (binary format) | | 208 | xlsx | Microsoft Excel 2007+ document | | 209 | xml | XML document | | 210 | xpi | Compressed installation archive (XPI) | | 211 | xz | XZ compressed data | | 212 | yaml | YAML source | | 213 | yara | YARA rule | | 214 | zig | Zig source | | 215 | zip | Zip archive data | | 216 | zlibstream | zlib compressed data | ## List of possible model's outputs This is the full list of all possible model outputs. E.g., this is the list of all possible values for the Magika Python module's `MagikaResult.prediction.dl.label`. 
Note that, in general, the list of "model outputs" differs from the list of "tool outputs": in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score or other reasons. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table above. | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41 | doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Autocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 | 
elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 | ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua | | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | 
mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | One Note | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | pytorch | Pytorch storage file | | 147 | qt | QuickTime | | 148 | r | R (language) | | 149 | randombytes | Random bytes | | 150 | randomtxt | Random text | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive 
data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD (binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | tar | POSIX tar archive | | 177 | tcl | Tickle | | 178 | textproto | Text protocol buffer | | 179 | tga | Targa image data | | 180 | thumbsdb | Windows thumbnail cache | | 181 | tiff | TIFF image data | | 182 | toml | Tom's obvious, minimal language | | 183 | torrent | BitTorrent file | | 184 | tsv | TSV document | | 185 | ttf | TrueType Font data | | 186 | twig | Twig template | | 187 | txt | Generic text document | | 188 | typescript | TypeScript source | | 189 | undefined | Undefined | | 190 | vba | MS Visual Basic source (VBA) | | 191 | vcxproj | Visual Studio MSBuild project | | 192 | verilog | Verilog source | | 193 | vhdl | VHDL source | | 194 | vtt | Web Video Text Tracks | | 195 | vue | Vue source | | 196 | wasm | Web Assembly | | 197 | wav | Waveform Audio file (WAV) | | 198 | webm | WebM media file | | 199 | webp | WebP media file | | 200 | winregistry | Windows Registry text | | 201 | wmf | Windows metafile | | 202 | woff | Web Open Font Format | | 203 | woff2 | Web Open Font Format v2 | | 204 | xar | XAR archive compressed data | | 205 | xls | Microsoft Excel CDF document | | 206 | xlsb | Microsoft Excel 2007+ document (binary format) | | 207 | xlsx | Microsoft Excel 2007+ document | | 208 | xml | XML document | | 209 | xpi | Compressed installation archive (XPI) | | 210 | xz | XZ compressed data | | 211 | yaml | YAML source | | 212 | yara | YARA rule | | 213 | zig | Zig source | | 214 | zip | Zip archive data | | 215 
| zlibstream | zlib compressed data |> ================================================ FILE: assets/models/standard_v3_2/config.min.json ================================================ {"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","x
pi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"crt":0.9,"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.75,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"protection":"none","aes_key_hex":"","version_major":3} ================================================ FILE: assets/models/standard_v3_2/metadata.json ================================================ {"epoch_num":"190"} ================================================ FILE: assets/models/standard_v3_3/README.md ================================================ # Model documentation ## Table of Contents 1. [List of possible outputs](#list-of-possible-outputs) 1. [List of possible model's outputs](#list-of-possible-models-outputs) ## List of possible outputs This is the full list of all possible tool's outputs (which are different than the possible raw output of the model, see table below). E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.output.label`. 
| Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | directory | A directory | | 40 | dm | Dream Maker | | 41 | dmg | Apple disk image | | 42 | doc | Microsoft Word CDF document | | 43 | dockerfile | Dockerfile | | 44 | docx | Microsoft Word 2007+ document | | 45 | dsstore | Application Desktop Services Store | | 46 | dwg | Autocad Drawing | | 47 | dxf | Autocad Drawing Exchange Format | | 48 | elf | ELF executable | | 49 | elixir | Elixir script | | 50 | emf | Windows Enhanced Metafile image data | | 51 | eml | RFC 822 mail | | 52 | empty | Empty file | | 53 | epub | EPUB document | | 54 | erb | Embedded Ruby source | | 55 | erlang | Erlang source | | 56 | flac | FLAC audio bitstream data | | 57 | flv | Flash Video | 
| 58 | fortran | Fortran | | 59 | gemfile | Gemfile file | | 60 | gemspec | Gemspec file | | 61 | gif | GIF image data | | 62 | gitattributes | Gitattributes file | | 63 | gitmodules | Gitmodules file | | 64 | go | Golang source | | 65 | gradle | Gradle source | | 66 | groovy | Groovy source | | 67 | gzip | gzip compressed data | | 68 | h5 | Hierarchical Data Format v5 | | 69 | handlebars | Handlebars source | | 70 | haskell | Haskell source | | 71 | hcl | HashiCorp configuration language | | 72 | hlp | MS Windows help | | 73 | htaccess | Apache access configuration | | 74 | html | HTML document | | 75 | icns | Mac OS X icon | | 76 | ico | MS Windows icon resource | | 77 | ics | Internet Calendaring and Scheduling | | 78 | ignorefile | Ignorefile | | 79 | ini | INI configuration file | | 80 | internetshortcut | MS Windows Internet shortcut | | 81 | ipynb | Jupyter notebook | | 82 | iso | ISO 9660 CD-ROM filesystem data | | 83 | jar | Java archive data (JAR) | | 84 | java | Java source | | 85 | javabytecode | Java compiled bytecode | | 86 | javascript | JavaScript source | | 87 | jinja | Jinja template | | 88 | jp2 | jpeg2000 | | 89 | jpeg | JPEG image data | | 90 | json | JSON document | | 91 | jsonl | JSONL document | | 92 | julia | Julia source | | 93 | kotlin | Kotlin source | | 94 | latex | LaTeX document | | 95 | lha | LHarc archive | | 96 | lisp | Lisp source | | 97 | lnk | MS Windows shortcut | | 98 | lua | Lua | | 99 | m3u | M3U playlist | | 100 | m4 | GNU Macro | | 101 | macho | Mach-O executable | | 102 | makefile | Makefile source | | 103 | markdown | Markdown document | | 104 | matlab | Matlab Source | | 105 | mht | MHTML document | | 106 | midi | Midi | | 107 | mkv | Matroska | | 108 | mp3 | MP3 media file | | 109 | mp4 | MP4 media file | | 110 | mscompress | MS Compress archive data | | 111 | msi | Microsoft Installer file | | 112 | mum | Windows Update Package file | | 113 | npy | Numpy Array | | 114 | npz | Numpy Arrays Archive | | 115 | nupkg | 
NuGet Package | | 116 | objectivec | ObjectiveC source | | 117 | ocaml | OCaml | | 118 | odp | OpenDocument Presentation | | 119 | ods | OpenDocument Spreadsheet | | 120 | odt | OpenDocument Text | | 121 | ogg | Ogg data | | 122 | one | One Note | | 123 | onnx | Open Neural Network Exchange | | 124 | otf | OpenType font | | 125 | outlook | MS Outlook Message | | 126 | parquet | Apache Parquet | | 127 | pascal | Pascal source | | 128 | pcap | pcap capture file | | 129 | pdb | Windows Program Database | | 130 | pdf | PDF document | | 131 | pebin | PE Windows executable | | 132 | pem | PEM certificate | | 133 | perl | Perl source | | 134 | php | PHP source | | 135 | pickle | Python pickle | | 136 | png | PNG image | | 137 | po | Portable Object (PO) for i18n | | 138 | postscript | PostScript document | | 139 | powershell | Powershell source | | 140 | ppt | Microsoft PowerPoint CDF document | | 141 | pptx | Microsoft PowerPoint 2007+ document | | 142 | prolog | Prolog source | | 143 | proteindb | Protein DB | | 144 | proto | Protocol buffer definition | | 145 | psd | Adobe Photoshop | | 146 | python | Python source | | 147 | pythonbytecode | Python compiled bytecode | | 148 | pytorch | Pytorch storage file | | 149 | qt | QuickTime | | 150 | r | R (language) | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD 
(binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | symlink | Symbolic link | | 177 | tar | POSIX tar archive | | 178 | tcl | Tickle | | 179 | textproto | Text protocol buffer | | 180 | tga | Targa image data | | 181 | thumbsdb | Windows thumbnail cache | | 182 | tiff | TIFF image data | | 183 | toml | Tom's obvious, minimal language | | 184 | torrent | BitTorrent file | | 185 | tsv | TSV document | | 186 | ttf | TrueType Font data | | 187 | twig | Twig template | | 188 | txt | Generic text document | | 189 | typescript | TypeScript source | | 190 | unknown | Unknown binary data | | 191 | vba | MS Visual Basic source (VBA) | | 192 | vcxproj | Visual Studio MSBuild project | | 193 | verilog | Verilog source | | 194 | vhdl | VHDL source | | 195 | vtt | Web Video Text Tracks | | 196 | vue | Vue source | | 197 | wasm | Web Assembly | | 198 | wav | Waveform Audio file (WAV) | | 199 | webm | WebM media file | | 200 | webp | WebP media file | | 201 | winregistry | Windows Registry text | | 202 | wmf | Windows metafile | | 203 | woff | Web Open Font Format | | 204 | woff2 | Web Open Font Format v2 | | 205 | xar | XAR archive compressed data | | 206 | xls | Microsoft Excel CDF document | | 207 | xlsb | Microsoft Excel 2007+ document (binary format) | | 208 | xlsx | Microsoft Excel 2007+ document | | 209 | xml | XML document | | 210 | xpi | Compressed installation archive (XPI) | | 211 | xz | XZ compressed data | | 212 | yaml | YAML source | | 213 | yara | YARA rule | | 214 | zig | Zig source | | 215 | zip | Zip archive data | | 216 | zlibstream | zlib compressed data | ## List of possible model's outputs This is the full list of all possible model's output. E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.dl.label`. 
Note that, in general, the list of "model outputs" is different than the "tool outputs" as in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score or other reasons. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table above. | Index | Content Type Label | Description | |----------|:-------------:|------| | 1 | 3gp | 3GPP multimedia file | | 2 | ace | ACE archive | | 3 | ai | Adobe Illustrator Artwork | | 4 | aidl | Android Interface Definition Language | | 5 | apk | Android package | | 6 | applebplist | Apple binary property list | | 7 | appleplist | Apple property list | | 8 | asm | Assembly | | 9 | asp | ASP source | | 10 | autohotkey | AutoHotKey script | | 11 | autoit | AutoIt script | | 12 | awk | Awk | | 13 | batch | DOS batch file | | 14 | bazel | Bazel build file | | 15 | bib | BibTeX | | 16 | bmp | BMP image data | | 17 | bzip | bzip2 compressed data | | 18 | c | C source | | 19 | cab | Microsoft Cabinet archive data | | 20 | cat | Windows Catalog file | | 21 | chm | MS Windows HtmlHelp Data | | 22 | clojure | Clojure | | 23 | cmake | CMake build file | | 24 | cobol | Cobol | | 25 | coff | Intel 80386 COFF | | 26 | coffeescript | CoffeeScript | | 27 | cpp | C++ source | | 28 | crt | Certificates (binary format) | | 29 | crx | Google Chrome extension | | 30 | cs | C# source | | 31 | csproj | .NET project config | | 32 | css | CSS source | | 33 | csv | CSV document | | 34 | dart | Dart source | | 35 | deb | Debian binary package | | 36 | dex | Dalvik dex file | | 37 | dicom | DICOM | | 38 | diff | Diff file | | 39 | dm | Dream Maker | | 40 | dmg | Apple disk image | | 41 | doc | Microsoft Word CDF document | | 42 | dockerfile | Dockerfile | | 43 | docx | Microsoft Word 2007+ document | | 44 | dsstore | Application Desktop Services Store | | 45 | dwg | Autocad Drawing | | 46 | dxf | Audocad Drawing Exchange Format | | 47 | elf | ELF executable | | 48 |
elixir | Elixir script | | 49 | emf | Windows Enhanced Metafile image data | | 50 | eml | RFC 822 mail | | 51 | epub | EPUB document | | 52 | erb | Embedded Ruby source | | 53 | erlang | Erlang source | | 54 | flac | FLAC audio bitstream data | | 55 | flv | Flash Video | | 56 | fortran | Fortran | | 57 | gemfile | Gemfile file | | 58 | gemspec | Gemspec file | | 59 | gif | GIF image data | | 60 | gitattributes | Gitattributes file | | 61 | gitmodules | Gitmodules file | | 62 | go | Golang source | | 63 | gradle | Gradle source | | 64 | groovy | Groovy source | | 65 | gzip | gzip compressed data | | 66 | h5 | Hierarchical Data Format v5 | | 67 | handlebars | Handlebars source | | 68 | haskell | Haskell source | | 69 | hcl | HashiCorp configuration language | | 70 | hlp | MS Windows help | | 71 | htaccess | Apache access configuration | | 72 | html | HTML document | | 73 | icns | Mac OS X icon | | 74 | ico | MS Windows icon resource | | 75 | ics | Internet Calendaring and Scheduling | | 76 | ignorefile | Ignorefile | | 77 | ini | INI configuration file | | 78 | internetshortcut | MS Windows Internet shortcut | | 79 | ipynb | Jupyter notebook | | 80 | iso | ISO 9660 CD-ROM filesystem data | | 81 | jar | Java archive data (JAR) | | 82 | java | Java source | | 83 | javabytecode | Java compiled bytecode | | 84 | javascript | JavaScript source | | 85 | jinja | Jinja template | | 86 | jp2 | jpeg2000 | | 87 | jpeg | JPEG image data | | 88 | json | JSON document | | 89 | jsonl | JSONL document | | 90 | julia | Julia source | | 91 | kotlin | Kotlin source | | 92 | latex | LaTeX document | | 93 | lha | LHarc archive | | 94 | lisp | Lisp source | | 95 | lnk | MS Windows shortcut | | 96 | lua | Lua | | 97 | m3u | M3U playlist | | 98 | m4 | GNU Macro | | 99 | macho | Mach-O executable | | 100 | makefile | Makefile source | | 101 | markdown | Markdown document | | 102 | matlab | Matlab Source | | 103 | mht | MHTML document | | 104 | midi | Midi | | 105 | mkv | Matroska | | 106 | 
mp3 | MP3 media file | | 107 | mp4 | MP4 media file | | 108 | mscompress | MS Compress archive data | | 109 | msi | Microsoft Installer file | | 110 | mum | Windows Update Package file | | 111 | npy | Numpy Array | | 112 | npz | Numpy Arrays Archive | | 113 | nupkg | NuGet Package | | 114 | objectivec | ObjectiveC source | | 115 | ocaml | OCaml | | 116 | odp | OpenDocument Presentation | | 117 | ods | OpenDocument Spreadsheet | | 118 | odt | OpenDocument Text | | 119 | ogg | Ogg data | | 120 | one | One Note | | 121 | onnx | Open Neural Network Exchange | | 122 | otf | OpenType font | | 123 | outlook | MS Outlook Message | | 124 | parquet | Apache Parquet | | 125 | pascal | Pascal source | | 126 | pcap | pcap capture file | | 127 | pdb | Windows Program Database | | 128 | pdf | PDF document | | 129 | pebin | PE Windows executable | | 130 | pem | PEM certificate | | 131 | perl | Perl source | | 132 | php | PHP source | | 133 | pickle | Python pickle | | 134 | png | PNG image | | 135 | po | Portable Object (PO) for i18n | | 136 | postscript | PostScript document | | 137 | powershell | Powershell source | | 138 | ppt | Microsoft PowerPoint CDF document | | 139 | pptx | Microsoft PowerPoint 2007+ document | | 140 | prolog | Prolog source | | 141 | proteindb | Protein DB | | 142 | proto | Protocol buffer definition | | 143 | psd | Adobe Photoshop | | 144 | python | Python source | | 145 | pythonbytecode | Python compiled bytecode | | 146 | pytorch | Pytorch storage file | | 147 | qt | QuickTime | | 148 | r | R (language) | | 149 | randombytes | Random bytes | | 150 | randomtxt | Random text | | 151 | rar | RAR archive data | | 152 | rdf | Resource Description Framework document (RDF) | | 153 | rpm | RedHat Package Manager archive (RPM) | | 154 | rst | ReStructuredText document | | 155 | rtf | Rich Text Format document | | 156 | ruby | Ruby source | | 157 | rust | Rust source | | 158 | scala | Scala source | | 159 | scss | SCSS source | | 160 | sevenzip | 7-zip archive 
data | | 161 | sgml | sgml | | 162 | shell | Shell script | | 163 | smali | Smali source | | 164 | snap | Snap archive | | 165 | solidity | Solidity source | | 166 | sql | SQL source | | 167 | sqlite | SQLITE database | | 168 | squashfs | Squash filesystem | | 169 | srt | SubRip Text Format | | 170 | stlbinary | Stereolithography CAD (binary) | | 171 | stltext | Stereolithography CAD (text) | | 172 | sum | Checksum file | | 173 | svg | SVG Scalable Vector Graphics image data | | 174 | swf | Small Web File | | 175 | swift | Swift | | 176 | tar | POSIX tar archive | | 177 | tcl | Tickle | | 178 | textproto | Text protocol buffer | | 179 | tga | Targa image data | | 180 | thumbsdb | Windows thumbnail cache | | 181 | tiff | TIFF image data | | 182 | toml | Tom's obvious, minimal language | | 183 | torrent | BitTorrent file | | 184 | tsv | TSV document | | 185 | ttf | TrueType Font data | | 186 | twig | Twig template | | 187 | txt | Generic text document | | 188 | typescript | TypeScript source | | 189 | undefined | Undefined | | 190 | vba | MS Visual Basic source (VBA) | | 191 | vcxproj | Visual Studio MSBuild project | | 192 | verilog | Verilog source | | 193 | vhdl | VHDL source | | 194 | vtt | Web Video Text Tracks | | 195 | vue | Vue source | | 196 | wasm | Web Assembly | | 197 | wav | Waveform Audio file (WAV) | | 198 | webm | WebM media file | | 199 | webp | WebP media file | | 200 | winregistry | Windows Registry text | | 201 | wmf | Windows metafile | | 202 | woff | Web Open Font Format | | 203 | woff2 | Web Open Font Format v2 | | 204 | xar | XAR archive compressed data | | 205 | xls | Microsoft Excel CDF document | | 206 | xlsb | Microsoft Excel 2007+ document (binary format) | | 207 | xlsx | Microsoft Excel 2007+ document | | 208 | xml | XML document | | 209 | xpi | Compressed installation archive (XPI) | | 210 | xz | XZ compressed data | | 211 | yaml | YAML source | | 212 | yara | YARA rule | | 213 | zig | Zig source | | 214 | zip | Zip archive data | | 215 
| zlibstream | zlib compressed data |> ================================================ FILE: assets/models/standard_v3_3/config.min.json ================================================ {"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","x
pi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"crt":0.9,"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.75,"ocaml":0.9,"pascal":0.95,"r":0.9,"rst":0.9,"sql":0.9,"tsv":0.9,"zig":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"protection":"none","aes_key_hex":"","version_major":3} ================================================ FILE: assets/models/standard_v3_3/metadata.json ================================================ {"epoch_num":"91"} ================================================ FILE: dist-workspace.toml ================================================ [workspace] members = ["cargo:rust/cli"] # Config for 'dist' [dist] # Skip checking whether the specified configuration files are up to date allow-dirty = ["ci"] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) cargo-dist-version = "0.31.0" # CI backends to support ci = "github" # The installers to generate for each app installers = ["shell", "powershell"] # Target platforms to build apps for (Rust target-triple syntax) targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] # Path that installers should place binaries in install-path = "CARGO_HOME" # Whether to install an updater program install-updater = true # Generate and dist a source tarball source-tarball = false # Whether to auto-include files like READMEs, LICENSEs, and CHANGELOGs (default true) auto-includes = false # A prefix git tags must include for dist to care about them tag-namespace = "cli" # Whether +crt-static should be used on msvc msvc-crt-static = false # Whether to enable GitHub Attestations github-attestations = true # Post-announce jobs to run in CI post-announce-jobs = ["./cli-latest"] [dist.github-custom-runners] global = "ubuntu-latest" aarch64-unknown-linux-gnu = "ubuntu-24.04-arm" x86_64-unknown-linux-gnu = "ubuntu-latest" aarch64-apple-darwin = "macos-latest" x86_64-pc-windows-msvc =
"windows-latest" ================================================ FILE: docs/concepts.md ================================================ # Magika Concepts The documentation has moved, see the [Core Concepts](https://securityresearch.google/magika/core-concepts/how-magika-works/) section of the new website. ================================================ FILE: docs/js.md ================================================ # JavaScript Documentation The documentation has moved, see the [JavaScript bindings](https://securityresearch.google/magika/cli-and-bindings/js/) section of the new website. ================================================ FILE: go/README.md ================================================ # Go library This directory contains the Go library for Magika. The inference relies on the [ONNX Runtime](https://onnxruntime.ai/), and it requires [cgo](https://go.dev/blog/cgo) for interfacing with the ONNX Runtime [C API](https://onnxruntime.ai/docs/api/c/). ## Usage As illustrated in [`example/main.go`](./example/main.go), calling magika from go boils down to creating a scanner associated with a given model, and scanning the content. ```golang //go:build cgo && onnxruntime // This package illustrates the usage of the Magika go binding. // // It requires the onnxruntime and the Magika assets to be accessible. // onnxruntime is available on https://github.com/microsoft/onnxruntime/releases // Magika assets are available on https://github.com/google/magika/tree/main/assets // // Tag and link directives must be provided at build or run time: // go run -tags onnxruntime -ldflags="-linkmode=external -extldflags=-L/opt/onnxruntime/lib" . package main import ( "fmt" "log" "strings" "github.com/google/magika/go/magika" ) const ( // assetsDir holds where the Magika assets have been installed. assetsDir = "/opt/magika/assets" // modelName holds the Magika model to use. modelName = "standard_v3_3" ) func main() { // Create a scanner.
s, err := magika.NewScanner(assetsDir, modelName) if err != nil { log.Fatalf("NewScanner failed: %v", err) } // Scan ct, err := s.Scan(strings.NewReader("Hello world"), 11) if err != nil { log.Fatalf("Scan failed: %v", err) } fmt.Printf("%+v\n", ct) } ``` Inspiration on how to download and install onnxruntime and magika assets can be found in [`docker/Dockerfile`](docker/Dockerfile), and [`cli/cli.go`](cli/cli.go) provides a somewhat more elaborate usage of the go binding. ## Content - [`docker`](./docker) contains a sample docker file that builds a container image that ties together a Magika CLI, an ONNX Runtime, and a [model](../assets/models/standard_v3_3). - [`cli`](./cli) contains a basic CLI that illustrates how the Magika go library may be called from within an application. - [`magika`](./magika) contains the library, that extracts features from a sequence of bytes, and provides a scanner to infer content types. - [`onnx`](./onnx) wraps the C API of the ONNX Runtime to provide an inference engine. - [`example`](./example) contains a rudimentary example for creating and using a content type scanner. ================================================ FILE: go/cli/cli.go ================================================ package main import ( "bytes" "fmt" "io" "os" "github.com/google/magika/go/magika" ) const ( assetsDirEnv = "MAGIKA_ASSETS_DIR" modelNameEnv = "MAGIKA_MODEL" ) // cli is a basic CLI that infers the content type of the files listed on // the command line. The assets dir and the model name are given via the // environment variable MAGIKA_ASSETS_DIR and MAGIKA_MODEL respectively. 
func cli(w io.Writer, args ...string) error { assetsDir := os.Getenv(assetsDirEnv) if assetsDir == "" { return fmt.Errorf("%s environment variable not set or empty", assetsDirEnv) } modelName := os.Getenv(modelNameEnv) if modelName == "" { return fmt.Errorf("%s environment variable not set or empty", modelNameEnv) } s, err := magika.NewScanner(assetsDir, modelName) if err != nil { return fmt.Errorf("create scanner: %w", err) } // For each filename given as argument, read the file and scan its content. for _, a := range args { fmt.Fprintf(w, "%s: ", a) b, err := os.ReadFile(a) if err != nil { fmt.Fprintf(w, "%v\n", err) continue } ct, err := s.Scan(bytes.NewReader(b), len(b)) if err != nil { fmt.Fprintf(w, "scan: %v\n", err) continue } fmt.Fprintf(w, "%s\n", ct.Label) } return nil } ================================================ FILE: go/cli/cli_test.go ================================================ //go:build cgo && onnxruntime package main import ( "path" "strings" "testing" "github.com/google/go-cmp/cmp" ) func TestCLI(t *testing.T) { const basicDir = "../../tests_data/basic" var ( files = []string{ path.Join(basicDir, "python/code.py"), path.Join(basicDir, "zip/magika_test.zip"), } b strings.Builder ) if err := cli(&b, files...); err != nil { t.Fatal(err) } if d := cmp.Diff(strings.Join([]string{ "../../tests_data/basic/python/code.py: python", "../../tests_data/basic/zip/magika_test.zip: zip", }, "\n"), strings.TrimSpace(b.String())); d != "" { t.Errorf("mismatch (-want +got):\n%s", d) } } ================================================ FILE: go/cli/main.go ================================================ /* CLI is a simple command line interface for magika. It takes a list of files as argument, and infers their types in sequence. For example: $ magika test.go readme.md test.go: go readme.md: markdown The primary intent is to illustrate how the magika go library can be used and compiled, using cgo and the ONNX Runtime library. 
*/ package main import ( "fmt" "os" ) func main() { if err := cli(os.Stdout, os.Args[1:]...); err != nil { fmt.Printf("Error: %v\n", err) os.Exit(1) } } ================================================ FILE: go/cli/tests_data/magika_test_pptx.txt ================================================ This is a test for Magika! Very cool if this can be detected correctly! ================================================ FILE: go/docker/Dockerfile ================================================ # Sample Dockerfile to build an image that ties together an ONNX Runtime, # a Magika model, and a Magika CLI. # # It expects the root of the repository as build context: # $ docker build -f go/docker/Dockerfile -t magika-go:latest . # # Then, to list the content type of the files in the current directory: # docker run --rm --name magika-go -v $PWD:$PWD:ro -w $PWD magika-go:latest * # Build stage for ONNX Runtime and magika. FROM golang:latest AS build # Work in a clean temp directory. WORKDIR /tmp # Download, check, and install ONNX Runtime (https://onnxruntime.ai/) in # /opt/onnxruntime. # Releases are located at https://github.com/microsoft/onnxruntime/releases. # We need the SDK (/include) for compiling, and the library (/lib) for inference. ARG ONNX_NAME=onnxruntime ARG ONNX_ARCH=linux-x64 ARG ONNX_VERSION=1.19.2 ARG ONNX_FULLNAME=${ONNX_NAME}-${ONNX_ARCH}-${ONNX_VERSION} ARG ONNX_TARBALL=${ONNX_FULLNAME}.tgz ARG ONNX_DIGEST=eb00c64e0041f719913c4080e0fed7d9963dc3aa9b54664df6036d8308dbcd33 RUN curl -sL -O https://github.com/microsoft/${ONNX_NAME}/releases/download/v${ONNX_VERSION}/${ONNX_TARBALL} \ && echo "${ONNX_DIGEST} ${ONNX_TARBALL}" > checksum.txt \ && sha256sum -c checksum.txt \ && tar -xzf ${ONNX_TARBALL} -C /opt \ && ln -s /opt/${ONNX_FULLNAME} /opt/onnxruntime # Retrieve the magika go code from the build context, test, and build the cli. 
COPY go go/
COPY tests_data tests_data/
COPY assets/content_types_kb.min.json assets/content_types_kb.min.json
COPY assets/models/standard_v3_3 assets/models/standard_v3_3/
ARG CGO_ENABLED=1
ARG CGO_CFLAGS=-I/opt/onnxruntime/include
ARG LD_LIBRARY_PATH=/opt/onnxruntime/lib

# Run the tests.
WORKDIR go
RUN MAGIKA_ASSETS_DIR=../../assets \
    MAGIKA_MODEL=standard_v3_3 \
    go test -v -tags onnxruntime -ldflags="-linkmode=external -extldflags=-L/opt/onnxruntime/lib" ./...

# Build the CLI.
WORKDIR cli
RUN go build -tags onnxruntime -ldflags="-linkmode=external -extldflags=-L/opt/onnxruntime/lib" .

# Final stage: copy resources from the build and set environment variables.
FROM debian:latest

# Add the ONNX Runtime.
ENV LD_LIBRARY_PATH=/opt/onnxruntime/lib
COPY --from=build /opt/onnxruntime/lib ${LD_LIBRARY_PATH}

# Magika model.
ENV MAGIKA_ASSETS_DIR=/opt/magika/assets
ENV MAGIKA_MODEL=standard_v3_3
COPY assets/models/${MAGIKA_MODEL} ${MAGIKA_ASSETS_DIR}/models/${MAGIKA_MODEL}/
COPY assets/content_types_kb.min.json ${MAGIKA_ASSETS_DIR}/content_types_kb.min.json

# Magika CLI.
COPY --from=build /tmp/go/cli/cli /usr/local/bin/magika
ENTRYPOINT ["magika"]

================================================
FILE: go/example/main.go
================================================
//go:build cgo && onnxruntime

// This package illustrates the usage of the Magika go binding.
//
// It requires the onnxruntime and the Magika assets to be accessible.
// onnxruntime is available on https://github.com/microsoft/onnxruntime/releases
// Magika assets are available on https://github.com/google/magika/tree/main/assets
//
// Tag and link directives must be provided at build or run time:
// go run -tags onnxruntime -ldflags="-linkmode=external -extldflags=-L/opt/onnxruntime/lib" .
package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/google/magika/go/magika"
)

const (
	// assetsDir holds where the Magika assets have been installed.
	assetsDir = "/opt/magika/assets"
	// modelName holds the Magika model to use.
	modelName = "standard_v3_3"
)

// main creates a scanner for the configured model, scans an 11-byte
// in-memory string, and prints the inferred content type.
func main() {
	// Create a scanner.
	s, err := magika.NewScanner(assetsDir, modelName)
	if err != nil {
		log.Fatalf("NewScanner failed: %v", err)
	}
	// Scan
	// The second argument is the number of bytes readable from the reader.
	ct, err := s.Scan(strings.NewReader("Hello world"), 11)
	if err != nil {
		log.Fatalf("Scan failed: %v", err)
	}
	fmt.Printf("%+v\n", ct)
}

================================================
FILE: go/go.mod
================================================
module github.com/google/magika/go

go 1.22.3

require github.com/google/go-cmp v0.6.0 // indirect

================================================
FILE: go/go.sum
================================================
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=

================================================
FILE: go/magika/config.go
================================================
package magika

import (
	"encoding/json"
	"fmt"
	"os"
	"path"
)

// File and directory names used to locate the assets below the assets dir.
const (
	configFile         = "config.min.json"
	contentTypesKBFile = "content_types_kb.min.json"
	modelFile          = "model.onnx"
	modelsDir          = "models"
)

// Config holds the portion of Magika's model configuration that is relevant
// for inference.
type Config struct {
	// BegSize, MidSize and EndSize are the lengths of the Beg, Mid and End
	// feature vectors produced by feature extraction.
	BegSize int `json:"beg_size"`
	MidSize int `json:"mid_size"`
	EndSize int `json:"end_size"`
	// NOTE(review): the use of this flag is not visible in this part of the
	// package; confirm its semantics against the model documentation.
	UseInputsAtOffsets bool `json:"use_inputs_at_offsets"`
	// MediumConfidenceThreshold is the minimum score for labels that have no
	// entry in Thresholds.
	MediumConfidenceThreshold float32 `json:"medium_confidence_threshold"`
	// MinFileSizeForDl is the number of leading feature values that must not
	// be padding for the model to be used by the scanner.
	MinFileSizeForDl int64 `json:"min_file_size_for_dl"`
	// PaddingToken is the value used to pad feature vectors.
	PaddingToken int `json:"padding_token"`
	// BlockSize is the number of bytes read from the beginning and from the
	// end of the content during feature extraction.
	BlockSize int `json:"block_size"`
	// TargetLabelsSpace maps a model output index to its label.
	TargetLabelsSpace []string `json:"target_labels_space"`
	// Thresholds holds per-label minimum scores.
	Thresholds map[string]float32 `json:"thresholds"`
	// Overwrite maps a label to the label that must be reported instead.
	Overwrite map[string]string `json:"overwrite_map"`
}

// ReadConfig is a helper that reads and unmarshal a Config, given an assets
// dir and a model name.
func ReadConfig(assetsDir, name string) (Config, error) { var cfg Config p := configPath(assetsDir, name) b, err := os.ReadFile(p) if err != nil { return Config{}, fmt.Errorf("read %q: %w", p, err) } if err := json.Unmarshal(b, &cfg); err != nil { return Config{}, fmt.Errorf("unmarshal: %w", err) } return cfg, nil } // contentTypesKBPath returns the content types KB path for the given // asset folder. func contentTypesKBPath(assetDir string) string { return path.Join(assetDir, contentTypesKBFile) } // configPath returns the model config for the given asset folder and model // name. func configPath(assetDir, name string) string { return path.Join(assetDir, modelsDir, name, configFile) } // modelPath returns the Onnx model for the given asset folder and model name. func modelPath(assetDir, name string) string { return path.Join(assetDir, modelsDir, name, modelFile) } ================================================ FILE: go/magika/content.go ================================================ package magika import ( "encoding/json" "fmt" "os" ) const ( contentTypeLabelEmpty = "empty" contentTypeLabelTxt = "txt" contentTypeLabelUnknown = "unknown" ) // ContentType holds the definition of a content type. type ContentType struct { Label string // As keyed in the content types KB. MimeType string `json:"mime_type"` Group string `json:"group"` Description string `json:"description"` Extensions []string `json:"extensions"` IsText bool `json:"is_text"` } // readContentTypesKB is a helper that reads and unmarshal a content types KB, // given the assets dir. // It returns a dictionary that maps a label as defined in the model config // target label space to a content type. 
func readContentTypesKB(assetsDir string) (map[string]ContentType, error) { var ckb map[string]ContentType p := contentTypesKBPath(assetsDir) b, err := os.ReadFile(p) if err != nil { return nil, fmt.Errorf("read %q: %w", p, err) } if err := json.Unmarshal(b, &ckb); err != nil { return nil, fmt.Errorf("unmarshal: %w", err) } for label, ct := range ckb { ct.Label = label ckb[label] = ct } return ckb, nil } ================================================ FILE: go/magika/features.go ================================================ package magika import ( "bytes" "fmt" "io" ) // Features holds the features of a give slice of bytes. type Features struct { firstBlock []byte Beg []int32 `json:"beg"` Mid []int32 `json:"mid"` End []int32 `json:"end"` Offset8000 []int32 `json:"offset_0x8000_0x8007"` Offset8800 []int32 `json:"offset_0x8800_0x8807"` Offset9000 []int32 `json:"offset_0x9000_0x9007"` Offset9800 []int32 `json:"offset_0x9800_0x9807"` } // ExtractFeatures extract the features from the given reader. // The number of bytes that can be read from the reader is given by size. func ExtractFeatures(cfg Config, r io.ReaderAt, size int) (Features, error) { var ( er = errReader{r: r, sz: size} beg = er.readAt(0, cfg.BlockSize) mid = er.readAt((size-cfg.MidSize)/2, cfg.MidSize) end = er.readAt(size-cfg.BlockSize, cfg.BlockSize) ) f := buildFeatures(cfg, beg, mid, end) peek := func(off int) []int32 { b := er.readAt(off, 8) if len(b) < 8 { b = nil } return padInt32(cfg, b, 0, 8) } f.Offset8000 = peek(0x8000) f.Offset8800 = peek(0x8800) f.Offset9000 = peek(0x9000) f.Offset9800 = peek(0x9800) if er.err != nil { return Features{}, er.err } return f, nil } // Flatten returns a flattened array of the given features. func (f Features) Flatten() []int32 { res := make([]int32, 0, len(f.Beg)+len(f.Mid)+len(f.End)) res = append(res, f.Beg...) res = append(res, f.Mid...) res = append(res, f.End...) 
return res } // errReader wraps an io.ReaderAt and accumulates errors that may arise during // reading. It also silently protects against out of range read. // This allows for a simpler parsing code flow with a unique error check at // the end of parsing. type errReader struct { r io.ReaderAt sz int err error } func (e *errReader) readAt(off, n int) []byte { if e.err != nil || off >= e.sz { return nil } if off < 0 { n += off off = 0 } n = min(n, e.sz-off) b := make([]byte, n) p, err := e.r.ReadAt(b, int64(max(off, 0))) if err != nil && err != io.EOF { e.err = fmt.Errorf("read %d bytes at %d: %w", n, max(off, 0), err) return nil } return b[:p] } // buildFeatures builds features from the beg, mid, and end bytes. func buildFeatures(cfg Config, beg, mid, end []byte) Features { firstBlock := beg spaces := string([]rune{'\t', '\n', '\v', '\f', '\r', ' '}) // Trim beg and end, and truncate to BegSize and EndSize. beg = bytes.TrimLeft(beg, spaces) end = bytes.TrimRight(end, spaces) beg = safeSlice(beg, 0, cfg.BegSize) end = safeSlice(end, len(end)-cfg.EndSize, len(end)) return Features{ firstBlock: firstBlock, Beg: padInt32(cfg, beg, 0, cfg.BegSize), Mid: padInt32(cfg, mid, (cfg.MidSize-len(mid))/2, cfg.MidSize), End: padInt32(cfg, end, cfg.EndSize-len(end), cfg.EndSize), } } // padInt32 pads and convert the given bytes into int32. // The len of the returned is the given size. // if prefix is non-zero, that many padding is add as prefix. // then the given bytes are converted into int32 // finally, padding occurs until the returned slice is of the given size. func padInt32(cfg Config, b []byte, prefix, size int) []int32 { r := make([]int32, 0, size) for len(r) < prefix { r = append(r, int32(cfg.PaddingToken)) } for _, bb := range b { r = append(r, int32(bb)) } for len(r) < size { r = append(r, int32(cfg.PaddingToken)) } return r } // safeSlice returns a slice from the given array, silently clipping // out-of-bound indices. 
This happens when the given input data contains // fewer bytes than the sampling size. func safeSlice(b []byte, from, to int) []byte { return b[max(from, 0):min(to, len(b))] } ================================================ FILE: go/magika/features_test.go ================================================ package magika import ( "bytes" "compress/gzip" "encoding/json" "io" "os" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) func TestExtractFeatures(t *testing.T) { const artifacts = "../../tests_data/features_extraction/reference.json.gz" b, err := loadArtifacts(t, artifacts) if err != nil { t.Fatalf("load artifacts: %s", err) } var cases []struct { TestInfo Config `json:"test_info"` Content []byte `json:"content"` FeaturesV2 Features `json:"features_v2"` } if err := json.Unmarshal(b, &cases); err != nil { t.Fatal(err) } for i, c := range cases { ft, err := ExtractFeatures(c.TestInfo, bytes.NewReader(c.Content), len(c.Content)) if err != nil { t.Fatal(err) } if d := cmp.Diff(c.FeaturesV2, ft, cmpopts.IgnoreUnexported(Features{})); d != "" { t.Errorf("Feature [%d]: mismatch (-want +got):\n%s", i, d) } } } func TestReferenceExtractFeatures(t *testing.T) { const artifacts = "../../tests_data/reference/features_extraction_examples.json.gz" b, err := loadArtifacts(t, artifacts) if err != nil { t.Fatalf("load artifacts: %s", err) } var cases []struct { TestInfo Config `json:"args"` Content []byte `json:"content_base64"` FeaturesV2 Features `json:"features"` } if err := json.Unmarshal(b, &cases); err != nil { t.Fatal(err) } for i, c := range cases { ft, err := ExtractFeatures(c.TestInfo, bytes.NewReader(c.Content), len(c.Content)) if err != nil { t.Fatal(err) } if d := cmp.Diff(c.FeaturesV2, ft, cmpopts.IgnoreUnexported(Features{}), cmpopts.IgnoreFields(Features{}, "Offset8000", "Offset8800", "Offset9000", "Offset9800"), ); d != "" { t.Errorf("Feature [%d]: mismatch (-want +got):\n%s", i, d) } } } func loadArtifacts(t *testing.T, path 
string) ([]byte, error) { t.Helper() f, err := os.Open(path) if err != nil { t.Fatalf("Open %s: %v", path, err) } r, err := gzip.NewReader(f) if err != nil { t.Fatalf("could not uncompress test data: %s", err) } b, err := io.ReadAll(r) if err != nil { t.Fatalf("could not read uncompress test data: %s", err) } return b, nil } ================================================ FILE: go/magika/scanner.go ================================================ package magika import ( "errors" "fmt" "io" "unicode/utf8" "github.com/google/magika/go/onnx" ) // Scanner represents a Magika scanner that returns the content type // of the scanned content running the Magika model using an ONNX Runtime. // This is a similar scanner interface to licensecheck, that scans // content to identify licenses. type Scanner struct { onnx onnx.Onnx cfg Config ckb map[string]ContentType } // NewScanner returns a scanner based on the model of the given name defined // in the given the assets dir. func NewScanner(assetsDir, name string) (*Scanner, error) { cfg, err := ReadConfig(assetsDir, name) if err != nil { return nil, fmt.Errorf("read config: %w", err) } p := modelPath(assetsDir, name) ob, err := onnx.NewOnnx(p, len(cfg.TargetLabelsSpace)) if err != nil { return nil, fmt.Errorf("new onnx: %w", err) } if ob == nil { return nil, errors.New("new onnx: nil onnx object") } ckb, err := readContentTypesKB(assetsDir) if err != nil { return nil, fmt.Errorf("read content types KB: %w", err) } return &Scanner{ onnx: ob, cfg: cfg, ckb: ckb, }, nil } // Scan scans the given reader containing the given size of bytes, and // returns the inferred content type. // It is safe for concurrent use. func (s *Scanner) Scan(r io.ReaderAt, size int) (ContentType, error) { ct, _, err := s.scanScore(r, size) return ct, err } // scanScore scans the given reader containing the given size of bytes, and // returns the inferred content type and its score. // It is safe for concurrent use. 
func (s *Scanner) scanScore(r io.ReaderAt, size int) (ContentType, float32, error) { if size == 0 { return s.ckb[contentTypeLabelEmpty], 1, nil } ft, err := ExtractFeatures(s.cfg, r, size) if err != nil { return ContentType{}, 0, fmt.Errorf("extract features: %w", err) } // Do not use the model for small files. if ft.Beg[s.cfg.MinFileSizeForDl-1] == int32(s.cfg.PaddingToken) { if utf8.Valid(ft.firstBlock) { return s.ckb[contentTypeLabelTxt], 1, nil } else { return s.ckb[contentTypeLabelUnknown], 1, nil } } scores, err := s.onnx.Run(ft.Flatten()) if err != nil { return ContentType{}, 0, fmt.Errorf("run onnx: %w", err) } if len(scores) == 0 { return ContentType{}, 0, errors.New("run onnx: empty result") } best := 0 for i, v := range scores { if v > scores[best] { best = i } } ct, err := s.contentType(best, scores[best]) if err != nil { return ContentType{}, 0, fmt.Errorf("get content type: %w", err) } return ct, scores[best], nil } func (s *Scanner) contentType(best int, score float32) (ContentType, error) { l := s.cfg.TargetLabelsSpace[best] ct, ok := s.ckb[l] if !ok { return ContentType{}, fmt.Errorf("no content type found for %q", l) } th := s.cfg.MediumConfidenceThreshold if t, ok := s.cfg.Thresholds[l]; ok { th = t } // Return the inferred content type if the threshold is met, otherwise // falls back to a relevant default. 
switch { case score >= th: case ct.IsText: l = contentTypeLabelTxt default: l = contentTypeLabelUnknown } ct, ok = s.ckb[l] if !ok { return ContentType{}, fmt.Errorf("no content type found for %q", l) } if l, ok = s.cfg.Overwrite[l]; ok { if ct, ok = s.ckb[l]; !ok { return ContentType{}, fmt.Errorf("no content type found for %q", l) } } return ct, nil } ================================================ FILE: go/magika/scanner_test.go ================================================ //go:build cgo && onnxruntime package magika import ( "bytes" "encoding/json" "fmt" "os" "path" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) func TestScannerBasic(t *testing.T) { const basicDir = "../../tests_data/basic" es, err := os.ReadDir(basicDir) if err != nil { t.Fatalf("read tests data: %v", err) } s := newTestScanner(t) for _, e := range es { t.Run(e.Name(), func(t *testing.T) { dir := path.Join(basicDir, e.Name()) es, err := os.ReadDir(dir) if err != nil { t.Fatalf("read tests data: %v", err) } for _, ee := range es { p := path.Join(dir, ee.Name()) b, err := os.ReadFile(p) if err != nil { t.Fatalf("read %s: %v", p, err) } ct, err := s.Scan(bytes.NewReader(b), len(b)) if err != nil { t.Fatalf("scan %s: %v", p, err) } if d := cmp.Diff(e.Name(), ct.Label); d != "" { t.Errorf("unexpected content type for %s (-want +got):\n%s", ee.Name(), d) } } }) } } func TestScannerSmall(t *testing.T) { s := newTestScanner(t) for _, c := range []struct { name string data []byte want string }{{ name: "empty", data: []byte{}, want: contentTypeLabelEmpty, }, { name: "small txt", data: []byte("small"), want: contentTypeLabelTxt, }, { name: "small bin", data: []byte{0x80, 0x80, 0x80, 0x80}, want: contentTypeLabelUnknown, }} { t.Run(c.name, func(t *testing.T) { ct, err := s.Scan(bytes.NewReader(c.data), len(c.data)) if err != nil { t.Fatalf("scan: %v", err) } if d := cmp.Diff(s.ckb[c.want], ct); d != "" { t.Errorf("unexpected content type (-want +got):\n%s", d) } }) 
} } func TestScannerReference(t *testing.T) { type prediction struct { Dl string `json:"dl"` Output string `json:"output"` Score float32 `json:"score"` OverwriteReason string `json:"overwrite_reason"` } type tcase struct { PredictionMode string `json:"prediction_mode"` Path string `json:"path"` Content []byte `json:"content_base64"` Status string `json:"status"` Prediction prediction `json:"prediction"` } for _, artifacts := range []string{ "standard_v3_3-inference_examples_by_content.json.gz", "standard_v3_3-inference_examples_by_path.json.gz", } { b, err := loadArtifacts(t, path.Join("../../tests_data/reference", artifacts)) if err != nil { t.Fatalf("load artifacts: %v", err) } var tcases []*tcase if err := json.Unmarshal(b, &tcases); err != nil { t.Fatalf("unmarshal: %s", err) } s := newTestScanner(t) for _, pm := range []string{"high_confidence"} { t.Run(fmt.Sprintf("%s-%s", artifacts, pm), func(t *testing.T) { var count int for i, c := range tcases { if c.PredictionMode != pm { continue } count++ if c.Path != "" { p := path.Join("../..", c.Path) b, err := os.ReadFile(p) if err != nil { t.Errorf("read %s [%d]: %v", p, i, err) continue } c.Content = b } ct, score, err := s.scanScore(bytes.NewReader(c.Content), len(c.Content)) if err != nil { t.Errorf("scan [%d]: %v", i, err) continue } got := prediction{ Output: ct.Label, Score: score, } if d := cmp.Diff(c.Prediction, got, cmpopts.EquateApprox(0, 1e-5), cmpopts.IgnoreFields(prediction{}, "Dl", "OverwriteReason"), ); d != "" { t.Errorf("unexpected score [%d] (-want +got):\n%s", i, d) } } if count == 0 { t.Errorf("no test cases found") } }) } } } func newTestScanner(t *testing.T) *Scanner { t.Helper() const ( assetsDir = "../../assets" modelName = "standard_v3_3" ) s, err := NewScanner(assetsDir, modelName) if err != nil { t.Fatalf("new scanner: %v", err) } return s } ================================================ FILE: go/onnx/onnx.go ================================================ package onnx // Onnx 
represents something that can run inferences on features. type Onnx interface { // Run returns the result of the inference on the given features. Run(features []int32) ([]float32, error) } ================================================ FILE: go/onnx/onnx_runtime.go ================================================ //go:build cgo && onnxruntime package onnx // #cgo LDFLAGS: -lonnxruntime // #include "onnx_runtime.h" import "C" import ( "fmt" ) // NewOnnx returns an onnx that can perform inferences using an ONNX Runtime // (https://onnxruntime.ai/) and the given model. // It wraps the C calls to the ONNX Runtime API https://onnxruntime.ai/docs/api/c. func NewOnnx(modelPath string, sizeTarget int) (Onnx, error) { ort := &onnxRuntime{ api: C.GetApiBase(), sizeTarget: sizeTarget, } if err := C.CreateSession(ort.api, C.CString(modelPath), &ort.session, &ort.memory); err != nil { return nil, fmt.Errorf("create session: %v", C.GoString(C.GetErrorMessage(err))) } return ort, nil } // onnxRuntime implements the Onnx interface relying on a cgo call // to a C ONNX Runtime library. 
type onnxRuntime struct { api *C.OrtApi session *C.OrtSession memory *C.OrtMemoryInfo sizeTarget int } func (ort *onnxRuntime) Run(features []int32) ([]float32, error) { target := make([]float32, ort.sizeTarget) if err := C.Run(ort.api, ort.session, ort.memory, (*C.int32_t)(&features[0]), C.int64_t(len(features)), (*C.float)(&target[0]), C.int64_t(len(target))); err != nil { return nil, fmt.Errorf("run: %v", C.GoString(C.GetErrorMessage(err))) } return target, nil } ================================================ FILE: go/onnx/onnx_runtime.h ================================================ #include #include #define RETURN_ON_ERROR(expr) { \ OrtStatus* onnx_status = (expr); \ if (onnx_status != NULL) { \ return onnx_status; \ } \ } const OrtApi *GetApiBase() { return OrtGetApiBase()->GetApi(ORT_API_VERSION); } OrtStatus *CreateSession(const OrtApi *ort, const char *model, OrtSession **session, OrtMemoryInfo **memory_info) { OrtEnv *env; RETURN_ON_ERROR(ort->CreateEnv(ORT_LOGGING_LEVEL_ERROR, "onnx", &env)); RETURN_ON_ERROR(ort->DisableTelemetryEvents(env)); OrtSessionOptions *options; RETURN_ON_ERROR(ort->CreateSessionOptions(&options)); RETURN_ON_ERROR(ort->EnableCpuMemArena(options)); RETURN_ON_ERROR(ort->CreateSession(env, model, options, session)); RETURN_ON_ERROR(ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, memory_info)); return NULL; } OrtStatus *Run(const OrtApi *ort, OrtSession *session, OrtMemoryInfo *memory_info, int32_t features[], int64_t sizeFeatures, float target[], int64_t sizeTarget) { const char *input_names[] = {"bytes"}; const char *output_names[] = {"target_label"}; const int64_t input_shape[] = {1, sizeFeatures}; OrtValue *input_tensor = NULL; RETURN_ON_ERROR(ort->CreateTensorWithDataAsOrtValue(memory_info, features, sizeFeatures * sizeof(int32_t), input_shape, 2, ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, &input_tensor)); OrtValue *output_tensor = NULL; RETURN_ON_ERROR(ort->Run(session, NULL, input_names, (const OrtValue *const *) 
&input_tensor, 1, output_names, 1, &output_tensor)); float *out = NULL; RETURN_ON_ERROR(ort->GetTensorMutableData(output_tensor, (void **) &out)); memcpy(target, out, sizeTarget * sizeof(float)); ort->ReleaseValue(input_tensor); ort->ReleaseValue(output_tensor); return NULL; } const char *GetErrorMessage(const OrtStatus* onnx_status) { if (onnx_status == NULL) { return ""; } return OrtGetApiBase()->GetApi(ORT_API_VERSION)->GetErrorMessage(onnx_status); } ================================================ FILE: go/onnx/onnx_runtime_test.go ================================================ //go:build cgo && onnxruntime package onnx_test import ( "math/rand/v2" "testing" "github.com/google/magika/go/magika" "github.com/google/magika/go/onnx" ) func TestONNXRuntime(t *testing.T) { const ( assetsDir = "../../assets" modelName = "standard_v3_3" modelPath = "../../assets/models/" + modelName + "/model.onnx" ) cfg, err := magika.ReadConfig(assetsDir, modelName) if err != nil { t.Fatal(err) } rt, err := onnx.NewOnnx(modelPath, len(cfg.TargetLabelsSpace)) if err != nil { t.Fatalf("Create onnx: %v", err) } // Initialize a random features tensor. features := make([]int32, cfg.BegSize+cfg.MidSize+cfg.EndSize) for i := range features { features[i] = rand.Int32() } // Get the scores and check its size. scores, err := rt.Run(features) if err != nil { t.Fatalf("Run onnx: %v", err) } if n, m := len(scores), len(cfg.TargetLabelsSpace); n != m { t.Fatalf("Unexpected scores len: got %d, want %d", n, m) } } ================================================ FILE: go/onnx/onnx_zero.go ================================================ //go:build !(cgo && onnxruntime) package onnx // NewOnnx returns a nil Onnx runtime. // This allows for building and unit testing in a non-cgo context. 
func NewOnnx(string, int) (Onnx, error) { return nil, nil } ================================================ FILE: js/.gitignore ================================================ package-lock.json node_modules ================================================ FILE: js/CHANGELOG.md ================================================ # CHANGELOG ## [1.0.0] - Mark end of experimental mode. No major changes. ## [0.3.2] - Upgrade to `standard_v3_3` model. ## [0.3.1] - Overhaul of the API to use much cleaner abstractions. - Removed identifyBytesFull and identifyStreamFull: identifyBytes and identifyStream now return all the scores as well (accessible with `result.prediction.scores_map`). - Restrict the input types to `Uint8Array` and `Buffer`. ## [0.2.13] - 2024-03-26 - This is the first working (but still very experimental) version. ================================================ FILE: js/README.md ================================================ # Magika TypeScript/JavaScript library Magika is a novel AI-powered file type detection tool that relies on the recent advance of deep learning to provide accurate detection. Under the hood, Magika employs a custom, highly optimized model that only weighs about a few MBs, and enables precise file identification within milliseconds, even when running on a single CPU. Magika has been trained and evaluated on a dataset of ~100M samples across 200+ content types (covering both binary and textual file formats), and it achieves an average ~99% accuracy on our test set. This npm package allows you to run Magika in the browser or in Node! Magika's website: [https://securityresearch.google/magika/](https://securityresearch.google/magika/). Magika on GitHub: [https://github.com/google/magika](https://github.com/google/magika). 
# Installing MagikaJS

```bash
npm install magika
```

# Using MagikaJS

Simple usage in Node:

```js
import { readFile } from "fs/promises";
import { MagikaNode as Magika } from "magika/node";

const data = await readFile("some file");
const magika = await Magika.create();
const prediction = await magika.identifyBytes(data);
console.log(prediction);
```

Simple usage in the browser:

```js
import { Magika } from "magika";

const file = new File(["# Hello I am a markdown file"], "hello.md");
const fileBytes = new Uint8Array(await file.arrayBuffer());
const magika = await Magika.create();
const prediction = await magika.identifyBytes(fileBytes);
console.log(prediction);
```

For more, see our [documentation](https://securityresearch.google/magika/cli-and-bindings/js/).

# Command-line tool

Please use the official CLI (with `pip install magika`) as it can perform batch processing and search for files recursively.
Read more about that in the main [README](https://github.com/google/magika/blob/main/README.md).

The CLI shipped with this package is mainly useful to load the TensorFlow.js model and check that it works as expected.

Install it with `npm install -g magika`. You can then run it by executing `magika-js <some files>`.

```help
Usage: magika-js [options] <paths...>

Magika JS - file type detection with ML. https://securityresearch.google/magika/

Arguments:
  paths                                    Paths of the files to detect

Options:
  --json-output                            Format output in JSON
  --model-url <model-url>                  Model URL
  --model-path <model-path>                Model file path
  --model-config-url <model-config-url>    Model config URL
  --model-config-path <model-config-path>  Model config file path
  --by-stream                              Identify file via stream, not via bytes
  --debug                                  Output debug information
  -h, --help                               display help for command
```

# Reporting errors in detections

Please open an issue on [GitHub](https://github.com/google/magika/issues).
# Citation

If you use this software for your research, please cite it as:

```bibtex
@InProceedings{fratantonio25:magika,
  author = {Yanick Fratantonio and Luca Invernizzi and Loua Farah and Kurt Thomas and Marina Zhang and Ange Albertini and Francois Galilee and Giancarlo Metitieri and Julien Cretin and Alexandre Petit-Bianco and David Tao and Elie Bursztein},
  title = {{Magika: AI-Powered Content-Type Detection}},
  booktitle = {Proceedings of the International Conference on Software Engineering (ICSE)},
  month = {April},
  year = {2025}
}
```

# Loading the model and configuration

MagikaJS is designed to be flexible in how you provide the model and its configuration file.

Both the Node and browser versions accept URLs to asynchronously load these two assets.

```js
const magika = await Magika.create({
  modelURL: "https://...",
  modelConfigURL: "https://...",
});
```

The Node version also allows loading local files.

```js
const magika = await Magika.create({
  modelPath: "./assets/...",
  modelConfigPath: "./assets/...",
});
```

# Development

Using the model hosted on GitHub:

```bash
yarn install
yarn run build
yarn run bin -- README.md
```

Using the local model:

```bash
yarn install
yarn run build
(cd ../website; yarn install; yarn run dev) &
yarn run bin --model-url http://localhost:5173/magika/model/model.json --model-config-url http://localhost:5173/magika/model/config.json ../tests_data/basic/*
```

Using the local `magika` package when developing the website:

```bash
yarn install
yarn run build
yarn link
(cd ../website; yarn link magika; yarn install; yarn run dev) &
```

## Testing

Execute:

```bash
yarn install
yarn run build
yarn run test
```
// Command line definition. Options taking a value declare it with a
// `<placeholder>`: without it, commander treats the option as a boolean flag.
program
  .description(
    "Magika JS - file type detection with ML. https://google.github.io/magika",
  )
  .option("--json-output", "Format output in JSON")
  .option("--model-url <model-url>", "Model URL", Magika.MODEL_URL)
  .option("--model-path <model-path>", "Model file path")
  .option(
    "--model-config-url <model-config-url>",
    "Model config URL",
    Magika.MODEL_CONFIG_URL,
  )
  .option("--model-config-path <model-config-path>", "Model config file path")
  .option("--by-stream", "Identify file via stream, not via bytes")
  .option("--debug", "Output debug information")
  .argument("<paths...>", "Paths of the files to detect");

// Make commander throw instead of exiting, so that we control the exit code.
program.exitOverride();
try {
  program.parse(process.argv);
} catch (error: unknown) {
  if (
    error instanceof CommanderError &&
    error.code === "commander.helpDisplayed"
  ) {
    // Help was already displayed by commander, so just exit cleanly.
    process.exit(0);
  } else {
    // There was an error parsing the arguments, let's print the help.
    try {
      program.help();
    } catch {
      // Avoid that commander shows some weird exception.
      process.exit(1);
    }
  }
}

const flags = program.opts();

/**
 * Identifies every path given on the command line and prints one line per
 * file. Files that cannot be read are reported on stderr and skipped.
 */
async function main(): Promise<void> {
  // Commander exposes `--model-config-url` and `--model-config-path` as
  // `modelConfigUrl` and `modelConfigPath` (camel-cased long option names).
  const magika = await Magika.create({
    modelURL: flags.modelUrl,
    modelPath: flags.modelPath,
    modelConfigURL: flags.modelConfigUrl,
    modelConfigPath: flags.modelConfigPath,
  });
  await Promise.all(
    program.args.map(async (path) => {
      let data: Buffer;
      try {
        data = await readFile(path);
      } catch (error) {
        console.error("Skipping file", path, error);
        return;
      }

      // The stream mode still needs the total length of the data, which is
      // taken from the bytes read above.
      const how = flags.byStream ? "by_stream" : "by_path";
      const result = flags.byStream
        ? await magika.identifyStream(fs.createReadStream(path), data.length)
        : await magika.identifyBytes(data);

      if (flags.jsonOutput) {
        console.log(path, result);
      } else if (flags.debug) {
        console.log(
          chalk.yellow(path),
          how,
          chalk.green(
            result.prediction.dl.label,
            result.prediction.output.label,
          ),
          chalk.white(result.prediction.score),
        );
      } else {
        console.log(
          chalk.yellow(path),
          chalk.green(result.prediction.output.label),
          chalk.white(result.prediction.score.toFixed(3)),
        );
      }
    }),
  );
}

// Report failures (e.g., the model could not be loaded) and exit with an
// error code, rather than leaving an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
/**
 * The main Magika object for Node use (`MagikaNode`).
 *
 * Example usage:
 * ```js
 * import { readFile } from "fs/promises";
 * import { MagikaNode as Magika } from "magika/node";
 * const data = await readFile("some file");
 * const magika = await Magika.create();
 * const result = await magika.identifyBytes(data);
 * console.log(result.prediction.output.label);
 * ```
 * For a client-side implementation, please import `Magika` instead.
 *
 * Note that this `MagikaNode` class extends `Magika`, which means that all
 * public `Magika` APIs (e.g., `identifyBytes`) are available for `MagikaNode`
 * as well.
 *
 * Demos:
 * - Node: `/js/magika-cli.js`, which you can run with `yarn run bin -h`.
 * - Client-side: see `/website/src/components/FileClassifierDemo.vue`
 */
export class MagikaNode extends Magika {
  model_config: ModelConfigNode;
  model: ModelNode;

  protected constructor() {
    super();
    // We load the version of the model that uses tfjs/node.
    this.model_config = new ModelConfigNode();
    this.model = new ModelNode(this.model_config);
  }

  /**
   * Factory method to create a Magika instance.
   *
   * @param {MagikaOptions} options The urls or file paths where the model and
   * its config are stored.
   *
   * Parameters are optional. If not provided, the model will be loaded from GitHub.
   */
  public static async create(options?: MagikaOptions): Promise<MagikaNode> {
    const magika = new MagikaNode();
    await magika.load(options);
    return magika;
  }

  /**
   * Loads the model and its config, each one from a local file when a path is
   * given, and from a URL otherwise. Both are loaded in parallel.
   */
  protected async load(options?: MagikaOptions): Promise<void> {
    // Keep the model name in sync with the model actually loaded, as the base
    // class does in its own `load`; otherwise `getModelName()` would always
    // return the constructor's default.
    // NOTE(review): `_getModelName` takes a "pathOrUrl", so it is presumably
    // fine with local paths too; confirm against its implementation.
    this.model_name = this._getModelName(
      options?.modelPath ?? (options?.modelURL || Magika.MODEL_URL),
    );

    const configLoaded =
      options?.modelConfigPath != null
        ? this.model_config.loadFile(options.modelConfigPath)
        : this.model_config.loadUrl(
            options?.modelConfigURL || Magika.MODEL_CONFIG_URL,
          );
    const modelLoaded =
      options?.modelPath != null
        ? this.model.loadFile(options.modelPath)
        : this.model.loadUrl(options?.modelURL || Magika.MODEL_URL);
    await Promise.all([configLoaded, modelLoaded]);
  }

  /**
   * Identifies the content type from a read stream
   *
   * @param {ReadStream} stream A read stream.
   * @param {number} length Total length of stream data.
   * @returns {MagikaResult} An object containing the result of the content type
   * prediction.
   */
  public async identifyStream(
    stream: ReadStream,
    length: number,
  ): Promise<MagikaResult> {
    return await this._identifyFromStream(stream, length);
  }

  /**
   * Reads the stream to its end and identifies the collected bytes.
   *
   * ReadStream allows us to read a file chunk by chunk, sequentially. It does
   * not allow to seek around. So, the optimization we do here is to avoid
   * storing the full file in memory; but we are indeed traversing the full
   * file. For small files (at most `4 * block_size` bytes, according to
   * `length`) all the bytes are collected. For large files, only the first and
   * the last `block_size` bytes are kept.
   *
   * The returned promise rejects if the stream fails or yields strings.
   */
  private async _identifyFromStream(
    stream: ReadStream,
    length: number,
  ): Promise<MagikaResult> {
    const blockSize = this.model_config.block_size;
    const keepWholeFile = length <= 4 * blockSize;
    // Chunks of a small file, in order.
    const chunks: Buffer[] = [];
    // First `blockSize` bytes of a large file, once enough bytes were read.
    let begBlock: Buffer | null = null;
    // Most recent bytes of a large file: everything read so far until the beg
    // block is available, then only the last `blockSize` bytes.
    let tail: Buffer = Buffer.alloc(0);

    stream.on("data", (data: string | Buffer) => {
      if (typeof data === "string") {
        // Throwing from an event listener would surface as an uncaught
        // exception. Destroying the stream with the error makes `finished`,
        // and thus this method, reject instead.
        stream.destroy(
          new Error("Stream data should be a Buffer, not a string"),
        );
        return;
      }
      if (keepWholeFile) {
        chunks.push(data);
        return;
      }
      tail = Buffer.concat([tail, data]);
      if (begBlock === null && tail.length >= blockSize) {
        // We have at least block_size bytes, let's keep them as the first
        // block. They are copied so that the chunk they come from can be
        // garbage collected.
        begBlock = Buffer.from(tail.subarray(0, blockSize));
      }
      if (begBlock !== null) {
        // From now on we only need the last block_size bytes: at the very end
        // of the stream, they are the "end block".
        tail = tail.subarray(tail.length - blockSize);
      }
    });
    await finished(stream);

    if (keepWholeFile) {
      return await this._identifyFromBytes(Buffer.concat(chunks));
    }
    if (begBlock === null) {
      // The stream held fewer than block_size bytes, despite the given
      // length: `tail` holds all of them.
      return await this._identifyFromBytes(tail);
    }
    // The "beg block" and the "end block" together form the bytes passed to
    // the features extraction. This is assembled once the stream has ended, so
    // it does not depend on `length` matching the number of bytes read.
    return await this._identifyFromBytes(Buffer.concat([begBlock, tail]));
  }
}
import { ContentTypeInfo } from "./src/content-type-info.js";
import { ContentTypeLabel } from "./src/content-type-label.js";
import { ContentTypesInfos } from "./src/content-types-infos.js";
import { MagikaOptions } from "./src/magika-options.js";
import { MagikaResult } from "./src/magika-result.js";
import { ModelConfig } from "./src/model-config.js";
import { ModelFeatures } from "./src/model-features.js";
import { ModelPrediction } from "./src/model-prediction.js";
import { Model } from "./src/model.js";
import { OverwriteReason } from "./src/overwrite-reason.js";
import { Status } from "./src/status.js";

/**
 * The main Magika object for client-side use.
 *
 * Example usage:
 * ```js
 * const file = new File(["# Hello I am a markdown file"], "hello.md");
 * const fileBytes = new Uint8Array(await file.arrayBuffer());
 * const magika = await Magika.create();
 * const result = await magika.identifyBytes(fileBytes);
 * console.log(result.prediction.output.label);
 * ```
 * For a Node implementation, please import `MagikaNode` instead.
 *
 * Demos:
 * - Node: `/js/magika-cli.js`, which you can run with `yarn run bin -h`.
 * - Client-side: see `/website/src/components/FileClassifierDemo.vue`
 */
export class Magika {
  /** Model configuration (sizes, thresholds, overwrite map), filled in by `load`. */
  model_config: ModelConfig;
  /** The model used to score extracted features. */
  model: Model;
  /** Name derived from the model URL/path; `"unknown"` until `load` runs. */
  model_name: string;
  /** Lookup table from content type label to its `ContentTypeInfo`. */
  cts_infos: ContentTypesInfos;

  static MODEL_VERSION = "standard_v3_3";
  static MODEL_CONFIG_URL = `https://google.github.io/magika/models/${this.MODEL_VERSION}/config.min.json`;
  static MODEL_URL = `https://google.github.io/magika/models/${this.MODEL_VERSION}/model.json`;
  /** Byte values stripped from the beginning/end of the content before feature extraction. */
  static WHITESPACE_CHARS = [..." \t\n\r\v\f"].map((c) => c.charCodeAt(0));

  /** Instances are built through `Magika.create()`, which also loads the model. */
  protected constructor() {
    this.model_config = new ModelConfig();
    this.model = new Model(this.model_config);
    this.model_name = "unknown";
    this.cts_infos = ContentTypesInfos.get();
  }

  /**
   * Factory method to create a Magika instance.
   *
   * @param {MagikaOptions} options The urls or file paths where the model and
   * its config are stored.
   *
   * Parameters are optional. If not provided, the model will be loaded from GitHub.
   * @returns {Promise<Magika>} A ready-to-use instance (model and config loaded).
   */
  public static async create(options?: MagikaOptions): Promise<Magika> {
    const magika = new Magika();
    await magika.load(options);
    return magika;
  }

  /**
   * Loads the model and its configuration, falling back to the default URLs
   * for any option that is missing (or empty).
   *
   * @param {MagikaOptions} options Optional model / config locations.
   */
  protected async load(options?: MagikaOptions): Promise<void> {
    const modelURL = options?.modelURL || Magika.MODEL_URL;
    const modelConfigURL = options?.modelConfigURL || Magika.MODEL_CONFIG_URL;
    this.model_name = this._getModelName(modelURL);
    // The two downloads are independent, so run them concurrently.
    await Promise.all([
      this.model.loadUrl(modelURL),
      this.model_config.loadUrl(modelConfigURL),
    ]);
  }

  /**
   * Identifies the content type of a byte array.
   *
   * @param {Uint8Array} fileBytes A fixed-length sequence of bytes.
   * @returns {MagikaResult} An object containing the result of the content type
   * prediction.
   */
  public async identifyBytes(fileBytes: Uint8Array): Promise<MagikaResult> {
    const result = await this._identifyFromBytes(fileBytes);
    return result;
  }

  /** Returns the name of the loaded model (see `_getModelName`). */
  public getModelName(): string {
    return this.model_name;
  }

  /**
   * Classifies content that is too short for the model: content that decodes
   * as valid UTF-8 is reported as generic text, anything else as unknown.
   *
   * @param fileBytes The (short) content to classify.
   * @param path Path reported in the result; defaults to `"-"`.
   * @throws {Error} If `fileBytes` is longer than four blocks.
   */
  private _getResultFromFewBytes(
    fileBytes: Uint8Array,
    path: string = "-",
  ): MagikaResult {
    if (fileBytes.length > 4 * this.model_config.block_size) {
      throw new Error("fileBytes is unexpectedly long for this function.");
    }
    // `fatal: true` makes decode() throw on invalid UTF-8 instead of
    // silently substituting replacement characters.
    const decoder = new TextDecoder("utf-8", { fatal: true });
    try {
      decoder.decode(fileBytes);
      return this._getResultFromLabelsAndScore(
        path,
        Status.OK,
        ContentTypeLabel.UNDEFINED,
        ContentTypeLabel.TXT,
        1.0,
      );
    } catch {
      return this._getResultFromLabelsAndScore(
        path,
        Status.OK,
        ContentTypeLabel.UNDEFINED,
        ContentTypeLabel.UNKNOWN,
        1.0,
      );
    }
  }

  /** Returns a view of `fileBytes` without its leading whitespace bytes. */
  private static _lstrip(fileBytes: Uint8Array): Uint8Array {
    let startIndex = 0;
    while (
      startIndex < fileBytes.length &&
      Magika.WHITESPACE_CHARS.includes(fileBytes[startIndex])
    ) {
      startIndex++;
    }
    return fileBytes.subarray(startIndex);
  }

  /** Returns a view of `fileBytes` without its trailing whitespace bytes. */
  private static _rstrip(fileBytes: Uint8Array): Uint8Array {
    let endIndex = fileBytes.length - 1;
    while (
      endIndex >= 0 &&
      Magika.WHITESPACE_CHARS.includes(fileBytes[endIndex])
    ) {
      endIndex--;
    }
    return fileBytes.subarray(0, endIndex + 1);
  }

  /**
   * Core identification routine: empty content is reported as `EMPTY`,
   * content shorter than `min_file_size_for_dl` is handled without the model,
   * and everything else goes through feature extraction and inference.
   *
   * @param fileBytes The content to identify.
   */
  protected async _identifyFromBytes(
    fileBytes: Uint8Array,
  ): Promise<MagikaResult> {
    if (fileBytes.length === 0) {
      return this._getResultFromLabelsAndScore(
        "-",
        Status.OK,
        ContentTypeLabel.UNDEFINED,
        ContentTypeLabel.EMPTY,
        1.0,
      );
    }

    if (fileBytes.length < this.model_config.min_file_size_for_dl) {
      return this._getResultFromFewBytes(fileBytes);
    }

    const features = Magika._extractFeaturesFromBytes(
      fileBytes,
      this.model_config.beg_size,
      this.model_config.mid_size,
      this.model_config.end_size,
      this.model_config.padding_token,
      this.model_config.block_size,
      this.model_config.use_inputs_at_offsets,
    );
    return await this._getResultFromFeatures(features);
  }

  /**
   * Turns the raw model prediction into the label reported to the caller.
   *
   * @param model_prediction The label and score produced by the model.
   * @returns The output label together with the reason (if any) why it
   * differs from the model's label.
   */
  private _getOutputLabelFromModelPrediction(
    model_prediction: ModelPrediction,
  ): [ContentTypeLabel, OverwriteReason] {
    let overwrite_reason = OverwriteReason.NONE;

    // Overwrite model_prediction.label if specified in the overwrite_map.
    let output_label =
      this.model_config.overwrite_map[model_prediction.label] ??
      model_prediction.label;
    if (output_label !== model_prediction.label) {
      overwrite_reason = OverwriteReason.OVERWRITE_MAP;
    }

    // The following code checks whether the score is "high enough" according to
    // HIGH_CONFIDENCE prediction mode (the only one we currently support in
    // this implementation). If it's not, it means we can't trust the model, and
    // we return a generic content type.
    const threshold =
      this.model_config.thresholds[model_prediction.label] ??
      this.model_config.medium_confidence_threshold;
    if (model_prediction.score < threshold) {
      overwrite_reason = OverwriteReason.LOW_CONFIDENCE;
      if (this._getContentTypeInfo(model_prediction.label).is_text) {
        output_label = ContentTypeLabel.TXT;
      } else {
        output_label = ContentTypeLabel.UNKNOWN;
      }
      if (model_prediction.label === output_label) {
        // overwrite_reason is useful to convey to clients why the output
        // predicted is different than the model predicted type; if those two
        // are the same, the model predicted type has not actually been
        // overwritten, so we set this to NONE.
        overwrite_reason = OverwriteReason.NONE;
      }
    }

    return [output_label, overwrite_reason];
  }

  /**
   * Builds the model input from the beginning and the end of the content.
   *
   * The first and last `block_size` bytes are stripped of leading/trailing
   * whitespace respectively; up to `beg_size` bytes are then taken from the
   * start and up to `end_size` bytes from the end.
   *
   * @param fileBytes The content to extract features from.
   * @param beg_size Number of bytes kept from the beginning.
   * @param mid_size Size of the middle section, forwarded to `ModelFeatures`.
   * @param end_size Number of bytes kept from the end.
   * @param padding_token Padding value, forwarded to `ModelFeatures`.
   * @param block_size Size of the window inspected at each extremity.
   * @param use_inputs_at_offsets Forwarded to `ModelFeatures`.
   */
  protected static _extractFeaturesFromBytes(
    fileBytes: Uint8Array,
    beg_size: number,
    mid_size: number,
    end_size: number,
    padding_token: number,
    block_size: number,
    use_inputs_at_offsets: boolean,
  ): ModelFeatures {
    const begChunk = this._lstrip(
      fileBytes.slice(0, Math.min(block_size, fileBytes.length)),
    );
    const begBytes = begChunk.slice(0, Math.min(begChunk.length, beg_size));

    const endChunk = this._rstrip(
      fileBytes.slice(Math.max(0, fileBytes.length - block_size)),
    );
    const endBytes = endChunk.slice(Math.max(0, endChunk.length - end_size));
    // When fewer than `end_size` bytes are available, the end bytes are
    // written at a positive offset so that they stay right-aligned.
    const endOffset = Math.max(0, end_size - endBytes.length);

    return new ModelFeatures(
      beg_size,
      mid_size,
      end_size,
      padding_token,
      use_inputs_at_offsets,
    )
      .withStart(begBytes, 0)
      .withEnd(endBytes, endOffset);
  }

  /** Looks up the `ContentTypeInfo` registered for `label`. */
  private _getContentTypeInfo(label: ContentTypeLabel): ContentTypeInfo {
    return this.cts_infos[label];
  }

  /**
   * Assembles a `MagikaResult` from its individual components.
   *
   * @param path Path reported in the result.
   * @param status Status of the identification.
   * @param dl_label Label predicted by the model (or `UNDEFINED`).
   * @param output Label reported to the caller.
   * @param score Score associated with the prediction.
   * @param overwrite_reason Why `output` differs from `dl_label`, if it does.
   * @param scores_map Optional per-label scores.
   */
  private _getResultFromLabelsAndScore(
    path: string,
    status: Status = Status.OK,
    dl_label: ContentTypeLabel,
    output: ContentTypeLabel,
    score: number,
    overwrite_reason: OverwriteReason = OverwriteReason.NONE,
    scores_map?: Partial<Record<ContentTypeLabel, number>>,
  ): MagikaResult {
    return {
      path: path,
      status: status,
      prediction: {
        dl: this._getContentTypeInfo(dl_label),
        output: this._getContentTypeInfo(output),
        score: score,
        overwrite_reason: overwrite_reason,
        scores_map: scores_map,
      },
    };
  }

  /**
   * Runs the model on `features` and post-processes its prediction into a
   * `MagikaResult`.
   */
  private async _getResultFromFeatures(
    features: ModelFeatures,
  ): Promise<MagikaResult> {
    const model_prediction = await this.model.predict(features);
    const [output_label, overwrite_reason] =
      this._getOutputLabelFromModelPrediction(model_prediction);
    return this._getResultFromLabelsAndScore(
      "-",
      Status.OK,
      model_prediction.label,
      output_label,
      model_prediction.score,
      overwrite_reason,
      model_prediction.scores_map,
    );
  }

  /**
   * Derives the model name from a model path or URL: the name is the
   * second-to-last non-empty `/`-separated component (i.e., the directory
   * that contains the model file).
   *
   * @param pathOrUrl Path or URL of the model file.
   * @returns The model name, or `"unknown"` if it cannot be determined.
   */
  protected _getModelName(pathOrUrl: string): string {
    const UNKNOWN_MODEL_NAME = "unknown";
    try {
      const parts = pathOrUrl.split("/");
      // Filter out empty strings that can occur due to leading/trailing slashes
      const nonEmptyParts = parts.filter((part) => part !== "");

      if (nonEmptyParts.length >= 2) {
        return nonEmptyParts[nonEmptyParts.length - 2];
      } else {
        return UNKNOWN_MODEL_NAME;
      }
    } catch (error) {
      console.error("Error processing path or URL to get model name:", error);
      return UNKNOWN_MODEL_NAME;
    }
  }
}
================================================ FILE: js/package.json ================================================
{
  "name": "magika",
  "version": "1.0.0",
  "description": "A tool to detect content types with deep learning.",
  "type": "module",
  "main": "./dist/cjs/magika.js",
  "types": "./dist/cjs/magika.d.ts",
  "module": "./dist/mjs/magika.js",
  "exports": {
    "./node": {
      "import": "./dist/mjs/magika-node.js",
      "require": "./dist/cjs/magika-node.js",
      "default": "./dist/cjs/magika-node.js"
    },
    ".": {
      "import": "./dist/mjs/magika.js",
      "require": "./dist/cjs/magika.js",
      "default": "./dist/cjs/magika.js"
    }
  },
  "repository": "https://github.com/google/magika",
  "author": "Luca Invernizzi , Yanick Fratantonio ",
  "license": "Apache-2.0",
  "keywords": [
    "content type",
    "file type",
    "magic"
  ],
  "scripts": {
    "bin": "TF_CPP_MIN_LOG_LEVEL=1 node ./dist/mjs/magika-cli.js",
    "make-docs": "documentation build *.ts --parse-extension ts -f md --github -o ../docs/js.md",
    "test": "TF_CPP_MIN_LOG_LEVEL=1 yarn build && node node_modules/jest/bin/jest.js",
    "build": "rm -fr dist/* && tsc -p tsconfig.esm.json && tsc -p tsconfig.cjs.json && node postBuild.js",
    "format": "prettier --write *.ts src/*.ts"
  },
  "bin": {
    "magika-js": "./dist/cjs/magika-cli.js"
  },
  "dependencies": {
    "@tensorflow/tfjs": "^4.22.0"
  },
  "optionalDependencies": {
    "@tensorflow/tfjs-node": "^4.22.0",
    "chalk": "^5.3.0",
    "commander": "^13.1.0"
  },
  "jest": {
    "moduleFileExtensions": [
      "js",
      "json",
      "ts"
    ],
    "transformIgnorePatterns": [
      "/node_modules/"
    ],
    "testPathIgnorePatterns": [
      "/node_modules/"
    ],
    "rootDir": "./",
    "testRegex": ".test.ts$",
    "transform": {
      "^.+\\.ts$": [
        "ts-jest",
        {
          "tsconfig": "./tsconfig.cjs.json"
        }
      ]
    },
    "moduleNameMapper": {
      "(.+)\\.js": "$1"
    },
    "testEnvironment": "node",
    "collectCoverage": false
  },
  "devDependencies": {
    "@types/jest": "^29.5.12",
    "@types/node": "^24.3.1",
    "documentation": "^14.0.3",
    "fast-check": "^4.1.0",
    "jest": "^29.5.0",
    "prettier": "^3.5.3",
    "ts-jest": "^29.0.5",
    "typescript": "^5.0.2"
  }
}
================================================ FILE: js/postBuild.js ================================================
// format sub package.json for dual cjs and esm support
import fs from 'fs';

// Writes a copy of the package.json at `source` to `output`, dropping the
// entry-point fields and setting "type" to `type`.
const formatPackage = (source, output, type) => {
    const remove = {main: true, module: true, browser: true, types: true, exports: true};
    const json = JSON.parse(fs.readFileSync(source, 'utf-8'));
    fs.writeFileSync(output, JSON.stringify({
        ...Object.fromEntries(Object.entries(json).filter(([key]) => !remove[key])),
        type:type
    }, null, 4));
}
formatPackage('./package.json', './dist/cjs/package.json', 'commonjs');
formatPackage('./package.json', './dist/mjs/package.json', 'module');
================================================ FILE: js/simple_examples/browser-esmodule-example/index.html ================================================
Playwright Test
================================================ FILE: js/simple_examples/browser-esmodule-example/index.js ================================================
import { Magika } from "magika";

// Identifies a small Python snippet and appends the status and the predicted
// label to the page as `div.status` and `div.label`.
async function main() {
  const magika = await Magika.create();
  const data = new TextEncoder().encode("import os; print(os.uname())");
  const prediction = await magika.identifyBytes(data);

  const statusDiv = document.createElement("div");
  statusDiv.className = "status";
  statusDiv.textContent = prediction.status;
  document.body.appendChild(statusDiv);

  const labelDiv = document.createElement("div");
  labelDiv.className = "label";
  labelDiv.textContent = prediction.prediction.output.label;
  document.body.appendChild(labelDiv);
}

document.addEventListener("DOMContentLoaded", main);
================================================ FILE: js/simple_examples/browser-esmodule-example/package.json ================================================
{
  "name": "browser-esmodule-example",
  "version": "1.0.0",
  "description": "Magika browser esmodule example",
  "scripts": {
    "server": "vite --port 8000",
    "start": "npx playwright test"
  },
  "author": "Luca Invernizzi ",
  "license": "ISC",
  "dependencies": {
    "magika": "file:../../",
    "vite": "^7.1.7"
  },
  "devDependencies": {
    "@playwright/test": "^1.55.1"
  }
}
================================================ FILE: js/simple_examples/browser-esmodule-example/playwright.config.ts ================================================
import { defineConfig } from "@playwright/test";

export default defineConfig({
  // Start the local dev server before the tests run.
  webServer: {
    command: "npm run server",
    url: "http://localhost:8000",
    // Outside CI, reuse a server that is already listening on the URL.
    reuseExistingServer: !process.env.CI,
    stdout: "ignore",
    stderr: "pipe",
  },
});
================================================ FILE: js/simple_examples/browser-esmodule-example/test/simple.spec.ts ================================================
import { test, expect } from "@playwright/test";

// The example page appends `div.status` and `div.label` once Magika has run.
test("can run Magika", async ({ page }) => {
  const statusDiv = page.locator("div.status");
  const labelDiv = page.locator("div.label");

  await page.goto("http://localhost:8000");

  await page.waitForSelector("div.status");
  await expect(statusDiv).toHaveText("ok");

  await page.waitForSelector("div.label");
  await expect(labelDiv).toHaveText("python");
});
================================================ FILE: js/simple_examples/node-commonjs-example/index.js ================================================
const { MagikaNode: Magika } = require("magika/node");

// Identifies a small Python snippet and prints the status and output label.
async function main() {
  const magika = await Magika.create();
  const input = Buffer.from("import os; print(os.uname())");
  const result = await magika.identifyBytes(input);
  console.log(result.status, result.prediction.output.label);
}

main();
================================================ FILE:
js/simple_examples/node-commonjs-example/package.json ================================================ { "name": "magika-node-commonjs-example", "version": "1.0.0", "main": "index.js", "scripts": { "start": "node index.js" }, "author": "Luca Invernizzi ", "license": "ISC", "description": "Magika node commonJS example", "dependencies": { "magika": "file:../../" } } ================================================ FILE: js/simple_examples/node-esmodule-example/index.js ================================================ import { MagikaNode as Magika } from "magika/node"; import { TextEncoder } from "util"; const magika = await Magika.create(); const text = "import os; print(os.uname())"; const bytes = new TextEncoder().encode(text); const prediction = await magika.identifyBytes(bytes); console.log(prediction.status, prediction.prediction.output.label); ================================================ FILE: js/simple_examples/node-esmodule-example/package.json ================================================ { "name": "magika-node-esmodule-example", "version": "1.0.0", "main": "index.js", "type": "module", "scripts": { "start": "node index.js" }, "author": "Luca Invernizzi ", "license": "ISC", "description": "Using Magika in Node via imports", "dependencies": { "magika": "file:../../" } } ================================================ FILE: js/simple_examples/run_examples.sh ================================================ #!/bin/bash # Exit on error set -e ROOT_DIR=$(pwd) export TF_CPP_MIN_LOG_LEVEL=2 export NODE_OPTIONS='--no-warnings' EXAMPLES=("node-commonjs-example" "node-esmodule-example" "browser-esmodule-example" "typescript-esmodule-example") for example in "${EXAMPLES[@]}" do echo "--- Running Example: $example ---" if [ -d "$example" ]; then cd "$example" if [ -f "package.json" ]; then npm run --silent start else echo "No package.json found in $(pwd)" fi cd "$ROOT_DIR" else echo "Directory $example not found" fi done 
================================================ FILE: js/simple_examples/typescript-esmodule-example/index.ts ================================================
import { MagikaNode as Magika } from "magika/node";

/**
 * Identifies a small Python snippet and prints the status and output label.
 */
async function main(): Promise<void> {
  const magika = await Magika.create();
  const data = Buffer.from("import os; print(os.uname())");
  const prediction = await magika.identifyBytes(data);
  console.log(prediction.status, prediction.prediction.output.label);
}

main();
================================================ FILE: js/simple_examples/typescript-esmodule-example/package.json ================================================
{
  "name": "magika-typescript-esmodule-example",
  "version": "1.0.0",
  "main": "index.js",
  "type": "module",
  "scripts": {
    "start": "npx tsx index.ts"
  },
  "author": "Luca Invernizzi ",
  "license": "ISC",
  "description": "Using magika in node & typescript via imports",
  "dependencies": {
    "magika": "file:../../"
  },
  "devDependencies": {
    "ts-node": "^10.9.2",
    "tsx": "^4.20.6",
    "typescript": "^5.4.5"
  }
}
================================================ FILE: js/src/.npmignore ================================================
================================================ FILE: js/src/content-type-info.ts ================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { ContentTypeLabel } from "./content-type-label.js";

/**
 * Describes a single content type.
 */
export interface ContentTypeInfo {
  /** The label identifying the content type. */
  label: ContentTypeLabel;
  /** Flags the content type as text (presumably as opposed to binary). */
  is_text: boolean;
}
================================================ FILE: js/src/content-type-label.ts ================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: DO NOT EDIT --- This file is automatically generated by sync.py.

// This is the list of all possible content types we know about; however, models
// support a smaller subset of them. See model's README.md for details.
export enum ContentTypeLabel { _3DS = "3ds", _3DSM = "3dsm", _3DSX = "3dsx", _3GP = "3gp", _3MF = "3mf", ABNF = "abnf", ACE = "ace", ADA = "ada", AFF = "aff", AI = "ai", AIDL = "aidl", ALGOL68 = "algol68", ANI = "ani", APK = "apk", APPLEBPLIST = "applebplist", APPLEDOUBLE = "appledouble", APPLEPLIST = "appleplist", APPLESINGLE = "applesingle", AR = "ar", ARC = "arc", ARJ = "arj", ARROW = "arrow", ASC = "asc", ASD = "asd", ASF = "asf", ASM = "asm", ASP = "asp", AU = "au", AUTOHOTKEY = "autohotkey", AUTOIT = "autoit", AVI = "avi", AVIF = "avif", AVRO = "avro", AWK = "awk", AX = "ax", BATCH = "batch", BAZEL = "bazel", BCAD = "bcad", BIB = "bib", BMP = "bmp", BPG = "bpg", BPL = "bpl", BRAINFUCK = "brainfuck", BRF = "brf", BZIP = "bzip", BZIP3 = "bzip3", C = "c", CAB = "cab", CAD = "cad", CAT = "cat", CDF = "cdf", CHM = "chm", CLOJURE = "clojure", CMAKE = "cmake", COBOL = "cobol", COFF = "coff", COFFEESCRIPT = "coffeescript", COM = "com", CPL = "cpl", CPP = "cpp", CRT = "crt", CRX = "crx", CS = "cs", CSPROJ = "csproj", CSS = "css", CSV = "csv", CTL = "ctl", DART = "dart", DEB = "deb", DEX = "dex", DEY = "dey", DICOM = "dicom", DIFF = "diff", DIRECTORY = "directory", DJANGO = "django", DLL = "dll", DM = "dm", DMG = "dmg", DMIGD = "dmigd", DMSCRIPT = "dmscript", DOC = "doc", DOCKERFILE = "dockerfile", DOCX = "docx", DOSMBR = "dosmbr", DOTX = "dotx", DSSTORE = "dsstore", DWG = "dwg", DXF = "dxf", DYLIB = "dylib", EBML = "ebml", ELF = "elf", ELIXIR = "elixir", EMF = "emf", EML = "eml", EMPTY = "empty", EPUB = "epub", ERB = "erb", ERLANG = "erlang", ESE = "ese", EXE = "exe", EXP = "exp", FLAC = "flac", FLUTTER = "flutter", FLV = "flv", FORTRAN = "fortran", FPX = "fpx", GEMFILE = "gemfile", GEMSPEC = "gemspec", GIF = "gif", GITATTRIBUTES = "gitattributes", GITMODULES = "gitmodules", GLEAM = "gleam", GO = "go", GPX = "gpx", GRADLE = "gradle", GROOVY = "groovy", GZIP = "gzip", H = "h", H5 = "h5", HANDLEBARS = "handlebars", HASKELL = "haskell", HCL = "hcl", HEIF = "heif", HFS = 
"hfs", HLP = "hlp", HPP = "hpp", HTA = "hta", HTACCESS = "htaccess", HTML = "html", HVE = "hve", HWP = "hwp", ICC = "icc", ICNS = "icns", ICO = "ico", ICS = "ics", IGNOREFILE = "ignorefile", IMG = "img", INI = "ini", INTERNETSHORTCUT = "internetshortcut", IOSAPP = "iosapp", IPYNB = "ipynb", ISO = "iso", JAR = "jar", JAVA = "java", JAVABYTECODE = "javabytecode", JAVASCRIPT = "javascript", JINJA = "jinja", JNG = "jng", JNLP = "jnlp", JP2 = "jp2", JPEG = "jpeg", JSON = "json", JSONC = "jsonc", JSONL = "jsonl", JSX = "jsx", JULIA = "julia", JXL = "jxl", KO = "ko", KOTLIN = "kotlin", KS = "ks", LATEX = "latex", LATEXAUX = "latexaux", LESS = "less", LHA = "lha", LICENSE = "license", LISP = "lisp", LITCS = "litcs", LNK = "lnk", LOCK = "lock", LRZ = "lrz", LUA = "lua", LZ = "lz", LZ4 = "lz4", LZX = "lzx", M3U = "m3u", M4 = "m4", MACHO = "macho", MAFF = "maff", MAKEFILE = "makefile", MARKDOWN = "markdown", MATLAB = "matlab", MHT = "mht", MIDI = "midi", MKV = "mkv", MP2 = "mp2", MP3 = "mp3", MP4 = "mp4", MPEGTS = "mpegts", MSCOMPRESS = "mscompress", MSI = "msi", MSIX = "msix", MST = "mst", MUI = "mui", MUM = "mum", MUN = "mun", NIM = "nim", NPY = "npy", NPZ = "npz", NULL = "null", NUPKG = "nupkg", OBJECT = "object", OBJECTIVEC = "objectivec", OCAML = "ocaml", OCX = "ocx", ODEX = "odex", ODIN = "odin", ODP = "odp", ODS = "ods", ODT = "odt", OGG = "ogg", OLE = "ole", ONE = "one", ONNX = "onnx", OOXML = "ooxml", OTF = "otf", OUTLOOK = "outlook", PALMOS = "palmos", PARQUET = "parquet", PASCAL = "pascal", PBM = "pbm", PCAP = "pcap", PDB = "pdb", PDF = "pdf", PEBIN = "pebin", PEM = "pem", PERL = "perl", PGP = "pgp", PHP = "php", PICKLE = "pickle", PNG = "png", PO = "po", POSTSCRIPT = "postscript", POWERSHELL = "powershell", PPT = "ppt", PPTX = "pptx", PRINTFOX = "printfox", PROLOG = "prolog", PROTEINDB = "proteindb", PROTO = "proto", PROTOBUF = "protobuf", PSD = "psd", PUB = "pub", PYTHON = "python", PYTHONBYTECODE = "pythonbytecode", PYTHONPAR = "pythonpar", PYTORCH = "pytorch", 
QOI = "qoi", QT = "qt", R = "r", RANDOMASCII = "randomascii", RANDOMBYTES = "randombytes", RANDOMTXT = "randomtxt", RAR = "rar", RDF = "rdf", RDP = "rdp", RIFF = "riff", RLIB = "rlib", RLL = "rll", RPM = "rpm", RST = "rst", RTF = "rtf", RUBY = "ruby", RUST = "rust", RZIP = "rzip", SCALA = "scala", SCHEME = "scheme", SCR = "scr", SCRIPTWSF = "scriptwsf", SCSS = "scss", SEVENZIP = "sevenzip", SGML = "sgml", SH3D = "sh3d", SHELL = "shell", SMALI = "smali", SNAP = "snap", SO = "so", SOLIDITY = "solidity", SQL = "sql", SQLITE = "sqlite", SQUASHFS = "squashfs", SRT = "srt", STLBINARY = "stlbinary", STLTEXT = "stltext", SUM = "sum", SVD = "svd", SVG = "svg", SWF = "swf", SWIFT = "swift", SYMLINK = "symlink", SYMLINKTEXT = "symlinktext", SYS = "sys", TAR = "tar", TCL = "tcl", TEXTPROTO = "textproto", TGA = "tga", THUMBSDB = "thumbsdb", TIFF = "tiff", TMDX = "tmdx", TOML = "toml", TORRENT = "torrent", TROFF = "troff", TSV = "tsv", TSX = "tsx", TTF = "ttf", TWIG = "twig", TXT = "txt", TXTASCII = "txtascii", TXTUTF16 = "txtutf16", TXTUTF8 = "txtutf8", TYPESCRIPT = "typescript", UDF = "udf", UNDEFINED = "undefined", UNIXCOMPRESS = "unixcompress", UNKNOWN = "unknown", VBA = "vba", VBE = "vbe", VCARD = "vcard", VCS = "vcs", VCXPROJ = "vcxproj", VERILOG = "verilog", VHD = "vhd", VHDL = "vhdl", VISIO = "visio", VTT = "vtt", VUE = "vue", WAD = "wad", WASM = "wasm", WAV = "wav", WEBM = "webm", WEBP = "webp", WEBTEMPLATE = "webtemplate", WIM = "wim", WINREGISTRY = "winregistry", WMA = "wma", WMF = "wmf", WMV = "wmv", WOFF = "woff", WOFF2 = "woff2", XAR = "xar", XCF = "xcf", XLS = "xls", XLSB = "xlsb", XLSX = "xlsx", XML = "xml", XPI = "xpi", XSD = "xsd", XZ = "xz", YAML = "yaml", YARA = "yara", ZIG = "zig", ZIP = "zip", ZLIBSTREAM = "zlibstream", ZST = "zst", } ================================================ FILE: js/src/content-types-infos.ts ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the 
"License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // NOTE: DO NOT EDIT --- This file is automatically generated by sync.py. import { ContentTypeInfo } from "./content-type-info.js"; import { ContentTypeLabel } from "./content-type-label.js"; export type ContentTypesInfos = Record; export const ContentTypesInfos = { get: (): ContentTypesInfos => ({ [ContentTypeLabel._3DS]: { label: ContentTypeLabel._3DS, is_text: false, }, [ContentTypeLabel._3DSM]: { label: ContentTypeLabel._3DSM, is_text: false, }, [ContentTypeLabel._3DSX]: { label: ContentTypeLabel._3DSX, is_text: false, }, [ContentTypeLabel._3GP]: { label: ContentTypeLabel._3GP, is_text: false, }, [ContentTypeLabel._3MF]: { label: ContentTypeLabel._3MF, is_text: false, }, [ContentTypeLabel.ABNF]: { label: ContentTypeLabel.ABNF, is_text: false, }, [ContentTypeLabel.ACE]: { label: ContentTypeLabel.ACE, is_text: false, }, [ContentTypeLabel.ADA]: { label: ContentTypeLabel.ADA, is_text: false, }, [ContentTypeLabel.AFF]: { label: ContentTypeLabel.AFF, is_text: true, }, [ContentTypeLabel.AI]: { label: ContentTypeLabel.AI, is_text: false, }, [ContentTypeLabel.AIDL]: { label: ContentTypeLabel.AIDL, is_text: true, }, [ContentTypeLabel.ALGOL68]: { label: ContentTypeLabel.ALGOL68, is_text: false, }, [ContentTypeLabel.ANI]: { label: ContentTypeLabel.ANI, is_text: false, }, [ContentTypeLabel.APK]: { label: ContentTypeLabel.APK, is_text: false, }, [ContentTypeLabel.APPLEBPLIST]: { label: ContentTypeLabel.APPLEBPLIST, is_text: false, }, [ContentTypeLabel.APPLEDOUBLE]: { label: 
ContentTypeLabel.APPLEDOUBLE, is_text: false, }, [ContentTypeLabel.APPLEPLIST]: { label: ContentTypeLabel.APPLEPLIST, is_text: true, }, [ContentTypeLabel.APPLESINGLE]: { label: ContentTypeLabel.APPLESINGLE, is_text: false, }, [ContentTypeLabel.AR]: { label: ContentTypeLabel.AR, is_text: false, }, [ContentTypeLabel.ARC]: { label: ContentTypeLabel.ARC, is_text: false, }, [ContentTypeLabel.ARJ]: { label: ContentTypeLabel.ARJ, is_text: false, }, [ContentTypeLabel.ARROW]: { label: ContentTypeLabel.ARROW, is_text: false, }, [ContentTypeLabel.ASC]: { label: ContentTypeLabel.ASC, is_text: true, }, [ContentTypeLabel.ASD]: { label: ContentTypeLabel.ASD, is_text: false, }, [ContentTypeLabel.ASF]: { label: ContentTypeLabel.ASF, is_text: false, }, [ContentTypeLabel.ASM]: { label: ContentTypeLabel.ASM, is_text: true, }, [ContentTypeLabel.ASP]: { label: ContentTypeLabel.ASP, is_text: true, }, [ContentTypeLabel.AU]: { label: ContentTypeLabel.AU, is_text: false, }, [ContentTypeLabel.AUTOHOTKEY]: { label: ContentTypeLabel.AUTOHOTKEY, is_text: true, }, [ContentTypeLabel.AUTOIT]: { label: ContentTypeLabel.AUTOIT, is_text: true, }, [ContentTypeLabel.AVI]: { label: ContentTypeLabel.AVI, is_text: false, }, [ContentTypeLabel.AVIF]: { label: ContentTypeLabel.AVIF, is_text: false, }, [ContentTypeLabel.AVRO]: { label: ContentTypeLabel.AVRO, is_text: false, }, [ContentTypeLabel.AWK]: { label: ContentTypeLabel.AWK, is_text: true, }, [ContentTypeLabel.AX]: { label: ContentTypeLabel.AX, is_text: false, }, [ContentTypeLabel.BATCH]: { label: ContentTypeLabel.BATCH, is_text: true, }, [ContentTypeLabel.BAZEL]: { label: ContentTypeLabel.BAZEL, is_text: true, }, [ContentTypeLabel.BCAD]: { label: ContentTypeLabel.BCAD, is_text: false, }, [ContentTypeLabel.BIB]: { label: ContentTypeLabel.BIB, is_text: true, }, [ContentTypeLabel.BMP]: { label: ContentTypeLabel.BMP, is_text: false, }, [ContentTypeLabel.BPG]: { label: ContentTypeLabel.BPG, is_text: false, }, [ContentTypeLabel.BPL]: { label: 
ContentTypeLabel.BPL, is_text: false, }, [ContentTypeLabel.BRAINFUCK]: { label: ContentTypeLabel.BRAINFUCK, is_text: true, }, [ContentTypeLabel.BRF]: { label: ContentTypeLabel.BRF, is_text: false, }, [ContentTypeLabel.BZIP]: { label: ContentTypeLabel.BZIP, is_text: false, }, [ContentTypeLabel.BZIP3]: { label: ContentTypeLabel.BZIP3, is_text: false, }, [ContentTypeLabel.C]: { label: ContentTypeLabel.C, is_text: true, }, [ContentTypeLabel.CAB]: { label: ContentTypeLabel.CAB, is_text: false, }, [ContentTypeLabel.CAD]: { label: ContentTypeLabel.CAD, is_text: false, }, [ContentTypeLabel.CAT]: { label: ContentTypeLabel.CAT, is_text: false, }, [ContentTypeLabel.CDF]: { label: ContentTypeLabel.CDF, is_text: false, }, [ContentTypeLabel.CHM]: { label: ContentTypeLabel.CHM, is_text: false, }, [ContentTypeLabel.CLOJURE]: { label: ContentTypeLabel.CLOJURE, is_text: true, }, [ContentTypeLabel.CMAKE]: { label: ContentTypeLabel.CMAKE, is_text: true, }, [ContentTypeLabel.COBOL]: { label: ContentTypeLabel.COBOL, is_text: true, }, [ContentTypeLabel.COFF]: { label: ContentTypeLabel.COFF, is_text: false, }, [ContentTypeLabel.COFFEESCRIPT]: { label: ContentTypeLabel.COFFEESCRIPT, is_text: true, }, [ContentTypeLabel.COM]: { label: ContentTypeLabel.COM, is_text: false, }, [ContentTypeLabel.CPL]: { label: ContentTypeLabel.CPL, is_text: false, }, [ContentTypeLabel.CPP]: { label: ContentTypeLabel.CPP, is_text: true, }, [ContentTypeLabel.CRT]: { label: ContentTypeLabel.CRT, is_text: false, }, [ContentTypeLabel.CRX]: { label: ContentTypeLabel.CRX, is_text: false, }, [ContentTypeLabel.CS]: { label: ContentTypeLabel.CS, is_text: true, }, [ContentTypeLabel.CSPROJ]: { label: ContentTypeLabel.CSPROJ, is_text: true, }, [ContentTypeLabel.CSS]: { label: ContentTypeLabel.CSS, is_text: true, }, [ContentTypeLabel.CSV]: { label: ContentTypeLabel.CSV, is_text: true, }, [ContentTypeLabel.CTL]: { label: ContentTypeLabel.CTL, is_text: false, }, [ContentTypeLabel.DART]: { label: ContentTypeLabel.DART, is_text: 
true, }, [ContentTypeLabel.DEB]: { label: ContentTypeLabel.DEB, is_text: false, }, [ContentTypeLabel.DEX]: { label: ContentTypeLabel.DEX, is_text: false, }, [ContentTypeLabel.DEY]: { label: ContentTypeLabel.DEY, is_text: false, }, [ContentTypeLabel.DICOM]: { label: ContentTypeLabel.DICOM, is_text: false, }, [ContentTypeLabel.DIFF]: { label: ContentTypeLabel.DIFF, is_text: true, }, [ContentTypeLabel.DIRECTORY]: { label: ContentTypeLabel.DIRECTORY, is_text: false, }, [ContentTypeLabel.DJANGO]: { label: ContentTypeLabel.DJANGO, is_text: false, }, [ContentTypeLabel.DLL]: { label: ContentTypeLabel.DLL, is_text: false, }, [ContentTypeLabel.DM]: { label: ContentTypeLabel.DM, is_text: true, }, [ContentTypeLabel.DMG]: { label: ContentTypeLabel.DMG, is_text: false, }, [ContentTypeLabel.DMIGD]: { label: ContentTypeLabel.DMIGD, is_text: true, }, [ContentTypeLabel.DMSCRIPT]: { label: ContentTypeLabel.DMSCRIPT, is_text: true, }, [ContentTypeLabel.DOC]: { label: ContentTypeLabel.DOC, is_text: false, }, [ContentTypeLabel.DOCKERFILE]: { label: ContentTypeLabel.DOCKERFILE, is_text: true, }, [ContentTypeLabel.DOCX]: { label: ContentTypeLabel.DOCX, is_text: false, }, [ContentTypeLabel.DOSMBR]: { label: ContentTypeLabel.DOSMBR, is_text: false, }, [ContentTypeLabel.DOTX]: { label: ContentTypeLabel.DOTX, is_text: false, }, [ContentTypeLabel.DSSTORE]: { label: ContentTypeLabel.DSSTORE, is_text: false, }, [ContentTypeLabel.DWG]: { label: ContentTypeLabel.DWG, is_text: false, }, [ContentTypeLabel.DXF]: { label: ContentTypeLabel.DXF, is_text: true, }, [ContentTypeLabel.DYLIB]: { label: ContentTypeLabel.DYLIB, is_text: false, }, [ContentTypeLabel.EBML]: { label: ContentTypeLabel.EBML, is_text: false, }, [ContentTypeLabel.ELF]: { label: ContentTypeLabel.ELF, is_text: false, }, [ContentTypeLabel.ELIXIR]: { label: ContentTypeLabel.ELIXIR, is_text: true, }, [ContentTypeLabel.EMF]: { label: ContentTypeLabel.EMF, is_text: false, }, [ContentTypeLabel.EML]: { label: ContentTypeLabel.EML, is_text: 
true, }, [ContentTypeLabel.EMPTY]: { label: ContentTypeLabel.EMPTY, is_text: false, }, [ContentTypeLabel.EPUB]: { label: ContentTypeLabel.EPUB, is_text: false, }, [ContentTypeLabel.ERB]: { label: ContentTypeLabel.ERB, is_text: true, }, [ContentTypeLabel.ERLANG]: { label: ContentTypeLabel.ERLANG, is_text: true, }, [ContentTypeLabel.ESE]: { label: ContentTypeLabel.ESE, is_text: false, }, [ContentTypeLabel.EXE]: { label: ContentTypeLabel.EXE, is_text: false, }, [ContentTypeLabel.EXP]: { label: ContentTypeLabel.EXP, is_text: false, }, [ContentTypeLabel.FLAC]: { label: ContentTypeLabel.FLAC, is_text: false, }, [ContentTypeLabel.FLUTTER]: { label: ContentTypeLabel.FLUTTER, is_text: false, }, [ContentTypeLabel.FLV]: { label: ContentTypeLabel.FLV, is_text: false, }, [ContentTypeLabel.FORTRAN]: { label: ContentTypeLabel.FORTRAN, is_text: true, }, [ContentTypeLabel.FPX]: { label: ContentTypeLabel.FPX, is_text: false, }, [ContentTypeLabel.GEMFILE]: { label: ContentTypeLabel.GEMFILE, is_text: true, }, [ContentTypeLabel.GEMSPEC]: { label: ContentTypeLabel.GEMSPEC, is_text: true, }, [ContentTypeLabel.GIF]: { label: ContentTypeLabel.GIF, is_text: false, }, [ContentTypeLabel.GITATTRIBUTES]: { label: ContentTypeLabel.GITATTRIBUTES, is_text: true, }, [ContentTypeLabel.GITMODULES]: { label: ContentTypeLabel.GITMODULES, is_text: true, }, [ContentTypeLabel.GLEAM]: { label: ContentTypeLabel.GLEAM, is_text: true, }, [ContentTypeLabel.GO]: { label: ContentTypeLabel.GO, is_text: true, }, [ContentTypeLabel.GPX]: { label: ContentTypeLabel.GPX, is_text: false, }, [ContentTypeLabel.GRADLE]: { label: ContentTypeLabel.GRADLE, is_text: true, }, [ContentTypeLabel.GROOVY]: { label: ContentTypeLabel.GROOVY, is_text: true, }, [ContentTypeLabel.GZIP]: { label: ContentTypeLabel.GZIP, is_text: false, }, [ContentTypeLabel.H]: { label: ContentTypeLabel.H, is_text: true, }, [ContentTypeLabel.H5]: { label: ContentTypeLabel.H5, is_text: false, }, [ContentTypeLabel.HANDLEBARS]: { label: 
ContentTypeLabel.HANDLEBARS, is_text: true, }, [ContentTypeLabel.HASKELL]: { label: ContentTypeLabel.HASKELL, is_text: true, }, [ContentTypeLabel.HCL]: { label: ContentTypeLabel.HCL, is_text: true, }, [ContentTypeLabel.HEIF]: { label: ContentTypeLabel.HEIF, is_text: false, }, [ContentTypeLabel.HFS]: { label: ContentTypeLabel.HFS, is_text: false, }, [ContentTypeLabel.HLP]: { label: ContentTypeLabel.HLP, is_text: false, }, [ContentTypeLabel.HPP]: { label: ContentTypeLabel.HPP, is_text: true, }, [ContentTypeLabel.HTA]: { label: ContentTypeLabel.HTA, is_text: false, }, [ContentTypeLabel.HTACCESS]: { label: ContentTypeLabel.HTACCESS, is_text: true, }, [ContentTypeLabel.HTML]: { label: ContentTypeLabel.HTML, is_text: true, }, [ContentTypeLabel.HVE]: { label: ContentTypeLabel.HVE, is_text: false, }, [ContentTypeLabel.HWP]: { label: ContentTypeLabel.HWP, is_text: false, }, [ContentTypeLabel.ICC]: { label: ContentTypeLabel.ICC, is_text: false, }, [ContentTypeLabel.ICNS]: { label: ContentTypeLabel.ICNS, is_text: false, }, [ContentTypeLabel.ICO]: { label: ContentTypeLabel.ICO, is_text: false, }, [ContentTypeLabel.ICS]: { label: ContentTypeLabel.ICS, is_text: true, }, [ContentTypeLabel.IGNOREFILE]: { label: ContentTypeLabel.IGNOREFILE, is_text: true, }, [ContentTypeLabel.IMG]: { label: ContentTypeLabel.IMG, is_text: false, }, [ContentTypeLabel.INI]: { label: ContentTypeLabel.INI, is_text: true, }, [ContentTypeLabel.INTERNETSHORTCUT]: { label: ContentTypeLabel.INTERNETSHORTCUT, is_text: true, }, [ContentTypeLabel.IOSAPP]: { label: ContentTypeLabel.IOSAPP, is_text: false, }, [ContentTypeLabel.IPYNB]: { label: ContentTypeLabel.IPYNB, is_text: true, }, [ContentTypeLabel.ISO]: { label: ContentTypeLabel.ISO, is_text: false, }, [ContentTypeLabel.JAR]: { label: ContentTypeLabel.JAR, is_text: false, }, [ContentTypeLabel.JAVA]: { label: ContentTypeLabel.JAVA, is_text: true, }, [ContentTypeLabel.JAVABYTECODE]: { label: ContentTypeLabel.JAVABYTECODE, is_text: false, }, 
[ContentTypeLabel.JAVASCRIPT]: { label: ContentTypeLabel.JAVASCRIPT, is_text: true, }, [ContentTypeLabel.JINJA]: { label: ContentTypeLabel.JINJA, is_text: true, }, [ContentTypeLabel.JNG]: { label: ContentTypeLabel.JNG, is_text: false, }, [ContentTypeLabel.JNLP]: { label: ContentTypeLabel.JNLP, is_text: true, }, [ContentTypeLabel.JP2]: { label: ContentTypeLabel.JP2, is_text: false, }, [ContentTypeLabel.JPEG]: { label: ContentTypeLabel.JPEG, is_text: false, }, [ContentTypeLabel.JSON]: { label: ContentTypeLabel.JSON, is_text: true, }, [ContentTypeLabel.JSONC]: { label: ContentTypeLabel.JSONC, is_text: false, }, [ContentTypeLabel.JSONL]: { label: ContentTypeLabel.JSONL, is_text: true, }, [ContentTypeLabel.JSX]: { label: ContentTypeLabel.JSX, is_text: true, }, [ContentTypeLabel.JULIA]: { label: ContentTypeLabel.JULIA, is_text: true, }, [ContentTypeLabel.JXL]: { label: ContentTypeLabel.JXL, is_text: false, }, [ContentTypeLabel.KO]: { label: ContentTypeLabel.KO, is_text: false, }, [ContentTypeLabel.KOTLIN]: { label: ContentTypeLabel.KOTLIN, is_text: true, }, [ContentTypeLabel.KS]: { label: ContentTypeLabel.KS, is_text: true, }, [ContentTypeLabel.LATEX]: { label: ContentTypeLabel.LATEX, is_text: true, }, [ContentTypeLabel.LATEXAUX]: { label: ContentTypeLabel.LATEXAUX, is_text: false, }, [ContentTypeLabel.LESS]: { label: ContentTypeLabel.LESS, is_text: false, }, [ContentTypeLabel.LHA]: { label: ContentTypeLabel.LHA, is_text: false, }, [ContentTypeLabel.LICENSE]: { label: ContentTypeLabel.LICENSE, is_text: true, }, [ContentTypeLabel.LISP]: { label: ContentTypeLabel.LISP, is_text: true, }, [ContentTypeLabel.LITCS]: { label: ContentTypeLabel.LITCS, is_text: false, }, [ContentTypeLabel.LNK]: { label: ContentTypeLabel.LNK, is_text: false, }, [ContentTypeLabel.LOCK]: { label: ContentTypeLabel.LOCK, is_text: true, }, [ContentTypeLabel.LRZ]: { label: ContentTypeLabel.LRZ, is_text: false, }, [ContentTypeLabel.LUA]: { label: ContentTypeLabel.LUA, is_text: true, }, 
[ContentTypeLabel.LZ]: { label: ContentTypeLabel.LZ, is_text: false, }, [ContentTypeLabel.LZ4]: { label: ContentTypeLabel.LZ4, is_text: false, }, [ContentTypeLabel.LZX]: { label: ContentTypeLabel.LZX, is_text: false, }, [ContentTypeLabel.M3U]: { label: ContentTypeLabel.M3U, is_text: true, }, [ContentTypeLabel.M4]: { label: ContentTypeLabel.M4, is_text: true, }, [ContentTypeLabel.MACHO]: { label: ContentTypeLabel.MACHO, is_text: false, }, [ContentTypeLabel.MAFF]: { label: ContentTypeLabel.MAFF, is_text: false, }, [ContentTypeLabel.MAKEFILE]: { label: ContentTypeLabel.MAKEFILE, is_text: true, }, [ContentTypeLabel.MARKDOWN]: { label: ContentTypeLabel.MARKDOWN, is_text: true, }, [ContentTypeLabel.MATLAB]: { label: ContentTypeLabel.MATLAB, is_text: true, }, [ContentTypeLabel.MHT]: { label: ContentTypeLabel.MHT, is_text: true, }, [ContentTypeLabel.MIDI]: { label: ContentTypeLabel.MIDI, is_text: false, }, [ContentTypeLabel.MKV]: { label: ContentTypeLabel.MKV, is_text: false, }, [ContentTypeLabel.MP2]: { label: ContentTypeLabel.MP2, is_text: false, }, [ContentTypeLabel.MP3]: { label: ContentTypeLabel.MP3, is_text: false, }, [ContentTypeLabel.MP4]: { label: ContentTypeLabel.MP4, is_text: false, }, [ContentTypeLabel.MPEGTS]: { label: ContentTypeLabel.MPEGTS, is_text: false, }, [ContentTypeLabel.MSCOMPRESS]: { label: ContentTypeLabel.MSCOMPRESS, is_text: false, }, [ContentTypeLabel.MSI]: { label: ContentTypeLabel.MSI, is_text: false, }, [ContentTypeLabel.MSIX]: { label: ContentTypeLabel.MSIX, is_text: false, }, [ContentTypeLabel.MST]: { label: ContentTypeLabel.MST, is_text: false, }, [ContentTypeLabel.MUI]: { label: ContentTypeLabel.MUI, is_text: false, }, [ContentTypeLabel.MUM]: { label: ContentTypeLabel.MUM, is_text: true, }, [ContentTypeLabel.MUN]: { label: ContentTypeLabel.MUN, is_text: false, }, [ContentTypeLabel.NIM]: { label: ContentTypeLabel.NIM, is_text: false, }, [ContentTypeLabel.NPY]: { label: ContentTypeLabel.NPY, is_text: false, }, [ContentTypeLabel.NPZ]: { 
label: ContentTypeLabel.NPZ, is_text: false, }, [ContentTypeLabel.NULL]: { label: ContentTypeLabel.NULL, is_text: false, }, [ContentTypeLabel.NUPKG]: { label: ContentTypeLabel.NUPKG, is_text: false, }, [ContentTypeLabel.OBJECT]: { label: ContentTypeLabel.OBJECT, is_text: false, }, [ContentTypeLabel.OBJECTIVEC]: { label: ContentTypeLabel.OBJECTIVEC, is_text: true, }, [ContentTypeLabel.OCAML]: { label: ContentTypeLabel.OCAML, is_text: true, }, [ContentTypeLabel.OCX]: { label: ContentTypeLabel.OCX, is_text: false, }, [ContentTypeLabel.ODEX]: { label: ContentTypeLabel.ODEX, is_text: false, }, [ContentTypeLabel.ODIN]: { label: ContentTypeLabel.ODIN, is_text: true, }, [ContentTypeLabel.ODP]: { label: ContentTypeLabel.ODP, is_text: false, }, [ContentTypeLabel.ODS]: { label: ContentTypeLabel.ODS, is_text: false, }, [ContentTypeLabel.ODT]: { label: ContentTypeLabel.ODT, is_text: false, }, [ContentTypeLabel.OGG]: { label: ContentTypeLabel.OGG, is_text: false, }, [ContentTypeLabel.OLE]: { label: ContentTypeLabel.OLE, is_text: false, }, [ContentTypeLabel.ONE]: { label: ContentTypeLabel.ONE, is_text: false, }, [ContentTypeLabel.ONNX]: { label: ContentTypeLabel.ONNX, is_text: false, }, [ContentTypeLabel.OOXML]: { label: ContentTypeLabel.OOXML, is_text: false, }, [ContentTypeLabel.OTF]: { label: ContentTypeLabel.OTF, is_text: false, }, [ContentTypeLabel.OUTLOOK]: { label: ContentTypeLabel.OUTLOOK, is_text: false, }, [ContentTypeLabel.PALMOS]: { label: ContentTypeLabel.PALMOS, is_text: false, }, [ContentTypeLabel.PARQUET]: { label: ContentTypeLabel.PARQUET, is_text: false, }, [ContentTypeLabel.PASCAL]: { label: ContentTypeLabel.PASCAL, is_text: true, }, [ContentTypeLabel.PBM]: { label: ContentTypeLabel.PBM, is_text: false, }, [ContentTypeLabel.PCAP]: { label: ContentTypeLabel.PCAP, is_text: false, }, [ContentTypeLabel.PDB]: { label: ContentTypeLabel.PDB, is_text: false, }, [ContentTypeLabel.PDF]: { label: ContentTypeLabel.PDF, is_text: false, }, [ContentTypeLabel.PEBIN]: { label: 
ContentTypeLabel.PEBIN, is_text: false, }, [ContentTypeLabel.PEM]: { label: ContentTypeLabel.PEM, is_text: true, }, [ContentTypeLabel.PERL]: { label: ContentTypeLabel.PERL, is_text: true, }, [ContentTypeLabel.PGP]: { label: ContentTypeLabel.PGP, is_text: false, }, [ContentTypeLabel.PHP]: { label: ContentTypeLabel.PHP, is_text: true, }, [ContentTypeLabel.PICKLE]: { label: ContentTypeLabel.PICKLE, is_text: false, }, [ContentTypeLabel.PNG]: { label: ContentTypeLabel.PNG, is_text: false, }, [ContentTypeLabel.PO]: { label: ContentTypeLabel.PO, is_text: true, }, [ContentTypeLabel.POSTSCRIPT]: { label: ContentTypeLabel.POSTSCRIPT, is_text: false, }, [ContentTypeLabel.POWERSHELL]: { label: ContentTypeLabel.POWERSHELL, is_text: true, }, [ContentTypeLabel.PPT]: { label: ContentTypeLabel.PPT, is_text: false, }, [ContentTypeLabel.PPTX]: { label: ContentTypeLabel.PPTX, is_text: false, }, [ContentTypeLabel.PRINTFOX]: { label: ContentTypeLabel.PRINTFOX, is_text: false, }, [ContentTypeLabel.PROLOG]: { label: ContentTypeLabel.PROLOG, is_text: true, }, [ContentTypeLabel.PROTEINDB]: { label: ContentTypeLabel.PROTEINDB, is_text: true, }, [ContentTypeLabel.PROTO]: { label: ContentTypeLabel.PROTO, is_text: true, }, [ContentTypeLabel.PROTOBUF]: { label: ContentTypeLabel.PROTOBUF, is_text: false, }, [ContentTypeLabel.PSD]: { label: ContentTypeLabel.PSD, is_text: false, }, [ContentTypeLabel.PUB]: { label: ContentTypeLabel.PUB, is_text: false, }, [ContentTypeLabel.PYTHON]: { label: ContentTypeLabel.PYTHON, is_text: true, }, [ContentTypeLabel.PYTHONBYTECODE]: { label: ContentTypeLabel.PYTHONBYTECODE, is_text: false, }, [ContentTypeLabel.PYTHONPAR]: { label: ContentTypeLabel.PYTHONPAR, is_text: false, }, [ContentTypeLabel.PYTORCH]: { label: ContentTypeLabel.PYTORCH, is_text: false, }, [ContentTypeLabel.QOI]: { label: ContentTypeLabel.QOI, is_text: false, }, [ContentTypeLabel.QT]: { label: ContentTypeLabel.QT, is_text: false, }, [ContentTypeLabel.R]: { label: ContentTypeLabel.R, is_text: true, 
}, [ContentTypeLabel.RANDOMASCII]: { label: ContentTypeLabel.RANDOMASCII, is_text: true, }, [ContentTypeLabel.RANDOMBYTES]: { label: ContentTypeLabel.RANDOMBYTES, is_text: false, }, [ContentTypeLabel.RANDOMTXT]: { label: ContentTypeLabel.RANDOMTXT, is_text: true, }, [ContentTypeLabel.RAR]: { label: ContentTypeLabel.RAR, is_text: false, }, [ContentTypeLabel.RDF]: { label: ContentTypeLabel.RDF, is_text: true, }, [ContentTypeLabel.RDP]: { label: ContentTypeLabel.RDP, is_text: false, }, [ContentTypeLabel.RIFF]: { label: ContentTypeLabel.RIFF, is_text: false, }, [ContentTypeLabel.RLIB]: { label: ContentTypeLabel.RLIB, is_text: false, }, [ContentTypeLabel.RLL]: { label: ContentTypeLabel.RLL, is_text: false, }, [ContentTypeLabel.RPM]: { label: ContentTypeLabel.RPM, is_text: false, }, [ContentTypeLabel.RST]: { label: ContentTypeLabel.RST, is_text: true, }, [ContentTypeLabel.RTF]: { label: ContentTypeLabel.RTF, is_text: true, }, [ContentTypeLabel.RUBY]: { label: ContentTypeLabel.RUBY, is_text: true, }, [ContentTypeLabel.RUST]: { label: ContentTypeLabel.RUST, is_text: true, }, [ContentTypeLabel.RZIP]: { label: ContentTypeLabel.RZIP, is_text: false, }, [ContentTypeLabel.SCALA]: { label: ContentTypeLabel.SCALA, is_text: true, }, [ContentTypeLabel.SCHEME]: { label: ContentTypeLabel.SCHEME, is_text: false, }, [ContentTypeLabel.SCR]: { label: ContentTypeLabel.SCR, is_text: false, }, [ContentTypeLabel.SCRIPTWSF]: { label: ContentTypeLabel.SCRIPTWSF, is_text: false, }, [ContentTypeLabel.SCSS]: { label: ContentTypeLabel.SCSS, is_text: true, }, [ContentTypeLabel.SEVENZIP]: { label: ContentTypeLabel.SEVENZIP, is_text: false, }, [ContentTypeLabel.SGML]: { label: ContentTypeLabel.SGML, is_text: true, }, [ContentTypeLabel.SH3D]: { label: ContentTypeLabel.SH3D, is_text: false, }, [ContentTypeLabel.SHELL]: { label: ContentTypeLabel.SHELL, is_text: true, }, [ContentTypeLabel.SMALI]: { label: ContentTypeLabel.SMALI, is_text: true, }, [ContentTypeLabel.SNAP]: { label: ContentTypeLabel.SNAP, 
is_text: false, }, [ContentTypeLabel.SO]: { label: ContentTypeLabel.SO, is_text: false, }, [ContentTypeLabel.SOLIDITY]: { label: ContentTypeLabel.SOLIDITY, is_text: true, }, [ContentTypeLabel.SQL]: { label: ContentTypeLabel.SQL, is_text: true, }, [ContentTypeLabel.SQLITE]: { label: ContentTypeLabel.SQLITE, is_text: false, }, [ContentTypeLabel.SQUASHFS]: { label: ContentTypeLabel.SQUASHFS, is_text: false, }, [ContentTypeLabel.SRT]: { label: ContentTypeLabel.SRT, is_text: true, }, [ContentTypeLabel.STLBINARY]: { label: ContentTypeLabel.STLBINARY, is_text: false, }, [ContentTypeLabel.STLTEXT]: { label: ContentTypeLabel.STLTEXT, is_text: true, }, [ContentTypeLabel.SUM]: { label: ContentTypeLabel.SUM, is_text: true, }, [ContentTypeLabel.SVD]: { label: ContentTypeLabel.SVD, is_text: false, }, [ContentTypeLabel.SVG]: { label: ContentTypeLabel.SVG, is_text: true, }, [ContentTypeLabel.SWF]: { label: ContentTypeLabel.SWF, is_text: false, }, [ContentTypeLabel.SWIFT]: { label: ContentTypeLabel.SWIFT, is_text: true, }, [ContentTypeLabel.SYMLINK]: { label: ContentTypeLabel.SYMLINK, is_text: false, }, [ContentTypeLabel.SYMLINKTEXT]: { label: ContentTypeLabel.SYMLINKTEXT, is_text: true, }, [ContentTypeLabel.SYS]: { label: ContentTypeLabel.SYS, is_text: false, }, [ContentTypeLabel.TAR]: { label: ContentTypeLabel.TAR, is_text: false, }, [ContentTypeLabel.TCL]: { label: ContentTypeLabel.TCL, is_text: true, }, [ContentTypeLabel.TEXTPROTO]: { label: ContentTypeLabel.TEXTPROTO, is_text: true, }, [ContentTypeLabel.TGA]: { label: ContentTypeLabel.TGA, is_text: false, }, [ContentTypeLabel.THUMBSDB]: { label: ContentTypeLabel.THUMBSDB, is_text: false, }, [ContentTypeLabel.TIFF]: { label: ContentTypeLabel.TIFF, is_text: false, }, [ContentTypeLabel.TMDX]: { label: ContentTypeLabel.TMDX, is_text: false, }, [ContentTypeLabel.TOML]: { label: ContentTypeLabel.TOML, is_text: true, }, [ContentTypeLabel.TORRENT]: { label: ContentTypeLabel.TORRENT, is_text: false, }, [ContentTypeLabel.TROFF]: { 
label: ContentTypeLabel.TROFF, is_text: false, }, [ContentTypeLabel.TSV]: { label: ContentTypeLabel.TSV, is_text: true, }, [ContentTypeLabel.TSX]: { label: ContentTypeLabel.TSX, is_text: true, }, [ContentTypeLabel.TTF]: { label: ContentTypeLabel.TTF, is_text: false, }, [ContentTypeLabel.TWIG]: { label: ContentTypeLabel.TWIG, is_text: true, }, [ContentTypeLabel.TXT]: { label: ContentTypeLabel.TXT, is_text: true, }, [ContentTypeLabel.TXTASCII]: { label: ContentTypeLabel.TXTASCII, is_text: true, }, [ContentTypeLabel.TXTUTF16]: { label: ContentTypeLabel.TXTUTF16, is_text: true, }, [ContentTypeLabel.TXTUTF8]: { label: ContentTypeLabel.TXTUTF8, is_text: true, }, [ContentTypeLabel.TYPESCRIPT]: { label: ContentTypeLabel.TYPESCRIPT, is_text: true, }, [ContentTypeLabel.UDF]: { label: ContentTypeLabel.UDF, is_text: false, }, [ContentTypeLabel.UNDEFINED]: { label: ContentTypeLabel.UNDEFINED, is_text: false, }, [ContentTypeLabel.UNIXCOMPRESS]: { label: ContentTypeLabel.UNIXCOMPRESS, is_text: false, }, [ContentTypeLabel.UNKNOWN]: { label: ContentTypeLabel.UNKNOWN, is_text: false, }, [ContentTypeLabel.VBA]: { label: ContentTypeLabel.VBA, is_text: true, }, [ContentTypeLabel.VBE]: { label: ContentTypeLabel.VBE, is_text: false, }, [ContentTypeLabel.VCARD]: { label: ContentTypeLabel.VCARD, is_text: false, }, [ContentTypeLabel.VCS]: { label: ContentTypeLabel.VCS, is_text: false, }, [ContentTypeLabel.VCXPROJ]: { label: ContentTypeLabel.VCXPROJ, is_text: true, }, [ContentTypeLabel.VERILOG]: { label: ContentTypeLabel.VERILOG, is_text: true, }, [ContentTypeLabel.VHD]: { label: ContentTypeLabel.VHD, is_text: false, }, [ContentTypeLabel.VHDL]: { label: ContentTypeLabel.VHDL, is_text: true, }, [ContentTypeLabel.VISIO]: { label: ContentTypeLabel.VISIO, is_text: false, }, [ContentTypeLabel.VTT]: { label: ContentTypeLabel.VTT, is_text: true, }, [ContentTypeLabel.VUE]: { label: ContentTypeLabel.VUE, is_text: true, }, [ContentTypeLabel.WAD]: { label: ContentTypeLabel.WAD, is_text: false, }, 
[ContentTypeLabel.WASM]: { label: ContentTypeLabel.WASM, is_text: false, }, [ContentTypeLabel.WAV]: { label: ContentTypeLabel.WAV, is_text: false, }, [ContentTypeLabel.WEBM]: { label: ContentTypeLabel.WEBM, is_text: false, }, [ContentTypeLabel.WEBP]: { label: ContentTypeLabel.WEBP, is_text: false, }, [ContentTypeLabel.WEBTEMPLATE]: { label: ContentTypeLabel.WEBTEMPLATE, is_text: true, }, [ContentTypeLabel.WIM]: { label: ContentTypeLabel.WIM, is_text: false, }, [ContentTypeLabel.WINREGISTRY]: { label: ContentTypeLabel.WINREGISTRY, is_text: true, }, [ContentTypeLabel.WMA]: { label: ContentTypeLabel.WMA, is_text: false, }, [ContentTypeLabel.WMF]: { label: ContentTypeLabel.WMF, is_text: false, }, [ContentTypeLabel.WMV]: { label: ContentTypeLabel.WMV, is_text: false, }, [ContentTypeLabel.WOFF]: { label: ContentTypeLabel.WOFF, is_text: false, }, [ContentTypeLabel.WOFF2]: { label: ContentTypeLabel.WOFF2, is_text: false, }, [ContentTypeLabel.XAR]: { label: ContentTypeLabel.XAR, is_text: false, }, [ContentTypeLabel.XCF]: { label: ContentTypeLabel.XCF, is_text: false, }, [ContentTypeLabel.XLS]: { label: ContentTypeLabel.XLS, is_text: false, }, [ContentTypeLabel.XLSB]: { label: ContentTypeLabel.XLSB, is_text: false, }, [ContentTypeLabel.XLSX]: { label: ContentTypeLabel.XLSX, is_text: false, }, [ContentTypeLabel.XML]: { label: ContentTypeLabel.XML, is_text: true, }, [ContentTypeLabel.XPI]: { label: ContentTypeLabel.XPI, is_text: false, }, [ContentTypeLabel.XSD]: { label: ContentTypeLabel.XSD, is_text: false, }, [ContentTypeLabel.XZ]: { label: ContentTypeLabel.XZ, is_text: false, }, [ContentTypeLabel.YAML]: { label: ContentTypeLabel.YAML, is_text: true, }, [ContentTypeLabel.YARA]: { label: ContentTypeLabel.YARA, is_text: true, }, [ContentTypeLabel.ZIG]: { label: ContentTypeLabel.ZIG, is_text: true, }, [ContentTypeLabel.ZIP]: { label: ContentTypeLabel.ZIP, is_text: false, }, [ContentTypeLabel.ZLIBSTREAM]: { label: ContentTypeLabel.ZLIBSTREAM, is_text: false, }, 
[ContentTypeLabel.ZST]: { label: ContentTypeLabel.ZST, is_text: false, }, }), }; ================================================ FILE: js/src/magika-options.ts ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. export interface MagikaOptions { modelURL?: string; modelPath?: string; modelConfigURL?: string; modelConfigPath?: string; } ================================================ FILE: js/src/magika-prediction.ts ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
import { ContentTypeInfo } from "./content-type-info.js";
import { ContentTypeLabel } from "./content-type-label.js";
import { OverwriteReason } from "./overwrite-reason.js";

/**
 * Prediction attached to a {@link MagikaResult}.
 *
 * - `dl`: content type information for the label produced by the model.
 * - `output`: content type information for the label finally reported.
 * - `score`: score associated with the prediction.
 * - `overwrite_reason`: why `output` differs from `dl`, if it does.
 * - `scores_map`: optional per-label scores. The test-suite in this
 *   repository expects it to be unset when `dl.label` is `UNDEFINED`.
 */
export interface MagikaPrediction {
  dl: ContentTypeInfo;
  output: ContentTypeInfo;
  score: number;
  overwrite_reason: OverwriteReason;
  // Not every label needs a score, hence the Partial.
  scores_map?: Partial<Record<ContentTypeLabel, number>>;
}

================================================
FILE: js/src/magika-result.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { MagikaPrediction } from "./magika-prediction.js";
import { Status } from "./status.js";

/**
 * Result of identifying a single input: the path it was read from, the
 * status of the operation, and the prediction itself.
 */
export interface MagikaResult {
  path: string;
  status: Status;
  prediction: MagikaPrediction;
}

================================================
FILE: js/src/model-config-node.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import * as fs from "fs/promises";
import { ModelConfig } from "./model-config.js";

/**
 * Node-specific model configuration that can also be loaded from a local
 * JSON file, in addition to the URL loading inherited from ModelConfig.
 */
export class ModelConfigNode extends ModelConfig {
  /**
   * Reads and parses the JSON configuration at `configPath` and applies it.
   * Does nothing if a configuration has already been loaded.
   *
   * @param configPath filesystem path of the JSON configuration file.
   * @throws if the file cannot be read, is not valid JSON, or is rejected by
   *   `setConfig`.
   */
  async loadFile(configPath: string): Promise<void> {
    if (this.loaded) {
      return;
    }
    // Decode as UTF-8 text directly instead of going through a Buffer.
    const rawConfig = await fs.readFile(configPath, "utf8");
    this.setConfig(JSON.parse(rawConfig));
    this.loaded = true;
  }
}

================================================
FILE: js/src/model-config.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { ContentTypeLabel } from "./content-type-label.js";

/**
 * In-memory representation of a model configuration.
 *
 * All fields start at neutral defaults and are filled in by `setConfig`,
 * which also rejects configurations this implementation cannot handle.
 */
export class ModelConfig {
  beg_size: number = 0;
  mid_size: number = 0;
  end_size: number = 0;
  use_inputs_at_offsets: boolean = false;
  medium_confidence_threshold: number = 0;
  min_file_size_for_dl: number = 0;
  padding_token: number = -1;
  block_size: number = 0;
  target_labels_space: ContentTypeLabel[] = [];
  thresholds: Partial<Record<ContentTypeLabel, number>> = {};
  overwrite_map: Partial<Record<ContentTypeLabel, ContentTypeLabel>> = {};
  loaded: boolean = false;

  /**
   * Fetches the JSON configuration from `configURL` and applies it.
   * Does nothing if a configuration has already been loaded.
   *
   * @param configURL URL of the JSON configuration.
   * @throws if the HTTP request does not succeed, the body is not valid
   *   JSON, or the configuration is rejected by `setConfig`.
   */
  async loadUrl(configURL: string): Promise<void> {
    if (this.loaded) {
      return;
    }
    const response = await fetch(configURL);
    if (!response.ok) {
      // Fail with a clear message rather than a JSON parse error on an
      // HTTP error page.
      throw new Error(
        `Failed to fetch model config from ${configURL}: HTTP ${response.status}`,
      );
    }
    const config = (await response.json()) as Record<string, any>;
    this.setConfig(config);
    this.loaded = true;
  }

  /**
   * Copies the values of a parsed configuration object into this instance
   * and validates them.
   *
   * @param config parsed JSON configuration.
   * @throws Error("Invalid config") if the values are not supported: sizes
   *   and thresholds must be positive, `mid_size` must be 0,
   *   `use_inputs_at_offsets` must be false and `padding_token` must be set.
   */
  protected setConfig(config: Record<string, any>): void {
    this.beg_size = config.beg_size;
    this.mid_size = config.mid_size;
    this.end_size = config.end_size;
    this.use_inputs_at_offsets = config.use_inputs_at_offsets;
    this.medium_confidence_threshold = config.medium_confidence_threshold;
    this.min_file_size_for_dl = config.min_file_size_for_dl;
    this.padding_token = config.padding_token;
    this.block_size = config.block_size;

    // Start all three collections from scratch so stale entries never
    // survive a (re)configuration.
    this.target_labels_space = [];
    this.thresholds = {};
    this.overwrite_map = {};

    for (const label of config.target_labels_space as string[]) {
      this.target_labels_space.push(label as ContentTypeLabel);
    }
    for (const [label, th] of Object.entries(
      config.thresholds as Record<string, number>,
    )) {
      this.thresholds[label as ContentTypeLabel] = th;
    }
    for (const [label, target_label] of Object.entries(
      config.overwrite_map as Record<string, string>,
    )) {
      this.overwrite_map[label as ContentTypeLabel] =
        target_label as ContentTypeLabel;
    }

    if (
      !(
        this.beg_size > 0 &&
        this.mid_size === 0 &&
        this.end_size > 0 &&
        !this.use_inputs_at_offsets &&
        this.medium_confidence_threshold > 0 &&
        this.min_file_size_for_dl > 0 &&
        this.padding_token != -1 &&
        this.block_size > 0
      )
    ) {
      throw new Error("Invalid config");
    }
  }
}

================================================
FILE: js/src/model-features.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the
"License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. export class ModelFeatures { beg_ints: Uint16Array; end_ints: Uint16Array; locked: { beg: boolean; end: boolean }; constructor( beg_size: number, mid_size: number, end_size: number, padding_token: number, use_inputs_at_offsets: boolean, ) { if (mid_size != 0) { throw new Error( `Assertion failed: This implementation does not support mid_size (${mid_size}) != 0 model config.`, ); } if (use_inputs_at_offsets) { throw new Error( `Assertion failed: This implementation does not support use_inputs_at_offsets = true model config.`, ); } this.beg_ints = new Uint16Array(beg_size).fill(padding_token); this.end_ints = new Uint16Array(end_size).fill(padding_token); this.locked = { beg: false, end: false }; } withStart(data: Uint8Array, offset: number): this { if (!this.locked.beg) { this.locked.beg = true; this.beg_ints.set(data, offset); } return this; } withEnd(data: Uint8Array, offset: number): this { if (!this.locked.end) { this.locked.end = true; this.end_ints.set(data, offset); } return this; } toArray(): number[] { return [...this.beg_ints, ...this.end_ints]; } } ================================================ FILE: js/src/model-node.ts ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import * as tf from "@tensorflow/tfjs";
import * as tfn from "@tensorflow/tfjs-node";
import { Model } from "./model.js";

/**
 * Node-specific model that can also be loaded from the local filesystem,
 * in addition to the URL loading inherited from Model.
 */
export class ModelNode extends Model {
  /**
   * Loads the graph model from `modelPath` through the tfjs-node filesystem
   * IO handler. Does nothing if a model is already loaded.
   *
   * @param modelPath filesystem path of the model file.
   */
  async loadFile(modelPath: string): Promise<void> {
    if (!this.model) {
      const handler = tfn.io.fileSystem(modelPath);
      this.model = await tf.loadGraphModel(handler);
    }
  }
}

================================================
FILE: js/src/model-prediction.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { ContentTypeLabel } from "./content-type-label.js";

/**
 * Raw output of a model inference: the best-scoring label, its score, and
 * the score of every label the model emitted.
 */
export interface ModelPrediction {
  label: ContentTypeLabel;
  score: number;
  scores_map: Partial<Record<ContentTypeLabel, number>>;
}

================================================
FILE: js/src/model.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import * as tf from "@tensorflow/tfjs";
import { GraphModel } from "@tensorflow/tfjs";
import { ModelConfig } from "./model-config.js";
import { ModelPrediction } from "./model-prediction.js";
import { ModelFeatures } from "./model-features.js";
import { ContentTypeLabel } from "./content-type-label.js";

/**
 * Wraps a TensorFlow.js graph model together with the configuration that
 * maps its output indices to content type labels.
 */
export class Model {
  model?: GraphModel;

  constructor(public model_config: ModelConfig) {}

  /**
   * Loads the graph model from `modelURL`. Does nothing if a model is
   * already loaded.
   */
  async loadUrl(modelURL: string): Promise<void> {
    if (!this.model) {
      this.model = await tf.loadGraphModel(modelURL);
    }
  }

  /**
   * Runs the model on the given features.
   *
   * @param features features to feed to the model.
   * @returns the best-scoring label, its score, and the score of each label
   *   in `model_config.target_labels_space`.
   * @throws Error if the model has not been loaded, or if the number of
   *   scores produced by the model does not match the size of
   *   `target_labels_space`.
   */
  async predict(features: ModelFeatures): Promise<ModelPrediction> {
    if (!this.model) {
      throw new Error("model has not been loaded");
    }
    const featuresArray = features.toArray();
    const labels = this.model_config.target_labels_space;

    // Every tensor created below is tracked here and released in `finally`,
    // so nothing leaks even when the execution throws.
    const tensors: tf.Tensor[] = [];
    try {
      const modelInput = tf.tensor(
        [featuresArray],
        [1, featuresArray.length],
        "int32",
      );
      tensors.push(modelInput);

      // The tensor returned by executeAsync is distinct from the squeezed
      // one and must be disposed as well.
      const rawOutput = (await this.model.executeAsync(
        modelInput,
      )) as tf.Tensor;
      tensors.push(rawOutput);

      const modelOutput = tf.squeeze(rawOutput);
      tensors.push(modelOutput);

      const maxScoreIndexTensor = tf.argMax(modelOutput);
      tensors.push(maxScoreIndexTensor);

      const maxScoreIndex = maxScoreIndexTensor.dataSync()[0];
      const rawScores = modelOutput.dataSync();

      // Validate before indexing so a mismatch is reported as such rather
      // than surfacing as undefined labels.
      if (rawScores.length != labels.length) {
        throw new Error(
          `Assertion failed: Expected rawScores.length (${rawScores.length}) to have the same length of the targets_label_space (${this.model_config.target_labels_space.length})`,
        );
      }

      const scores_map: Partial<Record<ContentTypeLabel, number>> = {};
      for (let i = 0; i < rawScores.length; i++) {
        scores_map[labels[i]] = rawScores[i];
      }

      return {
        label: labels[maxScoreIndex],
        score: rawScores[maxScoreIndex],
        scores_map: scores_map,
      };
    } finally {
      tf.dispose(tensors);
    }
  }
}

================================================
FILE: js/src/overwrite-reason.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

export enum OverwriteReason {
  NONE = "none",
  LOW_CONFIDENCE = "low_confidence",
  OVERWRITE_MAP = "overwrite_map",
}

================================================
FILE: js/src/prediction-mode.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Prediction modes, identified by their serialized string values.
// NOTE(review): the reference tests in this repository state that only
// HIGH_CONFIDENCE is currently supported by the JS client — confirm.
export enum PredictionMode {
  BEST_GUESS = "best_guess",
  MEDIUM_CONFIDENCE = "medium_confidence",
  HIGH_CONFIDENCE = "high_confidence",
}

================================================
FILE: js/src/status.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Outcome of an identification attempt, identified by its serialized
// string value.
export enum Status {
  // No errors.
  OK = "ok",
  // Used when a file path does not exist.
  FILE_NOT_FOUND_ERROR = "file-not-found-error",
  // Used when a file path exists, but there are permission issues, e.g., can't read file.
  PERMISSION_ERROR = "permission-error",
  // Represents a generic error-like unknown status.
  UNKNOWN = "unknown",
}

================================================
FILE: js/test/features-extraction-vs-reference.test.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { describe, expect, it } from "@jest/globals";
import { MagikaNode as Magika } from "../magika-node.js";
import * as utils from "./utils.js";
import { ModelFeatures } from "../src/model-features.js";

const FEATURES_EXTRACTION_EXAMPLES: FeaturesExtractionExamples = [
  ...parseGzippedFeaturesExtractionExamples(),
];

/**
 * Test-only subclass exposing Magika's `_extractFeaturesFromBytes` static
 * helper so it can be called directly from the tests.
 */
class TestableMagika extends Magika {
  public static extractFeaturesFromBytes(
    fileBytes: Uint8Array,
    beg_size: number,
    mid_size: number,
    end_size: number,
    padding_token: number,
    block_size: number,
    use_inputs_at_offsets: boolean,
  ): ModelFeatures {
    return Magika._extractFeaturesFromBytes(
      fileBytes,
      beg_size,
      mid_size,
      end_size,
      padding_token,
      block_size,
      use_inputs_at_offsets,
    );
  }
}

// Features extraction goes through a static helper only, so no Magika
// instance (and therefore no model loading) is needed for these tests.
describe("Magika -- features extraction vs. reference", () => {
  it.each(FEATURES_EXTRACTION_EXAMPLES)(
    "check features extraction vs. reference",
    async (example) => {
      if (example.args.mid_size != 0 || example.args.use_inputs_at_offsets) {
        // We do not support these settings at the moment.
        return;
      }

      const fileBytes = Buffer.from(example.content_base64, "base64");
      const features = TestableMagika.extractFeaturesFromBytes(
        fileBytes,
        example.args.beg_size,
        example.args.mid_size,
        example.args.end_size,
        example.args.padding_token,
        example.args.block_size,
        example.args.use_inputs_at_offsets,
      );

      expect(features.beg_ints).toEqual(new Uint16Array(example.features.beg));
      expect(features.end_ints).toEqual(new Uint16Array(example.features.end));
    },
  );
});

interface FeaturesExtractionExample {
  args: FeaturesExtractionExampleArgs;
  metadata: FeaturesExtractionExampleMetadata;
  content_base64: string;
  features: ExampleModelFeatures;
}

interface FeaturesExtractionExampleArgs {
  beg_size: number;
  mid_size: number;
  end_size: number;
  block_size: number;
  padding_token: number;
  use_inputs_at_offsets: boolean;
}

interface FeaturesExtractionExampleMetadata {
  core_content_size: number;
  left_ws_num: number;
  right_ws_num: number;
}

interface ExampleModelFeatures {
  beg: number[];
  mid: number[];
  end: number[];
  offset_0x8000_0x8007: number[];
  offset_0x8800_0x8807: number[];
  offset_0x9000_0x9007: number[];
  offset_0x9800_0x9807: number[];
}

type FeaturesExtractionExamples = FeaturesExtractionExample[];

/**
 * Loads the reference features-extraction examples and checks that the
 * recorded `beg`/`end` features have the sizes declared in each example's
 * arguments.
 *
 * @throws Error naming the offending example if the sizes do not match.
 */
function parseGzippedFeaturesExtractionExamples(): FeaturesExtractionExamples {
  const parsedData = utils.parseGzippedJSON(
    "../tests_data/reference/features_extraction_examples.json.gz",
  );
  const featuresExtractionExamples = parsedData as FeaturesExtractionExamples;
  for (const example of featuresExtractionExamples) {
    if (
      example.features.beg.length != example.args.beg_size ||
      example.features.end.length != example.args.end_size
    ) {
      const error_msg = `Error parsing: ${JSON.stringify(example)}`;
      throw new Error(error_msg);
    }
  }
  return featuresExtractionExamples;
}

================================================
FILE: js/test/inference-vs-reference.test.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0
(the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. import { beforeAll, afterAll, describe, expect, it } from "@jest/globals"; import * as fs from "fs"; import { mkdtemp, rm, writeFile } from "fs/promises"; import path from "path"; import * as os from "os"; import { MagikaNode as Magika } from "../magika-node"; import { ContentTypeLabel } from "../src/content-type-label"; import { OverwriteReason } from "../src/overwrite-reason"; import { PredictionMode } from "../src/prediction-mode"; import { Status } from "../src/status"; import * as utils from "./utils"; const EXAMPLES_BY_PATH_INFO: Array<[string, ExampleByPath]> = (() => { const result: Array<[string, ExampleByPath]> = []; for (const example of parseGzippedExamplesByPath()) { result.push([example.path, example]); } return result; })(); const EXAMPLES_BY_CONTENT: ExamplesByContent = [ ...parseGzippedExamplesByContent(), ]; describe("Magika -- inference vs. reference", () => { let magika: Magika; const repoRootDir = "../"; let workdir = ""; beforeAll(async () => { magika = await Magika.create(); workdir = await mkdtemp(path.join(os.tmpdir(), "magika-")); }); afterAll(async () => { // Make sure we would only delete a tmp dir. if (workdir && workdir.startsWith("/tmp/")) { await rm(workdir, { recursive: true, force: true }); } }); it.each(EXAMPLES_BY_PATH_INFO)( 'check inference vs. 
reference - examples_by_path for "%s"', async (examplePath, exampleByPath) => { if (exampleByPath.prediction_mode != PredictionMode.HIGH_CONFIDENCE) { // We only support HIGH_CONFIDENCE mode for now. return; } const fileBytes = fs.readFileSync(repoRootDir + exampleByPath.path); let tempFilePath = path.join(workdir, "file.bin"); await writeFile(tempFilePath, fileBytes); const result = await magika.identifyBytes(fileBytes); const resultByStream = await magika.identifyStream( fs.createReadStream(tempFilePath), fileBytes.length, ); expect(result).toStrictEqual(resultByStream); expect(result.path).toBe("-"); expect(result.status).toBe(exampleByPath.status); expect(result.prediction.dl.label).toBe(exampleByPath.prediction?.dl); expect(result.prediction.output.label).toBe( exampleByPath.prediction?.output, ); expect(result.prediction.score).toBeCloseTo( exampleByPath.prediction!.score, ); expect(result.prediction.overwrite_reason).toBe( exampleByPath.prediction?.overwrite_reason, ); }, ); it.each(EXAMPLES_BY_CONTENT)( "check inference vs. reference - examples_by_content", async (exampleByContent) => { if (exampleByContent.prediction_mode != PredictionMode.HIGH_CONFIDENCE) { // We only support HIGH_CONFIDENCE mode for now. 
return; } const fileBytes = Buffer.from(exampleByContent.content_base64, "base64"); let tempFilePath = path.join(workdir, "fileBytes.bin"); await writeFile(tempFilePath, fileBytes); const result = await magika.identifyBytes(fileBytes); const resultByStream = await magika.identifyStream( fs.createReadStream(tempFilePath), fileBytes.length, ); expect(result).toStrictEqual(resultByStream); expect(result.status).toBe(exampleByContent.status); expect(result.prediction.score).toBeCloseTo( exampleByContent.prediction!.score, 1, ); expect(result.prediction.dl.label).toBe(exampleByContent.prediction?.dl); expect(result.prediction.output.label).toBe( exampleByContent.prediction?.output, ); expect(result.prediction.overwrite_reason).toBe( exampleByContent.prediction?.overwrite_reason, ); }, ); }); interface Prediction { dl: ContentTypeLabel; output: ContentTypeLabel; score: number; // Python float maps to TypeScript number overwrite_reason: OverwriteReason; // Keep snake_case to match JSON } interface ExampleByPath { prediction_mode: PredictionMode; path: string; status: Status; prediction: Prediction | null; } type ExamplesByPath = ExampleByPath[]; interface ExampleByContent { prediction_mode: PredictionMode; content_base64: string; status: Status; prediction: Prediction | null; } type ExamplesByContent = ExampleByContent[]; function parseGzippedExamplesByPath(): ExamplesByPath { const parsedData = utils.parseGzippedJSON( "../tests_data/reference/standard_v3_3-inference_examples_by_path.json.gz", ); const examplesByPath = parsedData as ExamplesByPath; for (const exampleByPath of examplesByPath) { if ( !validatePredictionMode(exampleByPath.prediction_mode) || !validatePrediction(exampleByPath.prediction ?? 
undefined) ) { const error_msg = `Error parsing: ${JSON.stringify(exampleByPath)}`; throw new Error(error_msg); } } return examplesByPath; } function parseGzippedExamplesByContent(): ExamplesByContent { const parsedData = utils.parseGzippedJSON( "../tests_data/reference/standard_v3_3-inference_examples_by_content.json.gz", ); const examplesByContent = parsedData as ExamplesByContent; for (const exampleByContent of examplesByContent) { if ( !validatePredictionMode(exampleByContent.prediction_mode) || !validatePrediction(exampleByContent.prediction ?? undefined) ) { const error_msg = `Error parsing: ${JSON.stringify(exampleByContent)}`; throw new Error(error_msg); } } return examplesByContent; } function validatePredictionMode(prediction_mode: PredictionMode): boolean { return Object.values(PredictionMode).includes(prediction_mode); } function validatePrediction(prediction?: Prediction): boolean { if (prediction === undefined) { return true; } if (!Object.values(ContentTypeLabel).includes(prediction.dl)) { return false; } if (!Object.values(ContentTypeLabel).includes(prediction.output)) { return false; } if (!Object.values(OverwriteReason).includes(prediction.overwrite_reason)) { return false; } return true; } ================================================ FILE: js/test/magika-cli.test.ts ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
import { describe, expect, it } from "@jest/globals";
import { spawn } from "child_process";
import path from "path";

describe("magika-cli.ts CLI Tests", () => {
  const scriptPath = path.resolve(__dirname, "../dist/mjs/magika-cli.js");
  const nodeExecutable = process.execPath;

  /**
   * Runs the built CLI with the given arguments and resolves with everything
   * it wrote to stdout/stderr plus its exit code once the process closes.
   * Rejects if the process cannot be spawned.
   */
  async function executeCli(
    args: string[] = [],
  ): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
    return new Promise((resolve, reject) => {
      const child = spawn(nodeExecutable, [scriptPath, ...args]);
      let stdout = "";
      let stderr = "";

      child.stdout.on("data", (chunk) => {
        stdout += chunk.toString();
      });
      child.stderr.on("data", (chunk) => {
        stderr += chunk.toString();
      });
      child.on("close", (exitCode) => {
        resolve({ stdout, stderr, exitCode });
      });
      child.on("error", (err) => {
        reject(err);
      });
    });
  }

  it("should display help information when no arguments are provided", async () => {
    const outcome = await executeCli();

    expect(outcome.exitCode).toBe(1);
    expect(outcome.stdout).toContain("Usage: ");
    expect(outcome.stdout).toContain("Options:");
    expect(outcome.stderr).toContain("error: missing required argument");

    // Check that the help is printed only once.
    const usageOccurrences = outcome.stdout.split("Usage:").length - 1;
    expect(usageOccurrences).toBe(1);
    const optionsOccurrences = outcome.stdout.split("Options:").length - 1;
    expect(optionsOccurrences).toBe(1);
  });

  it("should display help information when --help is passed", async () => {
    const outcome = await executeCli(["--help"]);

    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout).toContain("Usage: ");
    expect(outcome.stdout).toContain("Options:");
  });

  it("should process (by path) a specific file and output the expected result", async () => {
    const filePath = "../README.md";
    const expectedLabel = "markdown";

    const outcome = await executeCli([filePath]);

    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout).toContain(filePath);
    expect(outcome.stdout).toContain(expectedLabel);
  });

  it("should process (by stream) a specific file and output the expected result", async () => {
    const filePath = "../README.md";
    const expectedLabel = "markdown";

    const outcome = await executeCli(["--by-stream", filePath]);

    expect(outcome.exitCode).toBe(0);
    expect(outcome.stdout).toContain(filePath);
    expect(outcome.stdout).toContain(expectedLabel);
  });
});

================================================
FILE: js/test/magika.test.ts
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { TfnMock } from "./tfnHook";
// TfnMock must be imported first; leave this line here to avoid imports //
import { afterAll, beforeAll, beforeEach, describe, expect, it, jest, } from "@jest/globals"; import * as fc from "fast-check"; import * as fs from "fs"; import { Dirent, readdirSync } from "fs"; import { mkdtemp, readFile, rm } from "fs/promises"; import * as os from "os"; import * as path from "path"; import { Readable } from "stream"; import { finished } from "stream/promises"; import { ReadableStream } from "stream/web"; import { MagikaNode as Magika } from "../magika-node"; import { ContentTypeLabel } from "../src/content-type-label"; /** * Returns a list of test files and their correct labels. * * @param directory the directory to recursively scan for test files. * @returns the list of file paths and labels. */ const getTestFilesWithLabels = ( directory: string, ): Array<[string, string, Dirent]> => readdirSync(directory, { recursive: true, withFileTypes: true }) .filter((dirent) => dirent.isFile()) .map<[string, string, Dirent]>((dirent) => { const label = dirent.parentPath.split("/").pop() || "UNKNOWN"; const filePath = path.join(dirent.parentPath, dirent.name); return [label, filePath, dirent]; }); /** * Array of all our test files and their labels. 
*/ const BASIC_TEST_FILES: Array<[string, string, Dirent]> = [ ...getTestFilesWithLabels("../tests_data/basic"), ]; describe("Magika class", () => { const workdir = { root: "", model_config: "", model: "", }; beforeAll(async () => { workdir.root = await mkdtemp(path.join(os.tmpdir(), "magika-")); workdir.model_config = path.join(workdir.root, "config.json"); workdir.model = path.join(workdir.root, "model.json"); const model_config = Readable.fromWeb( (await fetch(Magika.MODEL_CONFIG_URL)).body as ReadableStream, ); const model = Readable.fromWeb( (await fetch(Magika.MODEL_URL)).body as ReadableStream, ); await Promise.all([ await finished( model_config.pipe(fs.createWriteStream(workdir.model_config)), ), await finished(model.pipe(fs.createWriteStream(workdir.model))), ]); const weights = JSON.parse((await readFile(workdir.model)).toString()) .weightsManifest.filter( (weights: { paths?: string[] }) => weights?.paths != null, ) .map((weights: { paths: string[] }) => { return weights.paths.map((path) => { return { name: path, url: Magika.MODEL_URL.replace(/model\.json$/, path), }; }); }) .flat(); await Promise.all( weights.map(async (weight: { name: string; url: string }) => { const model_config = Readable.fromWeb( (await fetch(weight.url)).body as ReadableStream, ); await finished( model_config.pipe( fs.createWriteStream(path.join(workdir.root, weight.name)), ), ); }), ); }); beforeEach(async () => { TfnMock.reset(); }); afterAll(async () => { if (workdir.root) { await rm(workdir.root, { recursive: true, force: true }); } }); it("should load default model from url", async () => { const magika = await Magika.create(); expect(magika.model.model).toBeDefined(); expect(magika.model_config.target_labels_space.length).toBeGreaterThan(0); expect(Object.values(TfnMock.accessed).reduce((a, b) => a + b, 0)).toBe(0); }); it("should load model from file path", async () => { const magika = await Magika.create({ modelConfigPath: workdir.model_config, modelPath: workdir.model, }); 
expect(magika.model.model).toBeDefined(); expect(magika.model_config.target_labels_space.length).toBeGreaterThan(0); expect(TfnMock.accessed.io).toBe(1); expect(Object.values(TfnMock.accessed).reduce((a, b) => a + b, 0)).toBe(1); }); it("scores should be in the expected range", async () => { const magika = await Magika.create(); fc.assert( fc.asyncProperty( fc.array(fc.integer({ min: 0, max: 255 }), { minLength: 0, maxLength: 10, }), async (bytesContent) => { const result = await magika.identifyBytes( Uint8Array.from(bytesContent), ); expect(result.prediction.score).toBeGreaterThanOrEqual(0); expect(result.prediction.score).toBeLessThanOrEqual(1); }, ), ); }); it.each(BASIC_TEST_FILES)( 'by_stream vs by_byte should return the same (correct) features/label for "%s" "%s"', async (label, testPath, testFile) => { const magika = await Magika.create({ modelConfigPath: workdir.model_config, modelPath: workdir.model, }); const featuresMock = jest.spyOn(magika.model, "predict"); // Do predictions by stream and by path. const filePath = path.join(testFile.parentPath, testFile.name); const streamResult = await magika.identifyStream( fs.createReadStream(filePath), (await fs.promises.stat(filePath)).size, ); const fileBytes = await fs.promises.readFile(filePath); const byteResult = await magika.identifyBytes(fileBytes); // Compare the results; they should match between them expect(streamResult).toStrictEqual(byteResult); if (streamResult.prediction.dl.label != ContentTypeLabel.UNDEFINED) { expect(featuresMock.mock.calls[0][0]).toStrictEqual( featuresMock.mock.calls[1][0], ); } // Check that the predictions make the expectations. expect(streamResult.prediction.output.label).toBe(label); // Check properties on the TfnMock object. expect(TfnMock.accessed.io).toBe(1); expect(Object.values(TfnMock.accessed).reduce((a, b) => a + b, 0)).toBe( 1, ); // The predictions are the same via bytes and via stream, let's just take one. 
const prediction = byteResult.prediction; expect(prediction).not.toBeUndefined(); expect(prediction.dl).not.toBeUndefined(); expect(prediction.output).not.toBeUndefined(); expect(prediction.score).not.toBeUndefined(); if (prediction.dl.label == ContentTypeLabel.UNDEFINED) { // If dl.label == UNDEFINED, scores_map should not be set. expect(prediction.scores_map).toBeUndefined(); } else { // If dl.label is not UNDEFINED, scores_map should be set. expect(prediction.scores_map).not.toBeUndefined(); // Check that the max score and label associated to max score matches // what's returned in the prediction. const scores = Object.values(prediction?.scores_map ?? {}); let curr_max_score = scores[0]; let argmax_idx = 0; for (let i = 1; i < scores.length; i++) { if (scores[i] > curr_max_score) { curr_max_score = scores[i]; argmax_idx = i; } } const predicted_label = magika.model_config.target_labels_space[argmax_idx]; expect(predicted_label).toBe(prediction.dl.label); expect(curr_max_score).toBe(prediction.score); } }, ); it.each(BASIC_TEST_FILES)( 'Magika is agnostic to the format of the input bytes for "%s" "%s"', async (label, testPath, testFile) => { const magika = await Magika.create({ modelConfigPath: workdir.model_config, modelPath: workdir.model, }); const featuresMock = jest.spyOn(magika.model, "predict"); const filePath = path.join(testFile.parentPath, testFile.name); const inputBuffer = await fs.promises.readFile(filePath); const inputUint8 = new Uint8Array(inputBuffer); const resultFromBuffer = await magika.identifyBytes(inputBuffer); const resultFromUint8 = await magika.identifyBytes(inputUint8); expect(resultFromBuffer).toStrictEqual(resultFromUint8); if (resultFromBuffer.prediction.dl.label != ContentTypeLabel.UNDEFINED) { expect(featuresMock.mock.calls[0][0]).toStrictEqual( featuresMock.mock.calls[1][0], ); } }, ); }); ================================================ FILE: js/test/tfnHook.ts ================================================ // Copyright 2024 
/**
 * Jest mock for `@tensorflow/tfjs-node` that forwards every export to the
 * real module while counting how often each top-level export is read.
 *
 * Tests inspect `TfnMock.accessed` to assert which parts of tfjs-node were
 * touched, and call `TfnMock.reset()` to zero the counters.
 */
export class TfnMock {
  /** Number of reads per top-level export name of the real module. */
  static accessed: Record<string, number> = {};

  /** Registers the counting proxy as the module `@tensorflow/tfjs-node`. */
  static mock = jest.mock(
    "@tensorflow/tfjs-node",
    () => {
      const hook = {};
      const original = jest.requireActual("@tensorflow/tfjs-node") as Record<
        string,
        unknown
      >;
      for (const key of Object.keys(original)) {
        TfnMock.accessed[key] = 0;
        Object.defineProperty(hook, key, {
          configurable: true, // allow spyOn to work
          enumerable: true, // so the key shows up
          get(): unknown {
            TfnMock.accessed[key] = (TfnMock.accessed[key] || 0) + 1;
            return original[key];
          },
        });
      }
      return hook;
    },
    { virtual: true },
  );

  /** Sets every access counter back to zero, keeping the known keys. */
  static reset(): void {
    for (const key of Object.keys(TfnMock.accessed)) {
      TfnMock.accessed[key] = 0;
    }
  }
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the `any` default keeps callers that pass no type argument compiling
/**
 * Reads a gzip-compressed JSON file and returns its top-level array.
 *
 * @param filePath - Path of the gzip-compressed JSON file to read.
 * @returns The decoded array. Only "is an array" is checked at runtime; the
 *   element type `T` is asserted by the caller, not validated.
 * @throws Error if the decoded document is not a JSON array. Errors raised
 *   while reading, decompressing or parsing the file propagate unchanged.
 */
export function parseGzippedJSON<T = any>(filePath: string): Array<T> {
  const gzippedBuffer = fs.readFileSync(filePath);
  const jsonString = zlib.gunzipSync(gzippedBuffer).toString("utf-8");
  // Keep the parse result as `unknown` until the shape check below narrows it.
  const parsedData: unknown = JSON.parse(jsonString);
  if (!Array.isArray(parsedData)) {
    throw new Error("Parsed JSON is not an array as expected for ExampleList.");
  }
  return parsedData as Array<T>;
}
================================================ 3.12 ================================================ FILE: python/CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). Note that for version number starting with a `0`, i.e., `0.x.y`, a bump of `x` should be considered as a major (and thus potentially breaking) change. See semver guidelines for more details about this. ## [1.0.2] - 2026-02-25 - Mark python 3.14 as supported. - Remove direct dependency on numpy. - Remove dependency on python-dotenv (note: .env files are no longer loaded automatically). - Remove onnxruntime<=1.20.1 Windows pin. ## [1.0.1] - 2025-10-31 - Mark end of experimental phase. No changes from last version. ## [0.6.3] - 2025-10-30 - Pin onnxruntime on Windows ([#1099](https://github.com/google/magika/pull/1099)). - Add docstrings for all relevant modules, classes, and methods. - Improved READMEs and overall [documentation](https://securityresearch.google/magika/cli-and-bindings/python/). ## [0.6.2] - 2025-05-02 - Mark python 3.13 as supported. - New model `standard_v3_3` model, with better support for TypeScript and non-ascii characters in textual files. See [models' CHANGELOG](../assets/models/CHANGELOG.md) for more information. - `identify_stream()` now restores the stream's original position after reading from it, preventing side effects on subsequent stream operations. ([#1020](https://github.com/google/magika/pull/1020)) - Add `asdict()` utility method to `MagikaResult`. - Set `prediction.overwrite_reason` to `Overwrite.NONE` if `output.label` is the same as `dl.label`. ([#1023](https://github.com/google/magika/pull/1023)) - Bugfix: limit the number of bytes we read in case of an input with just many whitespaces. 
([#1015](https://github.com/google/magika/pull/1015)) - Bugfix: do not alter warnings' simplefilter as this has visible side effects for other modules. ([#1017](https://github.com/google/magika/pull/1017)) - Bugfix: magika's python client now properly warns for low-confidence predictions. - Bugfix: update Magika's StrEnum string representation to be compatible with standard library. ## [0.6.1] - 2025-03-19 ### Overview Magika v0.6.1 is a significant update featuring a new model with 2x supported content types, a new command line client in Rust, performance improvements, API enhancements, and a few breaking changes. This changelog entry rolls up all changes from v0.5.1, the last stable release. > [!IMPORTANT] > There are a few breaking changes! After reading about the new key features and improvements, we suggest to consult the migration guide below and the [updated documentation](./README.md). ### Key Features and Improvements - **New deep learning model:** We introduce a new model, `standard_v3_2`, which supports 2x content types (200+ in total, see full list [here](../assets/models/standard_v3_2/README.md)), has a similar ~99% average accuracy, and is ~20% faster, with an inference speed of about ~2ms on CPUs (YMMV depending on your testing setup). See [models' CHANGELOG](../assets/models/CHANGELOG.md) for more information. - **New command line client, written in Rust:** We developed a new command line client, written in Rust, which is not affected by the one-time bootstrap overhead caused by the python's interpreter itself. This new client is packaged, pre-compiled, into the `magika` python package. This new client replaces the old client written in Python (but the old Python one is still available as a fallback for those platforms for which we don't have precompiled rust binaries). - **New stream-based identification:** Added `identify_stream(stream: typing.BinaryIO)` API to infer content types from open binary streams. 
([#970](https://github.com/google/magika/issues/970)) - **Improved path handling:** `identify_path` and `identify_paths` now accept `Union[str, os.PathLike]` objects. You no longer need to explicitly use `pathlib.Path`. ([#935](https://github.com/google/magika/issues/935)) - **Improved python API:** The new Python APIs offer a number of improvements, including: the inference APIs now return a `MagikaResult`, which is a [`absl::StatusOr`](https://abseil.io/docs/cpp/guides/status)-like object that wraps `MagikaPrediction`, with a clear separation between valid predictions and error situations; the output content types (`label`) are not just `str` anymore, but of type `ContentTypeLabel`, making integrations more robust (`ContentTypeLabel` extends `StrEnum`: thus, they are not just `str`, but you can treat them as such). The `MagikaPrediction` object now has additional `is_text` and `extensions` fields (in addition to the existing `label`, `mime_type`, `group`, and `description`). - **New debugging APIs**: Added new APIs to ease debugging and introspection, such as `get_output_content_types()`, `get_model_content_types()`, `get_module_version()`, and `get_model_name()`. ### Breaking Changes and Migration Guide This release introduces several breaking changes. Please review this guide carefully to update your code: 1. **New `identify_*` API output format:** The inference Python APIs now return a `MagikaResult` object, which is similar to `absl::StatusOr`; This provides a cleaner way to handle errors. `dl.ct_label` and `output.ct_label` are renamed to `dl.label` and `output.label`. `label`s are now of type `ContentTypeLabel`, which extends `StrEnum` (thus, they are not just `str`, but you can treat them as such). The `score` field is now at the top level, alongside `dl` and `output`. The `magic` field has been removed as it was often either incorrect or redundant; use `description` instead. 
- **Before (v0.5.x and earlier):** ```python import magika m = magika.Magika() result = m.identify_path("my_file.py") print(result.output.ct_label) # Assumed success ``` - **After (v0.6.1):** ```python import magika m = magika.Magika() result = m.identify_path("my_file.py") if result.ok(): print(result.output.label) else: print(f"Error: {result.status}") ``` 2. **CLI Output Format Change (v0.6.0):** The JSON output format of the CLI has changed. Those changes are analogous to the changes to the Python APIs. The `score` field is now at the top level, alongside `dl` and `output`, and is no longer nested within `dl` or `output`. The output also includes `is_text` and `extensions` fields. The `magic` metadata has been removed as it was often either incorrect or redundant; use `description` instead. Moreover, similarly to what happens under the hood with the `StatusOr` pattern, `result.status` indicates whether the prediction was successful, and the prediction results are available under the `result.value` key. - **Before (v0.5.x and earlier):** (Illustrative example - adapt to your specific output) ```json { "path": "code.py", "dl": { "ct_label": "python", "score": 0.9940916895866394, "group": "code", "mime_type": "text/x-python", "magic": "Python script", "description": "Python source" }, "output": { "ct_label": "python", "score": 0.9940916895866394, "group": "code", "mime_type": "text/x-python", "magic": "Python script", "description": "Python source" } } ``` - **After (v0.6.1):** ```json { "path": "code.py", "result": { "status": "ok", "value": { "dl": { "description": "Python source", "extensions": ["py", "pyi"], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "output": { "description": "Python source", "extensions": ["py", "pyi"], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "score": 0.9890000224113464 } } } ``` 3. 
**`dl.label == ContentTypeLabel.UNDEFINED` when the model is not used:** There are situations in which the deep learning model is not used, for example when the file is too small or empty. In these cases, `dl.label` is now set to `ContentTypeLabel.UNDEFINED` instead of the whole `dl` block being set to `None`. - **Before (v0.5.x and earlier):** ```python # ... (assuming successful result) if prediction.dl is not None: print(prediction.dl.ct_label) ``` - **After (v0.6.1):** ```python # ... (assuming successful result) if prediction.dl.label != magika.ContentTypeLabel.UNDEFINED: print(prediction.dl.label) ``` 4. **Expanded List of Content Types:** The model now supports over 200 content types. - **Migration:** Review the [updated list of supported content types](../assets/models/standard_v3_2/README.md) and adjust any code that relies on specific content type labels returned by previous versions. Labels have _not_ changed, but a file previously detected as `javascript` may now be detected as `typescript`. Consider using `get_output_content_types()` to dynamically retrieve the supported labels. 5. **Pure Python Wheel and Rust Client Fallback:** If you are installing Magika on a platform _without_ pre-built wheels (e.g., Windows on ARM), you will automatically get the pure-python wheel. In this case, the package does _not_ include the Rust binary client, but it does include the old Python client as a fallback; you can run this old Python client with `$ magika-python-client`.
### Full Changelog For a detailed list of all changes, including those from the -rc releases, please refer to the individual changelog entries for each release candidate: - [0.6.1-rc3](https://github.com/google/magika/blob/python-v0.6.1-rc3/python/CHANGELOG.md#061-rc3---2025-03-17) - [0.6.1-rc2](https://github.com/google/magika/blob/python-v0.6.1-rc2/python/CHANGELOG.md#061-rc2---2025-03-11) - [0.6.1-rc1](https://github.com/google/magika/blob/python-v0.6.1-rc1/python/CHANGELOG.md#061-rc1---2025-02-04) - [0.6.1-rc0](https://github.com/google/magika/blob/python-v0.6.1-rc0/python/CHANGELOG.md#061-rc0---2025-01-23) - [0.6.0-rc3](https://github.com/google/magika/blob/python-v0.6.0-rc3/python/CHANGELOG.md#060-rc3---2024-11-20) - [0.6.0-rc2](https://github.com/google/magika/blob/python-v0.6.0-rc2/python/CHANGELOG.md#060-rc2---2024-11-19) - [0.6.0-rc1](https://github.com/google/magika/blob/python-v0.6.0-rc2/python/CHANGELOG.md#060-rc1---2024-10-07) ## [0.5.1] - 2024-03-06 - Add support for python 3.12. Magika now supports python >=3.8 and <3.13. - Fix bugs for features extraction to cover more corner cases. - Remove MIME types from table of supported content types (Relevant for `--list-output-content-types`; see FAQs for context). - Refactor features extraction around a Seekable abstraction; we now have only one reference implementation. - Start groundwork for v2 of features extraction. - Various clean ups and internal refactors. ## [0.5.0] - 2024-02-15 - New public python APIs: `identify_paths`, `identify_path`, `identify_bytes`. - The APIs now return a `MagikaResult` object. - When the model's prediction has low confidence and we return a generic content type, print anyways (with a disclaimer) the model's best guess. - Updated description for "unknown" to "Unknown binary data". - Magika CLI now defaults to "high-confidence" mode. "default" mode is now called "medium-confidence". 
- Magika CLI `-p/--output-probability` has been renamed to `-s/--output-score` for consistency. - Default model is now called `standard_v1`. - Major refactoring and clean up. ## [0.4.1] - 2024-02-07 - Various improvements and clean ups. ## [0.4.0] - 2023-12-22 ### Changed - Update model to dense_v4_top_20230910. - Package now contains the model itself. - Support reading from stdin: - `$ cat <file> | magika -` - `$ curl <url> | magika -` - Change how we deal with padding, using 256 instead of 0. This boosts precision. - "symlink" output label has been renamed to "symlinktext" to better reflect its nature. - New `--prediction-mode` CLI option to indicate which confidence is required for the predictions. We support three modes: `best-guess`, `default`, `high-confidence`. - Support for directories and symlinks similarly to `file`. - Adapt `-r` / `--recursive` CLI option to be compatible with the new way magika handles directories. - Add special handling for small files. - Magika does not crash anymore when scanning files with permission issues. It now returns "permission_error". - Do not resolve file paths (i.e., relative paths remain relative). - Add `--no-dereference` CLI option: by default, symlinks are dereferenced; this option makes magika not dereference them, which is what `file` does. - Clean up and many bug fixes. ## [0.3.1] - 2023-08-23 ### Changed - Removed warnings when using MIME type and compatibility mode. ## [0.3.0] - 2023-08-23 ### Changed - By default, magika now outputs a human-readable output. - Add `-l` / `--label` CLI option to output a stable, content type label. - JSON/JSONL output now shows all metadata about a given content type. - Add metadata about magic and description for each relevant content type. - Logs are now printed to stderr, not stdout. - Add `--generate-report` CLI option to output a JSON report that can be useful for debugging and reporting feedback.
- Be more flexible with the required python version (now we require "^3.8" instead of "^3.8,<3.11") - Show a descriptive error in case magika can't find any file to scan (instead of silently exiting). ## [0.2.2] - 2023-08-11 ### Changed - If the prediction score is higher than a given threshold (0.95), consider it regardless of the per-content-type threshold. - Output format is back being just ``; group is displayed only when showing metadata. - Update metadata of some content types. ## [0.2.1] - 2023-08-10 ### Changed - Several small bug fixes. ## [0.2.0] - 2023-08-09 ### Added - Input files are now processed in multiple small batches, instead of one big batch. - Per-content-type threshold based on the 0.005 quantile for recall. - MIME type and "group" metadata for all content types. - Introduce basic support for compatibility mode. - `-c` / `--compatibility-mode` CLI option to enable compatibility mode. - `--no-colors` CLI option to disable colors. - `-b` / `--batch-size` CLI option to specify the batch size. - `--guess` / `--output-highest-probability` CLI option to output the content type with the highest probability regardless of its probability score. - `--version` CLI option to print Magika's version. ### Changed - Output follows the `::` format. - Probability score is not shown by default; enable with `-p`. - Output is colored according to the file content type's group. - Remove dependency from richlogger, add a much simpler logger. ## [0.1.0] - 2023-07-28 - First release. 
================================================ FILE: python/README.md ================================================ # Magika Python Package [![image](https://img.shields.io/pypi/v/magika.svg)](https://pypi.python.org/pypi/magika) [![NPM Version](https://img.shields.io/npm/v/magika)](https://npmjs.com/package/magika) [![image](https://img.shields.io/pypi/l/magika.svg)](https://pypi.python.org/pypi/magika) [![image](https://img.shields.io/pypi/pyversions/magika.svg)](https://pypi.python.org/pypi/magika) [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8706/badge)](https://www.bestpractices.dev/en/projects/8706) ![CodeQL](https://github.com/google/magika/workflows/CodeQL/badge.svg) [![Actions status](https://github.com/google/magika/actions/workflows/python-build-and-release-package.yml/badge.svg)](https://github.com/google/magika/actions) [![PyPI Monthly Downloads](https://static.pepy.tech/badge/magika/month)](https://pepy.tech/projects/magika) [![PyPI Downloads](https://static.pepy.tech/badge/magika)](https://pepy.tech/projects/magika) Magika is a novel AI-powered file type detection tool that relies on the recent advance of deep learning to provide accurate detection. Under the hood, Magika employs a custom, highly optimized model that only weighs about a few MBs, and enables precise file identification within milliseconds, even when running on a single CPU. Magika has been trained and evaluated on a dataset of ~100M samples across 200+ content types (covering both binary and textual file formats), and it achieves an average ~99% accuracy on our test set. Use Magika as a command line client or in your Python code! You can find more information on which content types are supported, extended documentation, and bindings for other languages on Magika's website at [https://securityresearch.google/magika/](https://securityresearch.google/magika/). 
## Installing Magika Magika is available as `magika` on [PyPI](https://pypi.org/project/magika): To install the most recent stable version: ```shell pip install magika ``` If you intend to use Magika only as a command line, you may want to use `pipx install magika` instead. If you want to test out the latest release candidate, you can install it with `pip install --pre magika`. ## Using Magika as a command-line tool > Beginning with version `0.6.0`, the magika Python package includes a pre-compiled Rust-based command-line tool, replacing the previous Python version. This binary is distributed as platform-specific wheels for most common architectures. For unsupported platforms, a pure-Python wheel is also available, providing the legacy Python client as a fallback. ```shell $ cd tests_data/basic && magika -r * asm/code.asm: Assembly (code) batch/simple.bat: DOS batch file (code) c/code.c: C source (code) css/code.css: CSS source (code) csv/magika_test.csv: CSV document (code) dockerfile/Dockerfile: Dockerfile (code) docx/doc.docx: Microsoft Word 2007+ document (document) epub/doc.epub: EPUB document (document) epub/magika_test.epub: EPUB document (document) flac/test.flac: FLAC audio bitstream data (audio) handlebars/example.handlebars: Handlebars source (code) html/doc.html: HTML document (code) ini/doc.ini: INI configuration file (text) javascript/code.js: JavaScript source (code) jinja/example.j2: Jinja template (code) jpeg/magika_test.jpg: JPEG image data (image) json/doc.json: JSON document (code) latex/sample.tex: LaTeX document (text) makefile/simple.Makefile: Makefile source (code) markdown/README.md: Markdown document (text) [...] 
``` ```shell $ magika ./tests_data/basic/python/code.py --json [ { "path": "./tests_data/basic/python/code.py", "result": { "status": "ok", "value": { "dl": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "output": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "score": 0.753000020980835 } } } ] ``` ```shell $ cat doc.ini | magika - -: INI configuration file (text) ``` ```help $ magika --help Determines the content type of files with deep-learning Usage: magika [OPTIONS] [PATH]... Arguments: [PATH]... List of paths to the files to analyze. Use a dash (-) to read from standard input (can only be used once). Options: -r, --recursive Identifies files within directories instead of identifying the directory itself --no-dereference Identifies symbolic links as is instead of identifying their content by following them --colors Prints with colors regardless of terminal support --no-colors Prints without colors regardless of terminal support -s, --output-score Prints the prediction score in addition to the content type -i, --mime-type Prints the MIME type instead of the content type description -l, --label Prints a simple label instead of the content type description --json Prints in JSON format --jsonl Prints in JSONL format --format Prints using a custom format (use --help for details). 
The following placeholders are supported: %p The file path %l The unique label identifying the content type %d The description of the content type %g The group of the content type %m The MIME type of the content type %e Possible file extensions for the content type %s The score of the content type for the file %S The score of the content type for the file in percent %b The model output if overruled (empty otherwise) %% A literal % -h, --help Print help (see a summary with '-h') -V, --version Print version ``` Check the [Rust CLI docs](https://securityresearch.google/magika/cli-and-bindings/cli/) for more information. ## Using Magika as a Python module Here are a few examples of how to use the `Magika` Python module: ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_bytes(b'function log(msg) {console.log(msg);}') >>> print(res.output.label) javascript ``` ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_path('./tests_data/basic/ini/doc.ini') >>> print(res.output.label) ini ``` ```python >>> from magika import Magika >>> m = Magika() >>> with open('./tests_data/basic/ini/doc.ini', 'rb') as f: ... res = m.identify_stream(f) >>> print(res.output.label) ini ``` ## Core Concepts To get the most out of Magika, it's worth learning about its core concepts. You can read about how Magika works, the models, the supported content types, the prediction modes, and notes on how to understand all parts of the output in the [Core Concepts](https://securityresearch.google/magika/core-concepts) section of Magika's website. ## API documentation The Python API is documented in the [Python bindings](https://securityresearch.google/magika/cli-and-bindings/python/) section of Magika's website, and via docstrings in the `Magika` module source code.
## Research Paper and Citation We describe how we developed Magika and the choices we made in our research paper, which was accepted at the International Conference on Software Engineering (ICSE) 2025. You can find a copy of the paper [here](https://securityresearch.google/magika/2025_icse_magika.pdf). If you use this software for your research, please cite it as: ```bibtex @InProceedings{fratantonio25:magika, author = {Yanick Fratantonio and Luca Invernizzi and Loua Farah and Kurt Thomas and Marina Zhang and Ange Albertini and Francois Galilee and Giancarlo Metitieri and Julien Cretin and Alexandre Petit-Bianco and David Tao and Elie Bursztein}, title = {{Magika: AI-Powered Content-Type Detection}}, booktitle = {Proceedings of the International Conference on Software Engineering (ICSE)}, month = {April}, year = {2025} } ``` ================================================ FILE: python/mypy.ini ================================================ [mypy] show_error_codes = True follow_imports = silent local_partial_types = true strict_equality = true no_implicit_optional = true warn_incomplete_stub = true warn_redundant_casts = true warn_unused_configs = true warn_unused_ignores = true enable_error_code = ignore-without-code, redundant-self, truthy-iterable disable_error_code = annotation-unchecked, import-not-found, import-untyped, type-arg, no-any-unimported extra_checks = false check_untyped_defs = true disallow_incomplete_defs = true disallow_subclassing_any = true disallow_untyped_calls = true disallow_untyped_decorators = true disallow_untyped_defs = true warn_return_any = true warn_unreachable = true allow_redefinition = false strict_optional = true [mypy-magika.*] ignore_missing_imports = true no_implicit_reexport = true disallow_untyped_calls = true disallow_any_unimported = true disallow_untyped_decorators = true strict = true enable_error_code = ignore-without-code, redundant-self, truthy-iterable, possibly-undefined, truthy-bool, truthy-iterable, 
unused-ignore, mutable-override [mypy-magika.strenum.*] ignore_errors = True [mypy-tests.*] disallow_untyped_defs = false disallow_untyped_calls = false disallow_untyped_decorators = false ================================================ FILE: python/pyproject.toml ================================================ [project] name = "magika" description = "A tool to determine the content type of a file with deep learning" authors = [ {name = "Magika Developers", email = "magika-dev@google.com"}, ] readme = "README.md" license = {"text" = "Apache-2.0"} requires-python = ">=3.8" keywords = ["content type detection", "machine learning"] classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Intended Audience :: System Administrators", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Operating System :: MacOS", "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Security", "Topic :: Software Development", "Typing :: Typed", ] version = "1.0.2" dependencies = [ "click>=8.1.7", # Python 3.14+: Min version 1.24.1 "onnxruntime>=1.24.1 ; python_version >= '3.14'", # Python 3.13: Min version 1.21.0 "onnxruntime>=1.21.0 ; python_version == '3.13'", # Python 3.11 - 3.12: Min version 1.17.0 "onnxruntime>=1.17.0 ; python_version >= '3.11' and python_version < '3.13'", # Python 3.10: Support dropped in 1.24.1 (use latest compatible) "onnxruntime>=1.17.0, <1.24.1 ; python_version == '3.10'", # Python 3.9: Support dropped 
in 1.20.0 "onnxruntime>=1.17.0, <1.20.0 ; python_version <= '3.9'", ] [project.urls] Homepage = "https://github.com/google/magika" Documentation = "https://github.com/google/magika/blob/main/python/README.md" Repository = "https://github.com/google/magika/" Issues = "https://github.com/google/magika/issues" Changelog = "https://github.com/google/magika/blob/main/python/CHANGELOG.md" [dependency-groups] dev = [ "mypy>=1.11.2", "ipython>=8.12.3", "pytest>=8.3.2", "ruff>=0.14.0", "tomli-w>=1.0.0", "tomli>=2.0.1", "tqdm>=4.67.1", "dacite>=1.9.2", "requests>=2.32.4", ] [build-system] requires = ["maturin>=1.12.2"] build-backend = "maturin" [[tool.uv.index]] url = "https://pypi.org/simple" default = true [tool.maturin] python-source = "src" module-name = "magika" bindings = "bin" manifest-path = "../rust/cli/Cargo.toml" locked = true [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or # McCabe complexity (`C901`) by default. select = ["D", "E4", "E7", "E9", "F", "I001"] ignore = ["D105"] [tool.ruff.lint.per-file-ignores] "scripts/*" = ["D"] "tests/*" = ["D"] [tool.ruff.lint.pydocstyle] convention = "google" [tool.ruff.format] docstring-code-format = true ================================================ FILE: python/pytest.ini ================================================ [pytest] log_cli = 1 log_level = WARNING markers = smoketest slow ================================================ FILE: python/scripts/check_changelog.sh ================================================ #!/bin/bash # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. set -euo pipefail CHANGED_FILES=$(git diff --name-only "origin/${1:-main}"...HEAD) echo "Checking python/* for undocumented changes..." if echo "$CHANGED_FILES" | grep -qE '^python/.*$'; then if ! echo "$CHANGED_FILES" | grep -qE '^python/CHANGELOG\.md$'; then echo "::warning title=Changelog Missing::Some changes in the Python package are not documented in python/CHANGELOG.md" fi fi ================================================ FILE: python/scripts/check_copyright.py ================================================ # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import re import subprocess import sys from pathlib import Path import click EXTENSIONS = (".py", ".sh", ".ts", ".rs") RELEVANT_DIRS = ("python/", "js/", "rust/") EXCLUDE_DIRS = ("js/simple_examples",) COPYRIGHT_PATTERN = re.compile(r"Copyright", re.IGNORECASE) @click.command() def main(): """ Check for missing copyright headers in the target files. The command performs the following steps: - Retrieves the list of git-tracked files meeting the criteria. 
- Checks each file for a valid copyright header. - Prints any files that are missing the header. - Exits with status code 1 if any file is missing the header; otherwise exits successfully. """ error_files = [] for file_path in get_relevant_files_paths(): if not has_copyright(file_path): error_files.append(file_path) if error_files: click.secho("Missing copyright in:", fg="red", bold=True) for file in error_files: click.echo(f"- {file}") sys.exit(1) else: click.secho("All files have valid copyright.", fg="green") def get_relevant_files_paths() -> list[Path]: """Finds relevant, tracked files using Git. Filters the output of `git ls-files` based on three criteria defined in global constants: - File must have an extension in `EXTENSIONS`. - File path must be within a directory in `DIRECTORIES`. - File path must NOT contain the string in `EXCLUDED_PATH`. """ repo_root_dir = Path(__file__).parent.parent.parent.resolve() assert (repo_root_dir / ".git").is_dir() paths = [] try: result = subprocess.run( ["git", "ls-files", str(repo_root_dir)], capture_output=True, text=True, check=True, cwd=str(repo_root_dir), ) for rel_path_str in result.stdout.strip().splitlines(): path = repo_root_dir / rel_path_str if ( path.is_file() and path.stat().st_size > 0 and path.suffix in EXTENSIONS and rel_path_str.startswith(RELEVANT_DIRS) and not rel_path_str.startswith(EXCLUDE_DIRS) ): paths.append(path) except subprocess.CalledProcessError as e: click.secho(f"Git command failed: {e}", fg="red", bold=True) sys.exit(2) return paths def has_copyright(path: Path) -> bool: """Checks if a file contains a copyright notice within the first N lines. Returns True if found, False otherwise. 
""" with path.open("r", encoding="utf-8") as f: for _ in range(5): line = f.readline() if COPYRIGHT_PATTERN.search(line): return True return False if __name__ == "__main__": main() ================================================ FILE: python/scripts/check_documentation.py ================================================ # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Performs a number of sanity checks on Magika's documentation.""" from __future__ import annotations import re import sys from dataclasses import dataclass from pathlib import Path import click import requests REPO_ROOT_DIR = Path(__file__).parent.parent.parent assert REPO_ROOT_DIR.is_dir() and (REPO_ROOT_DIR / ".git").is_dir() IGNORE_PREFIX_PATTERNS = [ ".mypy_cache", ".pytest_cache", ".ruff_cache", "python/.venv", "python/dist", "website/node_modules", "website/dist", "js/node_modules", "js/dist", "website-ng/node_modules", "website-ng/dist", ] @click.command() @click.option("-v", "--verbose", is_flag=True) def main(verbose: bool) -> None: with_errors = False success = check_versions_are_up_to_date() if not success: with_errors = True success = check_markdown_links(verbose) if not success: with_errors = True if with_errors: print("There was at least one error.") sys.exit(1) print("Everything looks good.") def check_versions_are_up_to_date() -> bool: """Checks that the mentioned latest versions and models are up to date. 
Returns True if everything is good, False otherwise.""" # Actual last versions and models rust_cli_latest_stable_version = get_max_stable_version_for_crate("magika-cli") rust_lib_latest_stable_version = get_max_stable_version_for_crate("magika") rust_default_model_name = get_rust_default_model_name() python_latest_stable_version = get_python_latest_stable_version() python_default_model_name = get_python_default_model_name() javascript_latest_stable_version = get_latest_version_for_npm_package("magika") javascript_default_model_name = get_javascript_default_model_name() demo_model_name = get_demo_model_name() expected_table = [ (rust_cli_latest_stable_version, rust_default_model_name), (python_latest_stable_version, python_default_model_name), (javascript_latest_stable_version, javascript_default_model_name), (rust_lib_latest_stable_version, rust_default_model_name), ("-", demo_model_name), ("-", "-"), ] # Extract documented last versions and models bindings_overview_path = ( REPO_ROOT_DIR / "website-ng" / "src" / "content" / "docs" / "cli-and-bindings" / "overview.md" ) assert bindings_overview_path.is_file() lines = bindings_overview_path.read_text().splitlines() parsed_table = [] for line in lines: # This is a hack to parse the table in the binding's overview, but it is # simple and self-contained enough to not cause problems. And we'll # notice immediately if things break. 
if line.startswith("| ["): cols = line.split("|") latest_version = cols[3].strip(" `") default_model = cols[4].strip() if default_model != "-": default_model = default_model.split("]")[0].split("[")[1].strip(" `") parsed_table.append((latest_version, default_model)) if expected_table == parsed_table: return True else: print( f"ERROR: Found stale information in binding's overview table:\n{expected_table=}\n{parsed_table=}" ) return False def get_python_latest_stable_version() -> str: res = requests.get("https://pypi.org/pypi/magika/json") assert res.status_code == 200 latest_stable_version = res.json().get("info", {}).get("version", None) assert latest_stable_version is not None return latest_stable_version def get_python_default_model_name() -> str: magika_path = REPO_ROOT_DIR / "python" / "src" / "magika" / "magika.py" return extract_one_match_with_regex_from_file( magika_path, '_DEFAULT_MODEL_NAME = "([a-zA-Z0-9_]+)"' ) def get_javascript_default_model_name() -> str: magika_path = REPO_ROOT_DIR / "js" / "magika.ts" return extract_one_match_with_regex_from_file( magika_path, 'static MODEL_VERSION = "([a-zA-Z0-9_]+)";' ) def get_demo_model_name() -> str: """Get the model name used by the demo.""" demo_path = ( REPO_ROOT_DIR / "website-ng" / "src" / "components" / "MagikaDemo.svelte" ) return extract_one_match_with_regex_from_file( demo_path, 'const MAGIKA_MODEL_VERSION = "([a-zA-Z0-9_]+)";' ) def get_rust_default_model_name() -> str: model_symlink_path = REPO_ROOT_DIR / "rust" / "gen" / "model" assert model_symlink_path.is_symlink() return model_symlink_path.readlink().name def check_markdown_links(verbose: bool) -> bool: """Checks that links in Markdown files are OK. 
Returns True if everything is good, False otherwise.""" with_errors = False for path in enumerate_markdown_files_in_dir(Path(".")): if verbose: print(f"Analyzing file: {path}") for ui in extract_uris_infos_from_file( path, verbose=verbose, ): if not ui.is_valid: with_errors = True print( f"ERROR: {path.relative_to(REPO_ROOT_DIR)} has non-valid uri: {ui.uri}" ) # For python/README.md (which is used on pypi), we also check that # the URIs are either pointing to an external resource or are pure # anchors. if str(path.relative_to(REPO_ROOT_DIR)) == "python/README.md": if not ui.is_external and not ui.is_pure_anchor: with_errors = True print( f"ERROR: {path.relative_to(REPO_ROOT_DIR)}, in python/, has a non-external uri: {ui.uri}" ) # Same for js/README.md, which ends up on npm. if str(path.relative_to(REPO_ROOT_DIR)) == "js/README.md": if not ui.is_external and not ui.is_pure_anchor: with_errors = True print( f"ERROR: {path.relative_to(REPO_ROOT_DIR)}, in python/, has a non-external uri: {ui.uri}" ) success = with_errors is False return success def enumerate_markdown_files_in_dir(rel_dir: Path) -> list[Path]: if rel_dir.is_absolute(): print(f"{rel_dir} is not relative") sys.exit(1) a_dir = REPO_ROOT_DIR / rel_dir assert a_dir.is_dir() paths: list[Path] = [] for path in sorted(a_dir.rglob("*.md")): should_ignore = False for exclude_prefix_pattern in IGNORE_PREFIX_PATTERNS: if str(path.relative_to(REPO_ROOT_DIR)).startswith(exclude_prefix_pattern): should_ignore = True break if not should_ignore: paths.append(path) return paths def extract_uris_infos_from_file(path: Path, verbose: bool) -> list[UriInfo]: uri_regex = r"\[.*?\]\((.*?)\)" uris: list[str] = re.findall(uri_regex, path.read_text()) uris_infos: list[UriInfo] = [] for uri in uris: if verbose: print(f"Analyzing uri: {uri}") is_external = uri.startswith("http://") or uri.startswith("https://") is_valid = None is_pure_anchor = None is_insecure = None if is_external: # We treat links pointing to our own repo in a 
special way. For # simplicity, we only deal with links pointing to the main branch. repo_main_prefix_url = "https://github.com/google/magika/blob/main/" if uri.startswith(repo_main_prefix_url): rel_path = uri.removeprefix(repo_main_prefix_url) assert rel_path.find("#") == -1, ( "Local links with anchors not supported yet" ) abs_path = REPO_ROOT_DIR / rel_path is_valid = abs_path.is_file() else: # We mark any other external link as valid, as actually checking # it's too much of a pain. is_valid = True is_pure_anchor = False if uri.startswith("http://"): is_insecure = True print(f"WARNING: {uri} is not using https") else: is_insecure = False else: is_insecure = False if uri.startswith("#"): is_valid = True is_pure_anchor = True else: is_pure_anchor = False if Path(uri).is_absolute(): website_files_dir = ( REPO_ROOT_DIR / "website-ng" / "src" / "content" / "docs" / uri.removeprefix("/magika/") ) md_path = website_files_dir.with_suffix(".md") mdx_path = website_files_dir.with_suffix(".mdx") public_path = ( REPO_ROOT_DIR / "website-ng" / "public" / Path(uri).name ) if ( website_files_dir.is_dir() or md_path.is_file() or mdx_path.is_file() or public_path.is_file() ): is_valid = True else: is_valid = False else: if uri.find("#") >= 0: # This URI is not a pure anchor, but it does have an # anchor. We remove it so that we can check whether the # file exists or not. 
rel_file_path = uri.split("#")[0] else: rel_file_path = uri abs_path = path.parent / rel_file_path is_valid = abs_path.is_file() or abs_path.is_dir() assert is_valid is not None assert is_pure_anchor is not None assert is_insecure is not None uris_infos.append( UriInfo( uri=uri, is_external=is_external, is_valid=is_valid, is_pure_anchor=is_pure_anchor, is_insecure=is_insecure, ) ) return uris_infos def get_max_stable_version_for_crate(crate_name: str) -> str: url = f"https://crates.io/api/v1/crates/{crate_name}" r = requests.get(url) crate_info = r.json() return crate_info["crate"]["max_stable_version"] def get_latest_version_for_npm_package(package_name: str) -> str: url = f"https://registry.npmjs.org/{package_name}/latest" r = requests.get(url) crate_info = r.json() return crate_info["version"] def extract_one_match_with_regex_from_file(path: Path, regex: str) -> str: """Extract one (and only one!) match with a regex from a file. Raises an error in case of zero or more than one hits. """ assert path.is_file() matching_str = None for line in path.read_text().splitlines(): m = re.fullmatch(regex, line.strip()) if m: # If we already found something, there is a bug somewhere assert matching_str is None matching_str = m.group(1) assert matching_str is not None return matching_str @dataclass(kw_only=True) class UriInfo: uri: str is_external: bool is_valid: bool is_pure_anchor: bool is_insecure: bool if __name__ == "__main__": main() ================================================ FILE: python/scripts/check_source.sh ================================================ #!/bin/bash # Copyright 2023-2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # From https://stackoverflow.com/a/246128 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) PYTHON_ROOT_DIR=$SCRIPT_DIR/../ pushd $PYTHON_ROOT_DIR > /dev/null echo "Running ruff..." ruff check echo "Running mypy..." mypy src/magika tests popd > /dev/null ================================================ FILE: python/scripts/generate_reference.py ================================================ # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import subprocess from pathlib import Path import click python_root_dir = Path(__file__).parent.parent @click.command() def main(): test_scripts_paths = [ python_root_dir / "tests" / "test_features_extraction_vs_reference.py", python_root_dir / "tests" / "test_inference_vs_reference.py", ] for test_script_path in test_scripts_paths: assert test_script_path.is_file() cmd = [ "uv", "run", str(test_script_path), "generate-tests", ] print(f"Running CMD: {' '.join(cmd)}") subprocess.run( cmd, cwd=python_root_dir, check=True, ) print("Everything went good.") if __name__ == "__main__": main() ================================================ FILE: python/scripts/pre_release_check.py ================================================ # Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ It performs a number of checks to determine whether the package is ready for release. """ import re import subprocess import sys import click import magika @click.command() @click.option( "--expected-version", default="", help="Expected version string (e.g., '1.2.3'). If provided, checks will be validated against this value.", ) @click.option( "--report-only", is_flag=True, help="Print errors without failing. (Default: Fails on errors)", ) @click.option( "--check-pip-show-package-version/--no-check-pip-show-package-version", is_flag=True, default=True, help="Enable/disable version check via 'pip show'. 
(Default: Enabled)", ) @click.option( "--use-python-client", is_flag=True, help="Use the Python client instead of Rust client. (Default: False)", ) def main( expected_version: str, report_only: bool, check_pip_show_package_version: bool, use_python_client: bool, ) -> None: """Checks versions consistency for the `magika` package.""" if report_only: click.echo('Running in "report only" mode.') if not check_pip_show_package_version: click.echo("Skipping checking package version via pip show.") if use_python_client: click.echo("Using python client instead of Rust client.") strict_mode = not report_only if strict_mode: if expected_version == "": click.secho( "ERROR: when not using --report-only, --expected-version is required." ) sys.exit(1) with_errors = False # Get the versions module_version = getattr(magika, "__version__", "") try: instance_version = magika.Magika().get_module_version() except Exception: instance_version = "" if check_pip_show_package_version: pip_show_package_version = get_magika_package_version_via_pip_show() else: pip_show_package_version = "" if use_python_client: cli_version = instance_version else: cli_version = get_rust_cli_version() if module_version == "": click.echo("ERROR: failed to get module_version.") with_errors = True if instance_version == "": click.echo("ERROR: failed to get instance_version.") with_errors = True if check_pip_show_package_version and pip_show_package_version == "": click.echo("ERROR: failed to get pip_show_package_version.") with_errors = True if cli_version == "": click.echo("ERROR: failed to get cli_version.") with_errors = True click.echo( f"Extracted versions: {expected_version=}, {module_version=}, {instance_version=}, {pip_show_package_version=}, {cli_version=}." 
) if expected_version != "" and module_version != expected_version: click.echo(f"ERROR: {module_version=} != {expected_version=}") with_errors = True if module_version != instance_version: click.echo(f"ERROR: {instance_version=} != {module_version=}") with_errors = True if check_pip_show_package_version: if module_version != pip_show_package_version: click.echo(f"ERROR: {module_version=} != {pip_show_package_version=}") with_errors = True # From now on, we assume all the python-related versions are the same. If # they are not, we would have at least one error above. if not is_valid_python_version(module_version): click.echo(f"ERROR: {module_version=} is not a valid python version.") with_errors = True if module_version.endswith("-dev") or cli_version.endswith("-dev"): click.echo("ERROR: One of the versions is a -dev version.") with_errors = True if cli_version.endswith("-rc") and not module_version.endswith("-rc"): click.echo("ERROR: The CLI has an -rc version, but the python module does not.") with_errors = True if with_errors: click.secho("There was at least one error.", fg="red") if strict_mode: sys.exit(1) else: click.secho("All tests pass!", fg="green") def get_rust_cli_version() -> str: """Get the version of the Rust CLI `magika`. Returns an empty string ("") if an error is encountered. 
""" try: result = subprocess.run( ["magika", "--version"], capture_output=True, text=True, check=True ) parts = result.stdout.strip().split() if len(parts) < 2: click.echo("ERROR: Could not parse CLI version output.") return "" cli_version = parts[1] return cli_version except subprocess.CalledProcessError as e: click.echo(f"ERROR: Could not retrieve CLI version: {e}") return "" def get_magika_package_version_via_pip_show() -> str: try: r = subprocess.run( ["python3", "-m", "pip", "show", "magika"], capture_output=True, text=True ) lines = r.stdout.strip().split("\n") for line in lines: if line.startswith("Version: "): return line.split(": ", 1)[1] click.echo( f"ERROR: Could not extract the package version via pip show. Output from pip show:\nstdout={r.stdout}\n\nstderr={r.stderr}" ) return "" except subprocess.CalledProcessError as e: click.echo(f"ERROR: Could not retrieve package version via pip show: {e}") return "" def is_valid_python_version(version: str) -> bool: # Regex from PEP440: '[N!]N(.N)*[{a|b|rc}N][.postN][.devN]' PEP440_CANONICAL_REGEX = re.compile( r""" ^ # Optional Epoch segment (e.g., 1!) (?P\d+!)? # Required Release segment (e.g., 1.2.3) (?P[0-9]+(?:\.[0-9]+)*) # Optional Pre-release segment (e.g., a1, b2, rc3) (?P
    (?:a|b|rc)
    [0-9]+
)?

# Optional Post-release segment (e.g., .post4)
(?P
    (?:\.post[0-9]+)
)?

# Optional Development release segment (e.g., .dev5)
(?P
    (?:\.dev[0-9]+)
)?
$
""",
        re.VERBOSE | re.IGNORECASE,
    )
    return PEP440_CANONICAL_REGEX.fullmatch(version) is not None


def test_is_valid_python_version() -> None:
    """Spot-checks `is_valid_python_version` on accepted and rejected strings."""
    expectations = (
        ("1.2.3", True),
        ("1.2.3.rc", False),
        ("1.2.3.rc0", False),
        ("1.2.3rc0", True),
        ("1.2.3rc1", True),
        ("1.2.3-dev", False),
        ("1.2.3.dev0", True),
        ("1.2.3-dev0", False),
    )
    for candidate, should_be_valid in expectations:
        # Identity check: the function must return a real bool, not a truthy value.
        assert is_valid_python_version(candidate) is should_be_valid


if __name__ == "__main__":
    main()


================================================
FILE: python/scripts/prepare_pyproject_for_pure_python_wheel.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from pathlib import Path

import tomli
import tomli_w


def main() -> None:
    """Rewrite pyproject.toml in place for building the pure-python wheel.

    Drops the maturin settings, switches the build backend to hatchling and
    registers the console scripts, then writes the result back to the same
    file.
    """
    toml_path = Path(__file__).parent.parent / "pyproject.toml"
    config = tomli.loads(toml_path.read_text())

    # The maturin section is not needed for this build; a missing entry is
    # an error (KeyError).
    del config["tool"]["maturin"]

    # Build with hatchling instead.
    build_system = {}
    build_system["requires"] = ["hatchling"]
    build_system["build-backend"] = "hatchling.build"
    config["build-system"] = build_system

    # Console scripts: the python client, plus a `magika` entry point that
    # resolves to the "rust client not found" warning module.
    scripts = {}
    scripts["magika-python-client"] = "magika.cli.magika_client:main"
    scripts["magika"] = "magika.cli.magika_rust_client_not_found_warning:main"
    config["project"]["scripts"] = scripts

    serialized = tomli_w.dumps(config)
    toml_path.write_text(serialized)


if __name__ == "__main__":
    main()


================================================
FILE: python/scripts/run_quick_test_magika_cli.py
================================================
#!/usr/bin/env python
# Copyright 2023-2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script assumes that the `magika` python package has already been installed,
and that the `magika` client is available in the PATH. The script tests that the
`magika` client appears functional.

This script should only rely on dependencies installed with `pip install
magika`; it is used as part of the "build & test package" GitHub action, where
the dev dependencies are not available.
"""

import subprocess
import sys
from pathlib import Path
from typing import Optional

import click


@click.command()
@click.option(
    "--client-path",
    type=click.Path(exists=False, resolve_path=False),
)
def main(client_path: Optional[Path]) -> None:
    """Tests the Rust or Python Magika client. By default, it runs "magika"
    (expected in PATH). Use --client-path to specify a different client
    executable. Note that "client_path" may not point to a valid file, but still
    be a valid target to run as it may be found in the PATH.
    """

    basic_tests_dir = (
        Path(__file__).resolve().parent.parent.parent / "tests_data" / "basic"
    )
    assert basic_tests_dir.is_dir()

    if client_path is None:
        client_path = Path("magika")
    print(f'Testing client: "{client_path}"')

    p = subprocess.run([str(client_path), "--version"], capture_output=True, text=True)
    print(f'Output of "magika --version": {p.stdout.strip()}')

    p = subprocess.run(
        [str(client_path), "-r", "--label", "--no-colors", str(basic_tests_dir)],
        capture_output=True,
        text=True,
    )

    if p.returncode != 0:
        print("ERROR: magika CLI exited with non-zero status.")
        print(f"stdout:\n{p.stdout}\n" + "-" * 40)
        print(f"stderr:\n{p.stderr}\n" + "-" * 40)
        sys.exit(1)

    if p.stderr != "":
        print(f"WARNING: p.stderr not empty: {p.stderr}")

    with_error = False
    checked_count = 0
    lines = p.stdout.split("\n")
    for line in lines:
        line = line.strip()
        if line == "":
            continue
        # Each non-empty line is expected to look like "<path>: <label> ...".
        file_path_str, separator, file_output_str = line.partition(": ")
        if separator == "":
            # Report unexpected output explicitly instead of crashing with an
            # unpacking ValueError.
            with_error = True
            print(f"ERROR: Could not parse output line: {line}")
            continue
        file_path = Path(file_path_str)
        output_label = file_output_str.strip().split(" ", 1)[0]
        # The test files are laid out as <expected_label>/<file>.
        expected_label = file_path.parent.name
        checked_count += 1
        if expected_label != output_label:
            with_error = True
            print(
                f"ERROR: Misprediction for {file_path}: expected_label={expected_label}, output_label={output_label}"
            )

    if with_error:
        print("ERROR: There was at least one misprediction")
        sys.exit(1)

    if checked_count == 0:
        # Without this, a client that prints nothing would pass vacuously.
        print("ERROR: The client did not output any prediction")
        sys.exit(1)

    print("All examples were predicted correctly")


if __name__ == "__main__":
    main()


================================================
FILE: python/scripts/run_quick_test_magika_module.py
================================================
#!/usr/bin/env python
# Copyright 2023-2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script should only rely on dependencies installed with `pip install
magika`; it is used as part of the "build & test package" GitHub action, where
the dev dependencies are not available.
"""

import statistics
import sys
import time
from pathlib import Path

import click

from magika import ContentTypeLabel, Magika, PredictionMode


# Quick functional test of the `magika` python module: a couple of in-memory
# sanity checks, then every file under tests_data/basic is identified and
# compared against the name of its parent directory.
@click.command()
@click.option("--print-inference-stats", is_flag=True, help="Print inference stats.")
@click.option("--repeat", default=1, help="Number of times to run the test set.")
def main(print_inference_stats: bool, repeat: int) -> None:
    detector = Magika(prediction_mode=PredictionMode.HIGH_CONFIDENCE)

    print(f"Magika instance details: {detector}")

    # Tiny inputs: the model label must stay UNDEFINED and the score must be 1.0.
    tiny_inputs = (
        (b"text", ContentTypeLabel.TXT),
        (b"\xff\xff\xff", ContentTypeLabel.UNKNOWN),
    )
    for payload, expected_output_label in tiny_inputs:
        res = detector.identify_bytes(payload)
        assert res.dl.label == ContentTypeLabel.UNDEFINED
        assert res.output.label == expected_output_label
        assert res.score == 1.0

    samples_dir = (Path(__file__).parent.parent.parent / "tests_data" / "basic").resolve()
    samples_paths = sorted(
        candidate for candidate in samples_dir.rglob("*") if candidate.is_file()
    )

    # One entry (in milliseconds) per identify_path() call, across all repeats.
    latencies = []
    with_error = False

    for run_idx in range(repeat):
        for file_path in samples_paths:
            start_time = time.perf_counter()
            res = detector.identify_path(file_path)
            end_time = time.perf_counter()
            elapsed_ms = (end_time - start_time) * 1000
            latencies.append(elapsed_ms)

            # Only the first pass validates predictions; later passes are
            # just for timing.
            if run_idx != 0:
                continue

            output_label = res.output.label
            expected_label = file_path.parent.name
            if expected_label != output_label:
                with_error = True
                print(
                    f"ERROR: Misprediction for {file_path}: expected_label={expected_label}, output_label={output_label}"
                )

    if with_error:
        print("ERROR: There was at least one misprediction")
        sys.exit(1)

    print("All examples were predicted correctly")

    if not print_inference_stats:
        return
    if not latencies:
        return

    print(f"Inference stats over {len(latencies)} files (repeat={repeat}):")
    print(f"  Min: {min(latencies):.4f} ms")
    print(f"  Max: {max(latencies):.4f} ms")
    print(f"  Mean: {statistics.mean(latencies):.4f} ms")
    print(f"  Median: {statistics.median(latencies):.4f} ms")
    print(f"  Total: {sum(latencies):.4f} ms")


if __name__ == "__main__":
    main()


================================================
FILE: python/scripts/sync.py
================================================
#!/usr/bin/env python3
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import enum
import json
import shutil
import sys
from pathlib import Path
from typing import Optional

import click

# Models bundled in the python package when --models-names is not passed.
MODELS_NAMES_TO_INCLUDE_IN_PYTHON_PACKAGE = [
    "standard_v3_3",
]

# This script lives in <repo>/python/scripts/, so the repository root is three
# levels up. The asserts make the script fail fast if it is run from an
# unexpected location or if the repository layout changes.
REPO_ROOT_DIR = Path(__file__).parent.parent.parent
assert REPO_ROOT_DIR.is_dir() and (REPO_ROOT_DIR / ".git").is_dir()

# Canonical, language-agnostic assets (source of truth for the sync).
ASSETS_DIR = REPO_ROOT_DIR / "assets"
assert ASSETS_DIR.is_dir()

CONTENT_TYPES_KB_PATH = ASSETS_DIR / "content_types_kb.min.json"
assert CONTENT_TYPES_KB_PATH.is_file()

ASSETS_MODELS_DIR = ASSETS_DIR / "models"
assert ASSETS_MODELS_DIR.is_dir()

# Destinations inside the python package.
PYTHON_ROOT_DIR = REPO_ROOT_DIR / "python"
assert PYTHON_ROOT_DIR.is_dir()

PYTHON_CONTENT_TYPES_KB_PATH = (
    PYTHON_ROOT_DIR / "src" / "magika" / "config" / "content_types_kb.min.json"
)

PYTHON_MODELS_DIR = PYTHON_ROOT_DIR / "src" / "magika" / "models"
assert PYTHON_MODELS_DIR.is_dir()

PYTHON_CONTENT_TYPES_LABELS_PY_PATH = (
    PYTHON_ROOT_DIR / "src" / "magika" / "types" / "content_type_label.py"
)

# Destination root for the JS package. The original guarded this with an assert
# on PYTHON_ROOT_DIR (copy/paste slip), so a missing js/ directory went
# unnoticed until a write failed later on.
JS_ROOT_DIR = REPO_ROOT_DIR / "js"
assert JS_ROOT_DIR.is_dir()


class Target(enum.StrEnum):
    """Package that the sync script updates; used as the CLI `target` argument."""

    JS = "js"
    PYTHON = "python"


@click.command()
@click.argument("target", type=Target)
@click.option(
    "--models-names",
    "models_names_str",
    help="Comma-separated list of models names to import in the package",
)
def main(target: Target, models_names_str: Optional[str]) -> None:
    """Sync the shared assets into the TARGET package ("js" or "python")."""
    if target == Target.PYTHON:
        if models_names_str is None:
            models_names = list(MODELS_NAMES_TO_INCLUDE_IN_PYTHON_PACKAGE)
        else:
            # Drop empty entries (e.g., from a trailing comma): an empty name
            # would resolve to the assets models directory itself and cause
            # every model to be copied into the package.
            models_names = [
                name.strip() for name in models_names_str.split(",") if name.strip()
            ]

        if not models_names:
            print("ERROR: no model name was specified")
            sys.exit(1)

        # Validate every requested model BEFORE touching the package, so that a
        # typo does not leave the python package with its models deleted.
        missing_models_names = [
            name for name in models_names if not (ASSETS_MODELS_DIR / name).is_dir()
        ]
        if missing_models_names:
            print(
                f"ERROR: models not found in {ASSETS_MODELS_DIR}: {missing_models_names}"
            )
            sys.exit(1)

        print(f"Including these models in the python package: {models_names}")

        update_python_content_type_kb()
        update_python_content_type_label_py()

        # Start from a clean slate so that models no longer listed are removed.
        print(f"Deleting {PYTHON_MODELS_DIR}")
        shutil.rmtree(PYTHON_MODELS_DIR)
        for model_name in models_names:
            add_model_to_python_package(model_name)

    elif target == Target.JS:
        update_js_content_type_files()

        # FIXME: the model is currently copied manually
        print("WARNING: copying the model is currently NOT supported by this script")


def update_python_content_type_kb() -> None:
    """Copy the canonical content types KB into the python package.

    The destination's parent directory is created if it does not exist yet.
    """
    source_path = CONTENT_TYPES_KB_PATH
    destination_path = PYTHON_CONTENT_TYPES_KB_PATH

    print(f"Syncing python's content types KB: {source_path} => {destination_path}")

    destination_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(source_path, destination_path)


def add_model_to_python_package(model_name: str) -> None:
    """Copy the model directory `model_name` from the assets into the python package.

    Args:
        model_name: name of a directory under the assets models directory.

    Exits the process with status 1 if the model directory does not exist.
    """
    assets_model_dir = ASSETS_MODELS_DIR / model_name
    if not assets_model_dir.is_dir():
        # The original message was missing the closing quote after the name.
        print(f'ERROR: model "{model_name}" not found')
        sys.exit(1)

    python_model_dir = PYTHON_MODELS_DIR / model_name

    print(f"Adding model {assets_model_dir} => {python_model_dir}")
    shutil.copytree(assets_model_dir, python_model_dir)


# Header of the generated content_type_label.py: license, import, and the
# opening of the ContentTypeLabel enum. update_python_content_type_label_py()
# strips it and appends the enum members and a __repr__ method.
CONTENT_TYPE_LABEL_PY_SOURCE_PREFIX = """
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from magika.types.strenum import StrEnum

# NOTE: DO NOT EDIT --- This file is automatically generated.


# This is the list of all possible content types we know about; however, models
# support a smaller subset of them. See model's README.md for details.
class ContentTypeLabel(StrEnum):
"""


def update_python_content_type_label_py() -> None:
    """Regenerate the python ContentTypeLabel enum from the content types KB.

    One enum member is emitted per KB key, in sorted order. The member name is
    the upper-cased label, prefixed with an underscore when the label starts
    with a digit (a python identifier cannot start with one).
    """
    print(f"Updating {PYTHON_CONTENT_TYPES_LABELS_PY_PATH}")

    known_types = json.loads(CONTENT_TYPES_KB_PATH.read_text())
    indent = " " * 4

    members = []
    for label in sorted(known_types.keys()):
        member_name = label.upper()
        if label[0].isdigit():
            member_name = "_" + member_name
        members.append(f'{indent}{member_name} = "{label}"')

    header = CONTENT_TYPE_LABEL_PY_SOURCE_PREFIX.strip()
    enum_body = "\n".join(members)
    repr_method = (
        indent + "def __repr__(self) -> str:\n" + (indent * 2) + "return str(self)\n"
    )

    # Layout: header, enum members, a blank line, then the __repr__ method.
    generated_source = header + "\n" + enum_body + "\n" + "\n" + repr_method

    PYTHON_CONTENT_TYPES_LABELS_PY_PATH.write_text(generated_source)


# License header and "do not edit" notice shared by the generated TypeScript
# files.
COPYRIGHT_AND_DONOT_EDIT_PREFIX = """
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: DO NOT EDIT --- This file is automatically generated by sync.py.
"""


# Header of the generated content-type-label.ts, up to and including the
# opening brace of the ContentTypeLabel enum; the members and the closing
# brace are appended by update_js_content_type_files().
CONTENT_TYPE_LABEL_TS_SOURCE_PREFIX = (
    COPYRIGHT_AND_DONOT_EDIT_PREFIX.strip()
    + "\n\n"
    + """
// This is the list of all possible content types we know about; however, models
// support a smaller subset of them. See model's README.md for details.
export enum ContentTypeLabel {
""".strip()
)


def update_js_content_type_files() -> None:
    """Regenerate the JS package's content type sources from the content types KB.

    Two TypeScript files are rewritten under the JS package's src/ directory:

    - content-type-label.ts: the ContentTypeLabel enum, one member per KB key.
    - content-types-infos.ts: a map from each label to its `label` and
      `is_text` information.
    """

    def enum_member_name(ct_label_str: str) -> str:
        # Enum member names are the upper-cased label; labels starting with a
        # digit get a leading underscore to form a valid identifier.
        upper_label = ct_label_str.upper()
        if ct_label_str[0].isdigit():
            return f"_{upper_label}"
        return upper_label

    kb = json.loads(CONTENT_TYPES_KB_PATH.read_text())

    # Update content type labels enum
    content_type_label_ts_path = JS_ROOT_DIR / "src" / "content-type-label.ts"

    enum_body_lines = [
        (" " * 2) + f'{enum_member_name(ct_label_str)} = "{ct_label_str}",'
        for ct_label_str in sorted(kb.keys())
    ]

    out = (
        CONTENT_TYPE_LABEL_TS_SOURCE_PREFIX.strip()
        + "\n"
        + "\n".join(enum_body_lines)
        + "\n"
        + "}\n"
    ).strip() + "\n"

    content_type_label_ts_path.write_text(out)
    print(f"Updated {content_type_label_ts_path}")

    # Update content types info
    content_types_infos_ts_path = JS_ROOT_DIR / "src" / "content-types-infos.ts"
    content_types_info_content = COPYRIGHT_AND_DONOT_EDIT_PREFIX.strip() + "\n\n"

    # `Record` is generic and requires its key and value type arguments; a bare
    # `Record;` makes the generated file fail to compile.
    content_types_info_content += (
        """
import { ContentTypeInfo } from "./content-type-info";
import { ContentTypeLabel } from "./content-type-label";

export type ContentTypesInfos = Record<ContentTypeLabel, ContentTypeInfo>;

export const ContentTypesInfos = {
  get: (): ContentTypesInfos => ({
""".strip()
        + "\n"
    )
    for ct_label_str, ct_info in sorted(kb.items()):
        ct_label_enum = enum_member_name(ct_label_str)
        is_text = ct_info["is_text"]
        content_types_info_content += (
            f"    [ContentTypeLabel.{ct_label_enum}]: {{\n"
            f"      label: ContentTypeLabel.{ct_label_enum},\n"
            f'      is_text: {"true" if is_text else "false"},\n'
            "    },\n"
        )

    content_types_info_content += "  })\n};\n"

    content_types_infos_ts_path.write_text(content_types_info_content)
    print(f"Updated {content_types_infos_ts_path}")


if __name__ == "__main__":
    main()


================================================
FILE: python/scripts/test_magika_model.py
================================================
#!/usr/bin/env python3
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
This script tests a given model against the tests cases, check whether the
predictions are correct, and raise an error otherwise.

This is useful when evaluating new models.
"""

import sys
from pathlib import Path

import click

import magika
from magika import colors


@click.command()
@click.argument("model_dir_or_name")
def main(model_dir_or_name: str) -> None:
    """Check a model's predictions against the repository's test files.

    MODEL_DIR_OR_NAME is either the path of a model directory, or the name of a
    model directory shipped inside the installed magika package.

    The expected content type of each test file is the name of its parent
    directory. The process exits with status 1 if any file is mispredicted or
    cannot be analyzed.
    """
    candidate_dir = Path(model_dir_or_name)
    if candidate_dir.is_dir():
        model_dir = candidate_dir
    else:
        # Not a path: interpret the argument as the name of a bundled model.
        models_dir = Path(magika.__file__).parent / "models"
        model_dir = models_dir / model_dir_or_name

    if not model_dir.is_dir():
        log_error(f"{model_dir_or_name} is not a dir nor a model name")
        sys.exit(1)

    m = magika.Magika(model_dir=model_dir)

    with_error = False

    tests_data_dir = Path(__file__).parent.parent.parent / "tests_data"
    tests_dirs_names = ["basic", "previous_missdetections"]

    for tests_dir_name in tests_dirs_names:
        tests_dir = tests_data_dir / tests_dir_name
        for test_path in tests_dir.rglob("*"):
            if not test_path.is_file():
                continue

            mr = m.identify_path(test_path)
            if not mr.ok:
                # Report the failure explicitly instead of relying on an
                # assert, which is stripped when python runs with -O.
                with_error = True
                log_error(f"{test_path} could not be analyzed (status: {mr.status}).")
                continue

            predicted_content_type = mr.prediction.output.label
            expected_content_type = test_path.parent.name
            if predicted_content_type != expected_content_type:
                with_error = True
                log_error(
                    f'{test_path} predicted as "{predicted_content_type}" (score: {mr.prediction.score:.4f}), expected "{expected_content_type}".'
                )

    if with_error:
        log_error("There was at least one error.")
        # Signal the failure to the caller (shell, CI) via the exit status.
        sys.exit(1)

    log_ok("All tests examples were predicted correctly.")


def log_ok(msg: str) -> None:
    """Print `msg` to stdout wrapped in the green color code and a reset."""
    print(colors.GREEN, msg, colors.RESET, sep="")


def log_error(msg: str) -> None:
    """Print `msg` to stdout, prefixed with "ERROR: " and colored in red."""
    print(colors.RED, "ERROR: ", msg, colors.RESET, sep="")


if __name__ == "__main__":
    main()


================================================
FILE: python/src/magika/__init__.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D104


from importlib.metadata import PackageNotFoundError, version

from magika.magika import Magika
from magika.types.content_type_info import ContentTypeInfo
from magika.types.content_type_label import ContentTypeLabel
from magika.types.magika_error import MagikaError
from magika.types.magika_prediction import MagikaPrediction
from magika.types.magika_result import MagikaResult
from magika.types.overwrite_reason import OverwriteReason
from magika.types.prediction_mode import PredictionMode
from magika.types.status import Status

# Expose the installed distribution's version as `magika.__version__`. The
# lookup uses this module's name as the distribution name.
try:
    __version__ = version(__name__)
except PackageNotFoundError:
    # Package is not installed (e.g., during development)
    __version__ = "unknown"

# Names exported by `from magika import *`.
__all__ = [
    "ContentTypeInfo",
    "ContentTypeLabel",
    "Magika",
    "MagikaError",
    "MagikaPrediction",
    "MagikaResult",
    "OverwriteReason",
    "PredictionMode",
    "Status",
]


================================================
FILE: python/src/magika/cli/magika_client.py
================================================
#!/usr/bin/env python3
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Python-based command-line client for Magika.

This client serves as a fallback and is maintained for backward compatibility.
It is primarily useful for installations from the pure-Python wheel, especially
on platforms where the Rust-based binary client is not yet available.

For optimal performance, the Rust-based client is recommended.
"""

import importlib.metadata
import json
import logging
import os
import sys
from pathlib import Path
from typing import List, Optional, Tuple

import click

from magika import Magika, MagikaError, PredictionMode, colors
from magika.logger import get_logger
from magika.types import ContentTypeLabel, MagikaResult
from magika.types.overwrite_reason import OverwriteReason

# Version of the installed magika distribution. Fall back to "unknown" when the
# package metadata is not available (e.g., running from a source checkout),
# consistently with how `magika.__version__` is computed; otherwise the module
# would fail at import time.
try:
    VERSION = importlib.metadata.version("magika")
except importlib.metadata.PackageNotFoundError:
    VERSION = "unknown"

CONTACT_EMAIL = "magika-dev@google.com"

# Accept -h in addition to click's default --help.
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

# Text shown at the end of the --help output.
HELP_EPILOG = f"""
Magika version: "{VERSION}"\f
Default model: "{Magika._get_default_model_name()}"

Send any feedback to {CONTACT_EMAIL} or via GitHub issues.
"""


@click.command(
    context_settings=CONTEXT_SETTINGS,
    epilog=HELP_EPILOG,
)
@click.argument(
    "file",
    type=click.Path(exists=False, readable=False, path_type=Path),
    required=False,
    nargs=-1,
)
@click.option(
    "-r",
    "--recursive",
    is_flag=True,
    help='When passing this option, magika scans every file within directories, instead of outputting "directory"',
)
@click.option("--json", "json_output", is_flag=True, help="Output in JSON format.")
@click.option("--jsonl", "jsonl_output", is_flag=True, help="Output in JSONL format.")
@click.option(
    "-i",
    "--mime-type",
    "mime_output",
    is_flag=True,
    help="Output the MIME type instead of a verbose content type description.",
)
@click.option(
    "-l",
    "--label",
    "label_output",
    is_flag=True,
    help="Output a simple label instead of a verbose content type description. Use --list-output-content-types for the list of supported output.",
)
@click.option(
    "-c",
    "--compatibility-mode",
    "magic_compatibility_mode",
    is_flag=True,
    help="Compatibility mode: output is as close as possible to `file` and colors are disabled.",
)
@click.option(
    "-s",
    "--output-score",
    is_flag=True,
    help="Output the prediction's score in addition to the content type.",
)
@click.option(
    "-m",
    "--prediction-mode",
    "prediction_mode_str",
    type=click.Choice(PredictionMode.get_valid_prediction_modes(), case_sensitive=True),
    default=PredictionMode.HIGH_CONFIDENCE,
)
@click.option(
    "--batch-size", default=32, help="How many files to process in one batch."
)
@click.option(
    "--no-dereference",
    is_flag=True,
    help="This option causes symlinks not to be followed. By default, symlinks are dereferenced.",
)
@click.option(
    "--colors/--no-colors",
    "with_colors",
    is_flag=True,
    default=True,
    help="Enable/disable use of colors.",
)
@click.option("-v", "--verbose", is_flag=True, help="Enable more verbose output.")
@click.option("-vv", "--debug", is_flag=True, help="Enable debug logging.")
@click.option(
    "--version", "output_version", is_flag=True, help="Print the version and exit."
)
@click.option(
    "--model-dir",
    type=click.Path(
        exists=True, file_okay=False, dir_okay=True, resolve_path=True, path_type=Path
    ),
    help="Use a custom model.",
)
def main(
    file: List[Path],
    recursive: bool,
    json_output: bool,
    jsonl_output: bool,
    mime_output: bool,
    label_output: bool,
    magic_compatibility_mode: bool,
    output_score: bool,
    prediction_mode_str: str,
    batch_size: int,
    no_dereference: bool,
    with_colors: bool,
    verbose: bool,
    debug: bool,
    output_version: bool,
    model_dir: Optional[Path],
) -> None:
    """Magika - Determine type of FILEs with deep-learning."""
    # NOTE: the docstring above is what click shows in --help; keep it short.
    #
    # click uses the name of the variable to determine how it will show up in
    # the --help. Since we don't like to see "files_paths" in the help, we name
    # the argument "file" (which is ugly) and we re-assign it as soon as we can.
    files_paths = file

    if magic_compatibility_mode:
        # In compatibility mode we disable colors.
        with_colors = False

    _l = get_logger(use_colors=with_colors)

    # --debug takes precedence over --verbose when both are passed.
    if verbose:
        _l.setLevel(logging.INFO)
    if debug:
        _l.setLevel(logging.DEBUG)

    if output_version:
        _l.raw_print_to_stdout("Magika python client")
        _l.raw_print_to_stdout(f"Magika version: {VERSION}")
        _l.raw_print_to_stdout(f"Default model: {Magika._get_default_model_name()}")
        sys.exit(0)

    # --- Validate the command-line arguments ---

    if len(files_paths) == 0:
        _l.error("You need to pass at least one path, or - to read from stdin.")
        sys.exit(1)

    read_from_stdin = False
    for p in files_paths:
        if str(p) == "-":
            read_from_stdin = True
        elif not p.exists():
            _l.error(f'File or directory "{str(p)}" does not exist.')
            sys.exit(1)
    if read_from_stdin:
        if len(files_paths) > 1:
            _l.error('If you pass "-", you cannot pass anything else.')
            sys.exit(1)
        if recursive:
            _l.error('If you pass "-", recursive scan is not meaningful.')
            sys.exit(1)

    if batch_size <= 0 or batch_size > 512:
        _l.error("Batch size needs to be greater than 0 and less or equal than 512.")
        sys.exit(1)

    if json_output and jsonl_output:
        _l.error("You should use either --json or --jsonl, not both.")
        sys.exit(1)

    if int(mime_output) + int(label_output) + int(magic_compatibility_mode) > 1:
        # The option is called --mime-type; the message used to mention a
        # non-existent --mime option.
        _l.error(
            "You should use only one of --mime-type, --label, --compatibility-mode."
        )
        sys.exit(1)

    if recursive:
        # recursively enumerate files within directories
        expanded_paths = []
        for p in files_paths:
            if p.exists():
                if p.is_file():
                    expanded_paths.append(p)
                elif p.is_dir():
                    expanded_paths.extend(sorted(p.rglob("*")))
            elif str(p) == "-":
                # this is "read from stdin", that's OK
                pass
            else:
                _l.error(f'File or directory "{str(p)}" does not exist.')
                sys.exit(1)
        # the resulting list may still include some directories; thus, we filter them out.
        files_paths: List[Path] = list(filter(lambda x: not x.is_dir(), expanded_paths))  # type: ignore[no-redef]

    _l.info(f"Considering {len(files_paths)} files")
    _l.debug(f"Files: {files_paths}")

    # Select an alternative model checking: 1) CLI option, 2) env variable.
    # If none of these is set, model_dir is left to None, and the Magika module
    # will use the default model.
    if model_dir is None:
        model_dir_str = os.environ.get("MAGIKA_MODEL_DIR")
        if model_dir_str is not None and model_dir_str.strip() != "":
            model_dir = Path(model_dir_str)

    try:
        magika = Magika(
            model_dir=model_dir,
            prediction_mode=PredictionMode(prediction_mode_str),
            no_dereference=no_dereference,
            verbose=verbose,
            debug=debug,
            use_colors=with_colors,
        )
    except MagikaError as mr:
        _l.error(str(mr))
        sys.exit(1)

    start_color = ""
    end_color = ""

    # Color used for the output line, by content type group; groups not listed
    # here fall back to white.
    color_by_group = {
        "document": colors.LIGHT_PURPLE,
        "executable": colors.LIGHT_GREEN,
        "archive": colors.LIGHT_RED,
        "audio": colors.YELLOW,
        "image": colors.YELLOW,
        "video": colors.YELLOW,
        "code": colors.LIGHT_BLUE,
    }

    # updated only when we need to output in JSON format
    all_predictions: List[Tuple[Path, MagikaResult]] = []

    # Number of batches: ceil(len(files_paths) / batch_size).
    batches_num = len(files_paths) // batch_size
    if len(files_paths) % batch_size != 0:
        batches_num += 1
    for batch_idx in range(batches_num):
        batch_files_paths = files_paths[
            batch_idx * batch_size : (batch_idx + 1) * batch_size
        ]

        if _should_read_from_stdin(files_paths):
            batch_predictions = [_get_magika_result_from_stdin(magika)]
        else:
            batch_predictions = magika.identify_paths(batch_files_paths)

        if json_output:
            # we do not stream the output for JSON output
            all_predictions.extend(zip(batch_files_paths, batch_predictions))
        elif jsonl_output:
            for file_path, result in zip(batch_files_paths, batch_predictions):
                _l.raw_print_to_stdout(json.dumps(result.asdict()))
        else:
            for file_path, result in zip(batch_files_paths, batch_predictions):
                if result.ok:
                    if mime_output:
                        # If the user requested the MIME type, we use the mime type
                        # regardless of the compatibility mode.
                        output = result.prediction.output.mime_type
                    elif label_output:
                        output = str(result.prediction.output.label)
                    else:  # human-readable description
                        output = f"{result.prediction.output.description} ({result.prediction.output.group})"

                        if (
                            result.prediction.dl.label != ContentTypeLabel.UNDEFINED
                            and result.prediction.dl.label
                            != result.prediction.output.label
                            and result.prediction.overwrite_reason
                            == OverwriteReason.LOW_CONFIDENCE
                        ):
                            # It seems that we had a low-confidence prediction
                            # from the model. Let's warn the user about our best
                            # bet.
                            output += (
                                " [Low-confidence model best-guess: "
                                f"{result.prediction.dl.description} ({result.prediction.dl.group}), "
                                f"score={result.prediction.score}]"
                            )

                    if with_colors:
                        start_color = color_by_group.get(
                            result.prediction.output.group, colors.WHITE
                        )
                        end_color = colors.RESET
                else:
                    # Failed results are printed without colors.
                    output = result.status
                    start_color = ""
                    end_color = ""

                if output_score and result.ok:
                    # The score is printed as an integer percentage (truncated).
                    score = int(result.prediction.score * 100)
                    _l.raw_print_to_stdout(
                        f"{start_color}{file_path}: {output} {score}%{end_color}"
                    )
                else:
                    _l.raw_print_to_stdout(
                        f"{start_color}{file_path}: {output}{end_color}"
                    )

    if json_output:
        _l.raw_print_to_stdout(
            json.dumps(
                [result.asdict() for (_, result) in all_predictions],
                indent=4,
            )
        )


def _should_read_from_stdin(files_paths: List[Path]) -> bool:
    return len(files_paths) == 1 and str(files_paths[0]) == "-"


def _get_magika_result_from_stdin(magika: Magika) -> MagikaResult:
    """Read all of stdin as raw bytes and return magika's result for them."""
    return magika.identify_bytes(sys.stdin.buffer.read())


if __name__ == "__main__":
    main()


================================================
FILE: python/src/magika/cli/magika_rust_client_not_found_warning.py
================================================
#!/usr/bin/env python
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Placeholder script for the primary 'magika' command-line interface.

This module serves as a fallback entry point for the 'magika' command. **It is
included only in the pure-Python package.** If this script is executed, it
indicates that the user has installed the pure-Python package and not the
package that contains the native binary.

The script explicitly notifies the user that they are not using the binary
client and guides them to use the alternative Python client (`$
magika-python-client`) or to seek support.
"""

import sys


def main() -> None:
    """Print a notice that the Rust client is unavailable and exit with status 1."""
    warning_text = """
WARNING: you have attempted to run `$ magika` (the Rust client), but this is not
available in the python package you installed, likely because magika pipeline
does not currently build binary wheels compatible with your platform settings.

If you think this is a problem worth solving, please open an issue at
https://github.com/google/magika.

In the meantime, you can use the old python magika client with `$ magika-python-client`.
"""

    # Drop the leading/trailing newlines of the literal before printing.
    trimmed_text = warning_text.strip()
    print(trimmed_text)

    # A non-zero status tells callers that the requested client did not run.
    sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: python/src/magika/colors.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101, D102, D103, D107


# Taken from https://en.wikipedia.org/wiki/ANSI_escape_code

BLACK = "\033[0;30m"
RED = "\033[0;31m"
GREEN = "\033[0;32m"
YELLOW = "\033[0;33m"
BLUE = "\033[0;34m"
PURPLE = "\033[0;35m"
CYAN = "\033[0;36m"
LIGHT_GRAY = "\033[0;37m"

DARK_GRAY = "\033[1;30m"
LIGHT_RED = "\033[1;31m"
LIGHT_GREEN = "\033[1;32m"
LIGHT_YELLOW = "\033[1;33m"
LIGHT_BLUE = "\033[1;34m"
LIGHT_PURPLE = "\033[1;35m"
LIGHT_CYAN = "\033[1;36m"
WHITE = "\033[1;37m"

RESET = "\033[0;39m"


================================================
FILE: python/src/magika/config/content_types_kb.min.json
================================================
{"3gp":{"mime_type":"video/3gpp","group":"video","description":"3GPP multimedia file","extensions":["3gp"],"is_text":false},"3ds":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS roms","extensions":["3ds"],"is_text":false},"3dsx":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS homebrew","extensions":["3dsx"],"is_text":false},"3dsm":{"mime_type":"application/x-3ds","group":"image","description":"3D studio Max","extensions":["3ds"],"is_text":false},"3mf":{"mime_type":"application/vnd.ms-package.3dmanufacturing-3dmodel+xml","group":"image","description":"3D Manufacturing Format","extensions":["3mf"],"is_text":false},"abnf":{"mime_type":"text/plain","group":null,"description":"augmented Backus\u2013Naur form","extensions":["abnf"],"is_text":false},"ace":{"mime_type":"application/x-ace-compressed","group":"archive","description":"ACE archive","extensions":["ace"],"is_text":false},"ada":{"mime_type":"text/x-ada","group":"code","description":"ADA source","extensions":[],"is_text":false},"aff":{"mime_type":"text/plain","group":null,"description":"Hunspell Affix","extensions":["aff"],"is_text":true},"ai":{"mime_type":"application/pdf","group":"document","description":"Adobe Illustrator Artwork","extensions":["ai"],"is_text":false},"aidl":{"mime_type":"text/plain","group":null,"description":"Android Interface Definition Language","extensions":["aidl"],"is_text":true},"algol68":{"mime_type":null,"group":null,"description":null,"extensions":["a68"],"is_text":false},"ani":{"mime_type":"application/x-navi-animation","group":null,"description":"Animated cursor","extensions":["ani"],"is_text":false},"apk":{"mime_type":"application/vnd.android.package-archive","group":"executable","description":"Android package","extensions":["apk"],"is_text":false},"applebplist":{"mime_type":"application/x-bplist","group":"application","description":"Apple binary property 
list","extensions":["bplist","plist"],"is_text":false},"appledouble":{"mime_type":"multipart/appledouble","group":"unknown","description":"AppleDouble","extensions":[],"is_text":false},"appleplist":{"mime_type":"application/x-plist","group":"application","description":"Apple property list","extensions":["plist"],"is_text":true},"applesingle":{"mime_type":"application/applefile","group":"unknown","description":"AppleSingle","extensions":[],"is_text":false},"ar":{"mime_type":"application/x-archive","group":"archive","description":"AR Archive","extensions":[],"is_text":false},"arc":{"mime_type":"application/x-arc","group":"archive","description":"Arc","extensions":["arc"],"is_text":false},"arj":{"mime_type":"application/arj","group":"archive","description":"Arj","extensions":[],"is_text":false},"arrow":{"mime_type":"vnd.apache.arrow.file","group":null,"description":null,"extensions":[],"is_text":false},"asc":{"mime_type":"application/pgp-signature","group":"text","description":"PGP","extensions":["asc"],"is_text":true},"asd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"au":{"mime_type":"audio/basic","group":"audio","description":"NeXT/Sun AU","extensions":["au"],"is_text":false},"asf":{"mime_type":"video/x-ms-wma","group":"application","description":"Microsoft Advanced Systems Format","extensions":["asf"],"is_text":false},"asm":{"mime_type":"text/x-asm","group":"code","description":"Assembly","extensions":["s","S","asm"],"is_text":true},"asp":{"mime_type":"text/html","group":"code","description":"ASP source","extensions":["aspx","asp"],"is_text":true},"autohotkey":{"mime_type":"text/plain","group":"code","description":"AutoHotKey script","extensions":[],"is_text":true},"autoit":{"mime_type":"text/plain","group":"code","description":"AutoIt script","extensions":["au3"],"is_text":true},"avi":{"mime_type":"video/x-msvideo","group":"video","description":"Audio Video 
Interleave","extensions":["avi"],"is_text":false},"avif":{"mime_type":"image/avif","group":"video","description":"AV1 Image File Format","extensions":["avif","avifs"],"is_text":false},"avro":{"mime_type":"application/x-avro-binary","group":null,"description":"Apache Avro binary","extensions":["avro"],"is_text":false},"awk":{"mime_type":"text/plain","group":"code","description":"Awk","extensions":["awk"],"is_text":true},"ax":{"mime_type":"application/x-dosexec","group":"executable","description":"Directshow filter","extensions":["ax"],"is_text":false},"batch":{"mime_type":"text/x-msdos-batch","group":"code","description":"DOS batch file","extensions":["bat"],"is_text":true},"bazel":{"mime_type":"text/plain","group":"code","description":"Bazel build file","extensions":["bzl"],"is_text":true},"bcad":{"mime_type":"application/octet-stream","group":"document","description":"bCAD Drawing","extensions":["bdf"],"is_text":false},"bib":{"mime_type":"text/x-bibtex","group":"text","description":"BibTeX","extensions":["bib"],"is_text":true},"bmp":{"mime_type":"image/bmp","group":"image","description":"BMP image data","extensions":["bmp"],"is_text":false},"bpg":{"mime_type":"image/bpg","group":"image","description":"BPG","extensions":["bpg"],"is_text":false},"bpl":{"mime_type":null,"group":"unknown","description":null,"extensions":["bpl"],"is_text":false},"brainfuck":{"mime_type":"text/x-brainfuck","group":"code","description":"Brainfuck source","extensions":["b","bf"],"is_text":true},"brf":{"mime_type":"text/plain","group":"text","description":"Braille Ready Format","extensions":["brf","bfm"],"is_text":false},"bzip":{"mime_type":"application/x-bzip2","group":"archive","description":"bzip2 compressed data","extensions":["bz2","tbz2","tar.bz2"],"is_text":false},"bzip3":{"mime_type":"application/x-bzip3","group":"archive","description":"bzip3 compressed data","extensions":["bz3"],"is_text":false},"c":{"mime_type":"text/x-c","group":"code","description":"C 
source","extensions":["c"],"is_text":true},"cab":{"mime_type":"application/vnd.ms-cab-compressed","group":"archive","description":"Microsoft Cabinet archive data","extensions":["cab"],"is_text":false},"cad":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"cat":{"mime_type":"application/octet-stream","group":"application","description":"Windows Catalog file","extensions":["cat"],"is_text":false},"cdf":{"mime_type":null,"group":"archive","description":null,"extensions":[],"is_text":false},"chm":{"mime_type":"application/chm","group":"application","description":"MS Windows HtmlHelp Data","extensions":["chm"],"is_text":false},"clojure":{"mime_type":"text/x-clojure","group":"code","description":"Clojure","extensions":["clj","cljs","cljc","cljr"],"is_text":true},"cmake":{"mime_type":"text/x-cmake","group":"code","description":"CMake build file","extensions":["cmake"],"is_text":true},"cobol":{"mime_type":"text/x-cobol","group":"code","description":"Cobol","extensions":["cbl","cob","cpy","CBL","COB","CPY"],"is_text":true},"coff":{"mime_type":"application/x-coff","group":"executable","description":"Intel 80386 COFF","extensions":["obj","o"],"is_text":false},"coffeescript":{"mime_type":"text/coffeescript","group":"code","description":"CoffeeScript","extensions":["coffee"],"is_text":true},"com":{"mime_type":"application/x-dosexec","group":"executable","description":null,"extensions":[],"is_text":false},"cpl":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["cpl"],"is_text":false},"cpp":{"mime_type":"text/x-c","group":"code","description":"C++ source","extensions":["cc","cpp","cxx","c++","cppm","ixx"],"is_text":true},"crt":{"mime_type":"application/x-x509-ca-cert","group":"text","description":"Certificates (binary format)","extensions":["der","cer","crt"],"is_text":false},"crx":{"mime_type":"application/x-chrome-extension","group":"executable","description":"Google Chrome 
extension","extensions":["crx"],"is_text":false},"cs":{"mime_type":"text/plain","group":"code","description":"C# source","extensions":["cs","csx"],"is_text":true},"csproj":{"mime_type":"text/plain","group":"code","description":".NET project config","extensions":["csproj"],"is_text":true},"css":{"mime_type":"text/css","group":"code","description":"CSS source","extensions":["css"],"is_text":true},"csv":{"mime_type":"text/csv","group":"code","description":"CSV document","extensions":["csv"],"is_text":true},"ctl":{"mime_type":"application/octet-stream","group":null,"description":null,"extensions":[],"is_text":false},"dart":{"mime_type":"text/plain","group":"code","description":"Dart source","extensions":["dart"],"is_text":true},"deb":{"mime_type":"application/vnd.debian.binary-package","group":"archive","description":"Debian binary package","extensions":["deb"],"is_text":false},"dex":{"mime_type":"application/x-android-dex","group":"executable","description":"Dalvik dex file","extensions":["dex"],"is_text":false},"dey":{"mime_type":"application/x-android-dey","group":"executable","description":"Dalvik dex file","extensions":[],"is_text":false},"dicom":{"mime_type":"application/dicom","group":"image","description":"DICOM","extensions":["dcm"],"is_text":false},"diff":{"mime_type":"text/plain","group":"text","description":"Diff file","extensions":["diff","patch"],"is_text":true},"directory":{"mime_type":"inode/directory","group":"inode","description":"A directory","extensions":[],"is_text":false},"django":{"mime_type":"text/x-django","group":"code","description":"Django source","extensions":[],"is_text":false},"dll":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["dll"],"is_text":false},"dm":{"mime_type":"text/plain","group":"code","description":"Dream Maker","extensions":["dm"],"is_text":true},"dmigd":{"mime_type":"text/plain","group":"text","description":"Dominion 
Mods","extensions":["dm"],"is_text":true},"dmg":{"mime_type":"application/x-apple-diskimage","group":"archive","description":"Apple disk image","extensions":["dmg"],"is_text":false},"dmscript":{"mime_type":"text/plain","group":"code","description":"Digital Micrograph Script","extensions":["s"],"is_text":true},"doc":{"mime_type":"application/msword","group":"document","description":"Microsoft Word CDF document","extensions":["doc"],"is_text":false},"dockerfile":{"mime_type":"text/x-dockerfile","group":"code","description":"Dockerfile","extensions":[],"is_text":true},"docx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","group":"document","description":"Microsoft Word 2007+ document","extensions":["docx","docm"],"is_text":false},"dosmbr":{"mime_type":"application/octet-stream","group":null,"description":"Master boot record","extensions":[],"is_text":false},"dotx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.template","group":"document","description":"Office Word 2007 template","extensions":["dotx"],"is_text":false},"dsstore":{"mime_type":"application/octet-stream","group":"unknown","description":"Application Desktop Services Store","extensions":[],"is_text":false},"dwg":{"mime_type":"image/x-dwg","group":"image","description":"Autocad Drawing","extensions":["dwg"],"is_text":false},"dxf":{"mime_type":"image/vnd.dxf","group":"image","description":"Audocad Drawing Exchange Format","extensions":["dxf"],"is_text":true},"dylib":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":["dylib"],"is_text":false},"ebml":{"mime_type":"application/octet-stream","group":"unknown","description":"Extensible Binary Meta Language","extensions":[],"is_text":false},"elf":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF 
executable","extensions":["elf"],"is_text":false},"elixir":{"mime_type":"text/plain","group":"code","description":"Elixir script","extensions":["exs"],"is_text":true},"emf":{"mime_type":"application/octet-stream","group":"application","description":"Windows Enhanced Metafile image data","extensions":["emf"],"is_text":false},"eml":{"mime_type":"message/rfc822","group":"text","description":"RFC 822 mail","extensions":["eml"],"is_text":true},"empty":{"mime_type":"inode/x-empty","group":"inode","description":"Empty file","extensions":[],"is_text":false},"epub":{"mime_type":"application/epub+zip","group":"document","description":"EPUB document","extensions":["epub"],"is_text":false},"erb":{"mime_type":"text/x-ruby","group":"code","description":"Embedded Ruby source","extensions":["erb"],"is_text":true},"erlang":{"mime_type":"text/x-erlang","group":"code","description":"Erlang source","extensions":["erl","hrl"],"is_text":true},"ese":{"mime_type":"application/x-ms-ese","group":null,"description":"ESE Db","extensions":["dat"],"is_text":false},"exe":{"mime_type":"application/x-dosexec","group":"executable","description":"PE executable","extensions":["exe"],"is_text":false},"exp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flac":{"mime_type":"audio/flac","group":"audio","description":"FLAC audio bitstream data","extensions":["flac"],"is_text":false},"flutter":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flv":{"mime_type":"video/x-flv","group":"video","description":"Flash Video","extensions":["flv"],"is_text":false},"fortran":{"mime_type":"text/x-fortran","group":"document","description":"Fortran","extensions":["f90","f95","f03","F90"],"is_text":true},"fpx":{"mime_type":null,"group":"image","description":"Flashpix","extensions":["fpx"],"is_text":false},"gemfile":{"mime_type":"text/plain","group":"code","description":"Gemfile 
file","extensions":[],"is_text":true},"gemspec":{"mime_type":"text/plain","group":"code","description":"Gemspec file","extensions":["gemspec"],"is_text":true},"gif":{"mime_type":"image/gif","group":"image","description":"GIF image data","extensions":["gif"],"is_text":false},"gitattributes":{"mime_type":"text/plain","group":"code","description":"Gitattributes file","extensions":[],"is_text":true},"gitmodules":{"mime_type":"text/plain","group":"code","description":"Gitmodules file","extensions":[],"is_text":true},"gleam":{"mime_type":null,"group":"code","description":"Gleam source","extensions":["gleam"],"is_text":true},"go":{"mime_type":"text/x-golang","group":"code","description":"Golang source","extensions":["go"],"is_text":true},"gpx":{"mime_type":null,"group":null,"description":"XML document","extensions":["gpx"],"is_text":false},"gradle":{"mime_type":"text/x-groovy","group":"code","description":"Gradle source","extensions":["gradle"],"is_text":true},"groovy":{"mime_type":"text/x-groovy","group":"code","description":"Groovy source","extensions":["groovy"],"is_text":true},"gzip":{"mime_type":"application/gzip","group":"archive","description":"gzip compressed data","extensions":["gz","gzip","tgz","tar.gz"],"is_text":false},"h":{"mime_type":"text/x-c","group":"code","description":"C header source","extensions":["h"],"is_text":true},"h5":{"mime_type":"application/x-hdf5","group":"archive","description":"Hierarchical Data Format v5","extensions":["h5","hdf5"],"is_text":false},"handlebars":{"mime_type":"text/x-handlebars-template","group":"code","description":"Handlebars source","extensions":["hbs","handlebars"],"is_text":true},"haskell":{"mime_type":"text/plain","group":"code","description":"Haskell source","extensions":["hs","lhs"],"is_text":true},"hcl":{"mime_type":"text/x-hcl","group":"code","description":"HashiCorp configuration language","extensions":["hcl"],"is_text":true},"heif":{"mime_type":"image/heic","group":"image","description":"High Efficiency Image 
File","extensions":["heif","heifs","heic","heics"],"is_text":false},"hfs":{"mime_type":"application/x-hfs","group":null,"description":null,"extensions":["hfs"],"is_text":false},"hlp":{"mime_type":"application/winhlp","group":"application","description":"MS Windows help","extensions":["hlp"],"is_text":false},"hpp":{"mime_type":"text/x-h","group":"code","description":null,"extensions":["hh","hpp","hxx","h++"],"is_text":true},"hta":{"mime_type":"application/hta","group":"code","description":"HTML Application","extensions":["hta"],"is_text":false},"htaccess":{"mime_type":"text/x-apache-conf","group":"code","description":"Apache access configuration","extensions":[],"is_text":true},"html":{"mime_type":"text/html","group":"code","description":"HTML document","extensions":["html","htm","xhtml","xht"],"is_text":true},"hve":{"mime_type":null,"group":"unknown","description":null,"extensions":[],"is_text":false},"hwp":{"mime_type":"application/x-hwp","group":"document","description":"Hangul Word Processor","extensions":["hwp"],"is_text":false},"icc":{"mime_type":"application/vnd.iccprofile","group":null,"description":"ICC profile","extensions":["icc"],"is_text":false},"icns":{"mime_type":"image/x-icns","group":"image","description":"Mac OS X icon","extensions":["icns"],"is_text":false},"ico":{"mime_type":"image/vnd.microsoft.icon","group":"image","description":"MS Windows icon resource","extensions":["ico"],"is_text":false},"ics":{"mime_type":"text/calendar","group":"application","description":"Internet Calendaring and Scheduling","extensions":["ics"],"is_text":true},"ignorefile":{"mime_type":"text/plain","group":"code","description":"Ignorefile","extensions":[],"is_text":true},"img":{"mime_type":null,"group":null,"description":null,"extensions":["img"],"is_text":false},"ini":{"mime_type":"text/plain","group":"text","description":"INI configuration 
file","extensions":["ini"],"is_text":true},"internetshortcut":{"mime_type":"application/x-mswinurl","group":"application","description":"MS Windows Internet shortcut","extensions":["url"],"is_text":true},"iosapp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"ipynb":{"mime_type":"application/json","group":"code","description":"Jupyter notebook","extensions":["ipynb"],"is_text":true},"iso":{"mime_type":"application/x-iso9660-image","group":"archive","description":"ISO 9660 CD-ROM filesystem data","extensions":["iso"],"is_text":false},"jar":{"mime_type":"application/java-archive","group":"archive","description":"Java archive data (JAR)","extensions":["jar","klib"],"is_text":false},"java":{"mime_type":"text/x-java","group":"code","description":"Java source","extensions":["java"],"is_text":true},"javabytecode":{"mime_type":"application/x-java-applet","group":"executable","description":"Java compiled bytecode","extensions":["class"],"is_text":false},"javascript":{"mime_type":"application/javascript","group":"code","description":"JavaScript source","extensions":["js","mjs","cjs"],"is_text":true},"jinja":{"mime_type":"text/x-jinja2-template","group":"code","description":"Jinja template","extensions":["jinja","jinja2","j2"],"is_text":true},"jng":{"mime_type":"image/jng","group":"image","description":"JPEG network graphics","extensions":["jng"],"is_text":false},"jnlp":{"mime_type":"application/x-java-jnlp-file","group":"code","description":"Java Network Launch Protocol","extensions":["jnlp"],"is_text":true},"jp2":{"mime_type":"image/jpeg2000","group":"image","description":"jpeg2000","extensions":["jp2"],"is_text":false},"jpeg":{"mime_type":"image/jpeg","group":"image","description":"JPEG image data","extensions":["jpg","jpeg"],"is_text":false},"json":{"mime_type":"application/json","group":"code","description":"JSON 
document","extensions":["json"],"is_text":true},"jsonc":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"jsonl":{"mime_type":"application/json","group":"code","description":"JSONL document","extensions":["jsonl","jsonld"],"is_text":true},"jsx":{"mime_type":"application/javascript","group":"code","description":"JSX source","extensions":["jsx","mjsx","cjsx"],"is_text":true},"julia":{"mime_type":"text/x-julia","group":"code","description":"Julia source","extensions":["jl"],"is_text":true},"jxl":{"mime_type":"image/jxl","group":"image","description":"JPEG XL","extensions":["jxl"],"is_text":false},"ko":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, kernel object","extensions":["ko"],"is_text":false},"kotlin":{"mime_type":"text/plain","group":"code","description":"Kotlin source","extensions":["kt","kts"],"is_text":true},"ks":{"mime_type":null,"group":null,"description":"Tyrano","extensions":["ks"],"is_text":true},"latex":{"mime_type":"text/x-tex","group":"text","description":"LaTeX document","extensions":["tex","sty"],"is_text":true},"latexaux":{"mime_type":null,"group":null,"description":null,"extensions":["aux"],"is_text":false},"less":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"lha":{"mime_type":"application/x-lha","group":"archive","description":"LHarc archive","extensions":["lha","lzh"],"is_text":false},"license":{"mime_type":"text/plain","group":"text","description":"License file","extensions":[],"is_text":true},"lisp":{"mime_type":"text/x-lisp","group":"code","description":"Lisp source","extensions":["lisp","lsp","l","cl"],"is_text":true},"litcs":{"mime_type":null,"group":null,"description":"Literate CS","extensions":["litcoffee"],"is_text":false},"lnk":{"mime_type":"application/x-ms-shortcut","group":"application","description":"MS Windows 
shortcut","extensions":["lnk"],"is_text":false},"lock":{"mime_type":"text/plain","group":"application","description":"Lock file","extensions":["lock"],"is_text":true},"lrz":{"mime_type":"application/x-lrzip","group":null,"description":"LRZip","extensions":["lrz"],"is_text":false},"lua":{"mime_type":"text/plain","group":"code","description":"Lua","extensions":["lua"],"is_text":true},"lz":{"mime_type":"application/x-lzip","group":"archive","description":"LZip","extensions":["lz"],"is_text":false},"lz4":{"mime_type":"application/x-lz4","group":"archive","description":"LZ4","extensions":["lz4"],"is_text":false},"lzx":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"m3u":{"mime_type":"text/plain","group":"application","description":"M3U playlist","extensions":["m3u8","m3u"],"is_text":true},"m4":{"mime_type":"text/plain","group":"code","description":"GNU Macro","extensions":["m4"],"is_text":true},"macho":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":[],"is_text":false},"maff":{"mime_type":"application/x-maff","group":null,"description":null,"extensions":["maff"],"is_text":false},"makefile":{"mime_type":"text/x-makefile","group":"code","description":"Makefile source","extensions":[],"is_text":true},"markdown":{"mime_type":"text/markdown","group":"text","description":"Markdown document","extensions":["md","markdown"],"is_text":true},"matlab":{"mime_type":"text/x-matlab","group":"code","description":"Matlab Source","extensions":["m","matlab"],"is_text":true},"mht":{"mime_type":"application/x-mimearchive","group":"code","description":"MHTML document","extensions":["mht"],"is_text":true},"midi":{"mime_type":"audio/midi","group":"audio","description":"Midi","extensions":["mid"],"is_text":false},"mkv":{"mime_type":"video/x-matroska","group":"video","description":"Matroska","extensions":["mkv"],"is_text":false},"mp2":{"mime_type":null,"group":null,"description":"MP2 
stream","extensions":["mp2"],"is_text":false},"mp3":{"mime_type":"audio/mpeg","group":"audio","description":"MP3 media file","extensions":["mp3"],"is_text":false},"mp4":{"mime_type":"video/mp4","group":"video","description":"MP4 media file","extensions":["mp4"],"is_text":false},"mpegts":{"mime_type":"video/MP2T","group":"video","description":"MPEG Transport stream","extensions":["ts","tsv","tsa","m2t"],"is_text":false},"mscompress":{"mime_type":"application/x-ms-compress-szdd","group":"archive","description":"MS Compress archive data","extensions":[],"is_text":false},"msi":{"mime_type":"application/x-msi","group":"archive","description":"Microsoft Installer file","extensions":["msi"],"is_text":false},"msix":{"mime_type":"application/msix","group":"application","description":"Windows app package","extensions":["msix"],"is_text":false},"mst":{"mime_type":null,"group":null,"description":null,"extensions":["mst"],"is_text":false},"mui":{"mime_type":"application/x-dosexec","group":"application","description":"PE Windows executable","extensions":["mui"],"is_text":false},"mum":{"mime_type":"text/xml","group":"application","description":"Windows Update Package file","extensions":["mum"],"is_text":true},"mun":{"mime_type":null,"group":null,"description":null,"extensions":["mun"],"is_text":false},"nim":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"npy":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Array","extensions":["npy"],"is_text":false},"npz":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Arrays Archive","extensions":["npz"],"is_text":false},"null":{"mime_type":null,"group":null,"description":null,"extensions":["null"],"is_text":false},"nupkg":{"mime_type":"application/octet-stream","group":null,"description":"NuGet 
Package","extensions":["nupkg"],"is_text":false},"object":{"mime_type":null,"group":null,"description":null,"extensions":["o"],"is_text":false},"objectivec":{"mime_type":"text/x-objcsrc","group":"code","description":"ObjectiveC source","extensions":["m","mm"],"is_text":true},"ocaml":{"mime_type":"text-ocaml","group":"code","description":"OCaml","extensions":["ml","mli"],"is_text":true},"ocx":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["ocx"],"is_text":false},"odex":{"mime_type":"application/x-executable-elf","group":"executable","description":"ODEX ELF executable","extensions":["odex"],"is_text":false},"odin":{"mime_type":null,"group":"code","description":"Odin","extensions":["odin"],"is_text":true},"odp":{"mime_type":"application/vnd.oasis.opendocument.presentation","group":"document","description":"OpenDocument Presentation","extensions":["odp"],"is_text":false},"ods":{"mime_type":"application/vnd.oasis.opendocument.spreadsheet","group":"document","description":"OpenDocument Spreadsheet","extensions":["ods"],"is_text":false},"odt":{"mime_type":"application/vnd.oasis.opendocument.text","group":"document","description":"OpenDocument Text","extensions":["odt"],"is_text":false},"ogg":{"mime_type":"audio/ogg","group":"audio","description":"Ogg data","extensions":["ogg"],"is_text":false},"ole":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"one":{"mime_type":"application/msonenote","group":"document","description":"One Note","extensions":["one"],"is_text":false},"onnx":{"mime_type":"application/octet-stream","group":"archive","description":"Open Neural Network Exchange","extensions":["onnx"],"is_text":false},"ooxml":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"otf":{"mime_type":"font/otf","group":"font","description":"OpenType 
font","extensions":["otf"],"is_text":false},"outlook":{"mime_type":"application/vnd.ms-outlook","group":"application","description":"MS Outlook Message","extensions":[],"is_text":false},"palmos":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"parquet":{"mime_type":"application/vnd.apache.parquet","group":"unknown","description":"Apache Parquet","extensions":["pqt","parquet"],"is_text":false},"pascal":{"mime_type":"text/x-pascal","group":"code","description":"Pascal source","extensions":["pas","pp"],"is_text":true},"pbm":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"pcap":{"mime_type":"application/vnd.tcpdump.pcap","group":"application","description":"pcap capture file","extensions":["pcap","pcapng"],"is_text":false},"pdb":{"mime_type":"application/octet-stream","group":"application","description":"Windows Program Database","extensions":["pdb"],"is_text":false},"pdf":{"mime_type":"application/pdf","group":"document","description":"PDF document","extensions":["pdf"],"is_text":false},"pebin":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["exe","dll"],"is_text":false},"pem":{"mime_type":"application/x-pem-file","group":"application","description":"PEM certificate","extensions":["pem","pub","gpg"],"is_text":true},"perl":{"mime_type":"text/x-perl","group":"code","description":"Perl source","extensions":["pl"],"is_text":true},"pgp":{"mime_type":"application/pgp-keys","group":null,"description":"PGP","extensions":["gpg","pgp"],"is_text":false},"php":{"mime_type":"text/x-php","group":"code","description":"PHP source","extensions":["php"],"is_text":true},"pickle":{"mime_type":"application/octet-stream","group":"application","description":"Python pickle","extensions":["pickle","pkl"],"is_text":false},"png":{"mime_type":"image/png","group":"image","description":"PNG 
image","extensions":["png"],"is_text":false},"po":{"mime_type":"text/gettext-translation","group":"application","description":"Portable Object (PO) for i18n","extensions":["po"],"is_text":true},"postscript":{"mime_type":"application/postscript","group":"document","description":"PostScript document","extensions":["ps"],"is_text":false},"powershell":{"mime_type":"application/x-powershell","group":"code","description":"Powershell source","extensions":["ps1"],"is_text":true},"ppt":{"mime_type":"application/vnd.ms-powerpoint","group":"document","description":"Microsoft PowerPoint CDF document","extensions":["ppt"],"is_text":false},"pptx":{"mime_type":"application/vnd.openxmlformats-officedocument.presentationml.presentation","group":"document","description":"Microsoft PowerPoint 2007+ document","extensions":["pptx","pptm"],"is_text":false},"printfox":{"mime_type":null,"group":null,"description":"c64","extensions":[],"is_text":false},"prolog":{"mime_type":"text/x-prolog","group":"code","description":"Prolog source","extensions":["pl","pro","P"],"is_text":true},"proteindb":{"mime_type":"application/octet-stream","group":"application","description":"Protein DB","extensions":["pdb"],"is_text":true},"proto":{"mime_type":"text/x-proto","group":"code","description":"Protocol buffer definition","extensions":["proto"],"is_text":true},"protobuf":{"mime_type":"application/protobuf","group":"unknown","description":"Protocol buffers","extensions":["protobuf","pb"],"is_text":false},"psd":{"mime_type":"image/vnd.adobe.photoshop","group":"image","description":"Adobe Photoshop","extensions":["psd"],"is_text":false},"pytorch":{"mime_type":"application/octet-stream","group":"application","description":"Pytorch storage file","extensions":["pt","pth"],"is_text":false},"pub":{"mime_type":"application/x-mspublisher","group":null,"description":null,"extensions":["pub"],"is_text":false},"python":{"mime_type":"text/x-python","group":"code","description":"Python 
source","extensions":["py","pyi"],"is_text":true},"pythonbytecode":{"mime_type":"application/x-bytecode.python","group":"executable","description":"Python compiled bytecode","extensions":["pyc","pyo"],"is_text":false},"pythonpar":{"mime_type":null,"group":null,"description":null,"extensions":["par"],"is_text":false},"qoi":{"mime_type":"image/x-qoi","group":"image","description":"Quite Ok Image","extensions":["qoi"],"is_text":false},"qt":{"mime_type":"video/quicktime","group":"video","description":"QuickTime","extensions":["mov"],"is_text":false},"r":{"mime_type":"text/x-R","group":"code","description":"R (language)","extensions":["R"],"is_text":true},"randomascii":{"mime_type":"text/plain","group":"text","description":"Random ASCII characters","extensions":[],"is_text":true},"randombytes":{"mime_type":"application/octet-stream","group":"unknown","description":"Random bytes","extensions":[],"is_text":false},"randomtxt":{"mime_type":"text/plain","group":"text","description":"Random text","extensions":[],"is_text":true},"rar":{"mime_type":"application/x-rar","group":"archive","description":"RAR archive data","extensions":["rar"],"is_text":false},"rdf":{"mime_type":"application/rdf+xml","group":"text","description":"Resource Description Framework document (RDF)","extensions":["rdf"],"is_text":true},"rdp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"riff":{"mime_type":"application/x-riff","group":null,"description":null,"extensions":[],"is_text":false},"rlib":{"mime_type":"application/x-archive","group":"archive","description":"rust library","extensions":["rlib"],"is_text":false},"rll":{"mime_type":null,"group":"executable","description":"Resource Library","extensions":["rll"],"is_text":false},"rpm":{"mime_type":"application/x-rpm","group":"archive","description":"RedHat Package Manager archive (RPM)","extensions":["rpm"],"is_text":false},"rst":{"mime_type":"text/x-rst","group":"text","description":"ReStructuredText 
document","extensions":["rst"],"is_text":true},"rtf":{"mime_type":"text/rtf","group":"text","description":"Rich Text Format document","extensions":["rtf"],"is_text":true},"ruby":{"mime_type":"application/x-ruby","group":"code","description":"Ruby source","extensions":["rb"],"is_text":true},"rust":{"mime_type":"application/x-rust","group":"code","description":"Rust source","extensions":["rs"],"is_text":true},"rzip":{"mime_type":null,"group":null,"description":"Rzip","extensions":["rz"],"is_text":false},"scala":{"mime_type":"application/x-scala","group":"code","description":"Scala source","extensions":["scala"],"is_text":true},"scheme":{"mime_type":"text/x-scheme","group":"code","description":null,"extensions":["scm","ss"],"is_text":false},"scr":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["scr"],"is_text":false},"scriptwsf":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"scss":{"mime_type":"text/x-scss","group":"code","description":"SCSS source","extensions":["scss"],"is_text":true},"sevenzip":{"mime_type":"application/x-7z-compressed","group":"archive","description":"7-zip archive data","extensions":["7z"],"is_text":false},"sgml":{"mime_type":"application/sgml","group":"text","description":"sgml","extensions":["sgml"],"is_text":true},"sh3d":{"mime_type":null,"group":null,"description":null,"extensions":["sh3d"],"is_text":false},"shell":{"mime_type":"text/x-shellscript","group":"code","description":"Shell script","extensions":["sh"],"is_text":true},"smali":{"mime_type":"application/x-smali","group":"code","description":"Smali source","extensions":["smali"],"is_text":true},"snap":{"mime_type":"application/octet-stream","group":"archive","description":"Snap archive","extensions":["snap"],"is_text":false},"so":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, shared 
library","extensions":["so"],"is_text":false},"solidity":{"mime_type":null,"group":"code","description":"Solidity source","extensions":["sol"],"is_text":true},"sql":{"mime_type":"application/x-sql","group":"code","description":"SQL source","extensions":["sql"],"is_text":true},"sqlite":{"mime_type":null,"group":"application","description":"SQLITE database","extensions":["sqlite","sqlite3"],"is_text":false},"squashfs":{"mime_type":"application/octet-stream","group":"archive","description":"Squash filesystem","extensions":[],"is_text":false},"srt":{"mime_type":"text/srt","group":"application","description":"SubRip Text Format","extensions":["srt"],"is_text":true},"stlbinary":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (binary)","extensions":["stl"],"is_text":false},"stltext":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (text)","extensions":["stl"],"is_text":true},"sum":{"mime_type":null,"group":"application","description":"Checksum file","extensions":["sum"],"is_text":true},"svd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"svg":{"mime_type":"image/svg+xml","group":"image","description":"SVG Scalable Vector Graphics image data","extensions":["svg"],"is_text":true},"swf":{"mime_type":"application/x-shockwave-flash","group":"executable","description":"Small Web File","extensions":["swf"],"is_text":false},"swift":{"mime_type":"text/x-swift","group":"code","description":"Swift","extensions":["swift"],"is_text":true},"symlink":{"mime_type":"inode/symlink","group":"inode","description":"Symbolic link","extensions":[],"is_text":false},"symlinktext":{"mime_type":"text/plain","group":"application","description":"Symbolic link (textual representation)","extensions":[],"is_text":true},"sys":{"mime_type":"application/x-windows-driver","group":"executable","description":"PE Windows 
executable","extensions":["sys"],"is_text":false},"tar":{"mime_type":"application/x-tar","group":"archive","description":"POSIX tar archive","extensions":["tar"],"is_text":false},"tcl":{"mime_type":"application/x-tcl","group":"code","description":"Tickle","extensions":["tcl"],"is_text":true},"textproto":{"mime_type":"text/plain","group":"code","description":"Text protocol buffer","extensions":["textproto","textpb","pbtxt"],"is_text":true},"tga":{"mime_type":"image/x-tga","group":"image","description":"Targa image data","extensions":["tga"],"is_text":false},"thumbsdb":{"mime_type":"image/vnd.ms-thumb","group":"application","description":"Windows thumbnail cache","extensions":[],"is_text":false},"tiff":{"mime_type":"image/tiff","group":"image","description":"TIFF image data","extensions":["tiff","tif"],"is_text":false},"tmdx":{"mime_type":null,"group":null,"description":null,"extensions":["tmdx","tmvx"],"is_text":false},"toml":{"mime_type":"application/toml","group":"text","description":"Tom's obvious, minimal language","extensions":["toml"],"is_text":true},"torrent":{"mime_type":"application/x-bittorrent","group":"application","description":"BitTorrent file","extensions":["torrent"],"is_text":false},"troff":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"tsv":{"mime_type":"text/tsv","group":"code","description":"TSV document","extensions":["tsv"],"is_text":true},"tsx":{"mime_type":"text/x-typescript","group":"code","description":"TSX source","extensions":["tsx","mtsx","ctsx"],"is_text":true},"ttf":{"mime_type":"font/sfnt","group":"font","description":"TrueType Font data","extensions":["ttf","ttc"],"is_text":false},"twig":{"mime_type":"text/x-twig","group":"code","description":"Twig template","extensions":["twig"],"is_text":true},"txt":{"mime_type":"text/plain","group":"text","description":"Generic text document","extensions":["txt"],"is_text":true},"txtascii":{"mime_type":"text/plain","group":"text","description":"Generic text 
document encoded in ASCII","extensions":["txt"],"is_text":true},"txtutf16":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-16","extensions":["txt"],"is_text":true},"txtutf8":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-8","extensions":["txt"],"is_text":true},"typescript":{"mime_type":"application/typescript","group":"code","description":"TypeScript source","extensions":["ts","mts","cts"],"is_text":true},"udf":{"mime_type":"application/x-udf-image","group":null,"description":"Universal Disc Format","extensions":[],"is_text":false},"undefined":{"mime_type":"application/undefined","group":"undefined","description":"Undefined","extensions":[],"is_text":false},"unixcompress":{"mime_type":"application/x-compress","group":null,"description":null,"extensions":["z"],"is_text":false},"unknown":{"mime_type":"application/octet-stream","group":"unknown","description":"Unknown binary data","extensions":[],"is_text":false},"vba":{"mime_type":"text/vbscript","group":"code","description":"MS Visual Basic source (VBA)","extensions":["vbs","vba","vb"],"is_text":true},"vbe":{"mime_type":null,"group":"code","description":"EncryptedVBS","extensions":["vbe"],"is_text":false},"vcard":{"mime_type":"text/vcard","group":null,"description":null,"extensions":["vcard"],"is_text":false},"vcs":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"vcxproj":{"mime_type":"application/xml","group":"code","description":"Visual Studio MSBuild project","extensions":["vcxproj"],"is_text":true},"verilog":{"mime_type":"text/x-verilog","group":"code","description":"Verilog source","extensions":["v","verilog","vlg","vh"],"is_text":true},"vhd":{"mime_type":"application/x-vhd","group":null,"description":"Virtual Hard Disk","extensions":[],"is_text":false},"vhdl":{"mime_type":"text/x-vhdl","group":"code","description":"VHDL 
source","extensions":["vhd"],"is_text":true},"visio":{"mime_type":"application/vnd.ms-visio.drawing.main+xml","group":"document","description":"Microsoft Visio","extensions":["vsd","vsdm","vsdx","vdw"],"is_text":false},"vtt":{"mime_type":"text/vtt","group":"text","description":"Web Video Text Tracks","extensions":["vtt","webvtt"],"is_text":true},"vue":{"mime_type":"application/javascript","group":"code","description":"Vue source","extensions":["vue"],"is_text":true},"wad":{"mime_type":"application/wad","group":"archive","description":"WAD","extensions":["wad"],"is_text":false},"wasm":{"mime_type":"application/wasm","group":"executable","description":"Web Assembly","extensions":["wasm"],"is_text":false},"wav":{"mime_type":"audio/x-wav","group":"audio","description":"Waveform Audio file (WAV)","extensions":["wav"],"is_text":false},"webm":{"mime_type":"video/webm","group":"video","description":"WebM media file","extensions":["webm"],"is_text":false},"webp":{"mime_type":"image/webp","group":"image","description":"WebP media file","extensions":["webp"],"is_text":false},"webtemplate":{"mime_type":"text/plain","group":"code","description":"Web templating language","extensions":[],"is_text":true},"wim":{"mime_type":"application/x-ms-wim","group":"unknown","description":"Windows Imaging Format","extensions":["wim","swm","esd"],"is_text":false},"winregistry":{"mime_type":"text/x-ms-regedit","group":"application","description":"Windows Registry text","extensions":["reg"],"is_text":true},"wma":{"mime_type":"audio/x-ms-wma","group":"audio","description":"Windows Media Audio","extensions":["wma"],"is_text":false},"wmf":{"mime_type":"image/wmf","group":"image","description":"Windows metafile","extensions":["wmf"],"is_text":false},"wmv":{"mime_type":"video/x-ms-wmv","group":"video","description":"Windows Media Video","extensions":["wmv"],"is_text":false},"woff":{"mime_type":"font/woff","group":"font","description":"Web Open Font 
Format","extensions":["woff"],"is_text":false},"woff2":{"mime_type":"font/woff2","group":"font","description":"Web Open Font Format v2","extensions":["woff2"],"is_text":false},"xar":{"mime_type":"application/x-xar","group":"archive","description":"XAR archive compressed data","extensions":["pkg","xar"],"is_text":false},"xcf":{"mime_type":"image/x-xcf","group":"image","description":"Gimp image","extensions":["xcf"],"is_text":false},"xls":{"mime_type":"application/vnd.ms-excel","group":"document","description":"Microsoft Excel CDF document","extensions":["xls"],"is_text":false},"xlsb":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document (binary format)","extensions":["xlsb"],"is_text":false},"xlsx":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document","extensions":["xlsx","xlsm"],"is_text":false},"xml":{"mime_type":"text/xml","group":"code","description":"XML document","extensions":["xml"],"is_text":true},"xpi":{"mime_type":"application/zip","group":"archive","description":"Compressed installation archive (XPI)","extensions":["xpi"],"is_text":false},"xsd":{"mime_type":null,"group":null,"description":null,"extensions":["xsd"],"is_text":false},"xz":{"mime_type":"application/x-xz","group":"archive","description":"XZ compressed data","extensions":["xz"],"is_text":false},"yaml":{"mime_type":"application/x-yaml","group":"code","description":"YAML source","extensions":["yml","yaml"],"is_text":true},"yara":{"mime_type":"text/x-yara","group":"code","description":"YARA rule","extensions":["yar","yara"],"is_text":true},"zig":{"mime_type":"text/zig","group":"code","description":"Zig source","extensions":["zig"],"is_text":true},"zip":{"mime_type":"application/zip","group":"archive","description":"Zip archive 
data","extensions":["zip"],"is_text":false},"zlibstream":{"mime_type":"application/zlib","group":"application","description":"zlib compressed data","extensions":[],"is_text":false},"zst":{"mime_type":"application/zstd","group":"archive","description":"Zstandard","extensions":["zst"],"is_text":false}}

================================================
FILE: python/src/magika/logger.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101, D102, D103, D107

from __future__ import annotations

import logging
import sys
from typing import Optional, TextIO

from magika import colors

_logger: Optional[SimpleLogger] = None


class SimpleLogger:
    """Minimal level-filtered logger that prints straight to a text stream.

    Messages are written to the current `sys.stderr` unless another stream is
    handed to `raw_print()`, so no external dependency (e.g., `rich`) is
    needed. Severities reuse the numeric constants of the standard `logging`
    module, and the initial threshold is `logging.WARNING`.
    """

    def __init__(self, use_colors: bool = False):
        self.use_colors = use_colors
        self.level = logging.WARNING

    def setLevel(self, level: int) -> None:
        """Set the minimum severity a message needs in order to be printed."""
        self.level = level

    def raw_print(
        self, msg: str, file: Optional[TextIO] = None, flush: bool = True
    ) -> None:
        """Print `msg` unconditionally (no level check, no prefix).

        `file` defaults to `None` rather than to `sys.stderr` on purpose: a
        default argument would capture whatever `sys.stderr` was when the
        function was defined, whereas looking it up here always targets the
        stream that is current at call time (which matters, for example, when
        tests replace `sys.stderr`).
        """
        target = sys.stderr if file is None else file
        print(msg, file=target, flush=flush)

    def raw_print_to_stdout(self, msg: str) -> None:
        """Print `msg` unconditionally to the current `sys.stdout`."""
        self.raw_print(msg, file=sys.stdout)

    def debug(self, msg: str) -> None:
        """Print a `DEBUG: ` line if the threshold allows it (green if colored)."""
        if logging.DEBUG < self.level:
            return
        text = f"DEBUG: {msg}"
        if self.use_colors:
            text = f"{colors.GREEN}{text}{colors.RESET}"
        self.raw_print(text)

    def info(self, msg: str) -> None:
        """Print an `INFO: ` line if the threshold allows it (never colored)."""
        if logging.INFO < self.level:
            return
        self.raw_print(f"INFO: {msg}")

    def warning(self, msg: str) -> None:
        """Print a `WARNING: ` line if the threshold allows it (yellow if colored)."""
        if logging.WARNING < self.level:
            return
        text = f"WARNING: {msg}"
        if self.use_colors:
            text = f"{colors.YELLOW}{text}{colors.RESET}"
        self.raw_print(text)

    def error(self, msg: str) -> None:
        """Print an `ERROR: ` line if the threshold allows it (red if colored)."""
        if logging.ERROR < self.level:
            return
        text = f"ERROR: {msg}"
        if self.use_colors:
            text = f"{colors.RED}{text}{colors.RESET}"
        self.raw_print(text)


def get_logger(use_colors: bool = False) -> SimpleLogger:
    """Return the module-wide `SimpleLogger`, creating it on first use.

    `use_colors` is only consulted when the logger is created; once an
    instance exists, later calls return that same instance unchanged.
    """
    global _logger

    if _logger is not None:
        return _logger

    _logger = SimpleLogger(use_colors=use_colors)
    return _logger


================================================
FILE: python/src/magika/magika.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Magika (the Python library).

This module provides the `Magika` class, the main entry point for using Magika
to identify file content types.
"""

import io
import json
import logging
import os
import time
from pathlib import Path
from typing import BinaryIO, Dict, List, Optional, Sequence, Set, Tuple, Union

import onnxruntime as rt

from magika.logger import get_logger
from magika.types import (
    ContentTypeInfo,
    ContentTypeLabel,
    MagikaError,
    MagikaPrediction,
    MagikaResult,
    ModelConfig,
    ModelFeatures,
    ModelOutput,
    OverwriteReason,
    PredictionMode,
    Seekable,
    Status,
)

_DEFAULT_MODEL_NAME = "standard_v3_3"


class Magika:
    """Main Magika class for content type identification.

    This class provides methods to identify the content type of files, bytes,
    and streams.
    """

    def __init__(
        self,
        model_dir: Optional[Path] = None,
        prediction_mode: PredictionMode = PredictionMode.HIGH_CONFIDENCE,
        no_dereference: bool = False,
        verbose: bool = False,
        debug: bool = False,
        use_colors: bool = False,
    ) -> None:
        """Set up a Magika instance: logger, model files, configs, ONNX session.

        Args:
            model_dir: Directory holding `model.onnx` and `config.min.json`.
                If None, the default model shipped next to this module is
                used.
            prediction_mode: The prediction mode to use. Defaults to
                PredictionMode.HIGH_CONFIDENCE.
            no_dereference: If True, do not follow symlinks. Defaults to
                False.
            verbose: If True, set the logger level to INFO. Defaults to False.
            debug: If True, set the logger level to DEBUG. Defaults to False.
            use_colors: If True, use colors in the logger. Defaults to False.

        Raises:
            MagikaError: If the model directory, the model file, or the model
                config file cannot be found.
        """
        self._log = get_logger(use_colors=use_colors)

        # `debug` is applied after `verbose`, so it wins when both are set.
        if verbose:
            self._log.setLevel(logging.INFO)
        if debug:
            self._log.setLevel(logging.DEBUG)

        package_dir = Path(__file__).parent

        # Without an explicit directory, fall back to the default model.
        self._model_dir = (
            model_dir
            if model_dir is not None
            else package_dir / "models" / self._get_default_model_name()
        )
        self._model_path = self._model_dir / "model.onnx"
        self._model_config_path = self._model_dir / "config.min.json"

        # Fail early, with a specific message, on the first missing piece.
        if not self._model_dir.is_dir():
            raise MagikaError(f"model dir not found at {str(self._model_dir)}")
        if not self._model_path.is_file():
            raise MagikaError(f"model not found at {str(self._model_path)}")
        if not self._model_config_path.is_file():
            raise MagikaError(
                f"model config not found at {str(self._model_config_path)}"
            )

        self._model_config: ModelConfig = Magika._load_model_config(
            self._model_config_path
        )

        # Plain-string view of the labels the model can output.
        self._target_labels_space = [
            str(label) for label in self._model_config.target_labels_space
        ]

        self._prediction_mode = prediction_mode
        self._no_dereference = no_dereference

        self._cts_infos = Magika._load_content_types_kb(
            package_dir / "config" / "content_types_kb.min.json"
        )

        self._onnx_session = self._init_onnx_session()

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return f'Magika(module_version="{self.get_module_version()}", model_name="{self.get_model_name()}")'

    def get_module_version(self) -> str:
        """Gets the version of the Magika Python module."""
        return str(__import__(self.__module__).__version__)

    def get_model_name(self) -> str:
        """Gets the name of the loaded model."""
        return self._model_dir.name

    def identify_path(self, path: Union[str, os.PathLike]) -> MagikaResult:
        """Identify the content type of a file given its path."""
        if isinstance(path, str) or isinstance(path, os.PathLike):
            path = Path(path)
        else:
            raise TypeError(
                f"Path '{path}' is invalid: input path should be of type `Union[str, os.PathLike]`"
            )

        return self._get_result_from_path(path)

    def identify_paths(
        self, paths: Sequence[Union[str, os.PathLike]]
    ) -> List[MagikaResult]:
        """Identify the content type of a list of files given their paths."""
        if not isinstance(paths, Sequence):
            raise TypeError("Input paths should be of type Sequence[Path]")

        paths_ = []
        for path in paths:
            if isinstance(path, str) or isinstance(path, os.PathLike):
                paths_.append(Path(path))
            else:
                raise TypeError(
                    f"Input '{path}' is invalid: input path should be of type `Union[str, os.PathLike]`"
                )

        return self._get_results_from_paths(paths_)

    def identify_bytes(self, content: bytes) -> MagikaResult:
        """Identify the content type of raw bytes."""
        if not isinstance(content, bytes):
            raise TypeError(
                f"Input content should be of type 'bytes', not {type(content)}."
            )

        return self._get_result_from_seekable(Seekable(io.BytesIO(content)))

    def identify_stream(self, stream: BinaryIO) -> MagikaResult:
        """Identify the content type of a BinaryIO stream.

        Identifies the content type from an already-open binary file-like object
        (e.g., the output of `open(file_path, 'rb')`). Note: 1) Magika will
        `seek()` around the stream; 2) the stream _is not closed_ (closing it is
        the responsibility of the caller).
        """
        if not isinstance(stream, io.IOBase) or not stream.readable():  # type: ignore[unreachable]
            raise TypeError("Input stream must be a readable BinaryIO object.")

        # Explicitly test for the most common error so that we can return an
        # helpful error message.
        if isinstance(stream, io.TextIOBase):  # type: ignore[unreachable]
            raise TypeError(
                "Input stream must be opened in bytes mode, not in text mode."
            )

        if not isinstance(stream, io.BufferedIOBase):
            raise TypeError("Input stream must be a readable BinaryIO object.")

        if (
            not hasattr(stream, "seek")
            or not hasattr(stream, "read")
            or not hasattr(stream, "tell")
        ):
            raise TypeError("Input stream must have seek, read, and tell methods.")

        try:
            current_position = stream.tell()
            result = self._get_result_from_seekable(Seekable(stream))
        finally:
            # seek to the previous position even in case of exceptions
            stream.seek(current_position)
        return result

    def get_output_content_types(self) -> List[ContentTypeLabel]:
        """Return the sorted list of all possible output content types.

        I.e., all possible values for `MagikaResult.prediction.output.label`.
        The list combines the labels the model itself can emit (each one
        mapped through the model config's `overwrite_map` when it has an
        entry there) with the content types that are always possible:
        `directory`, `empty`, `symlink`, `txt`, and `unknown`.

        Consult the documentation for more details.
        """
        overwrite_map = self._model_config.overwrite_map

        # Outputs that are possible regardless of the model's label space.
        always_possible: Set[ContentTypeLabel] = {
            ContentTypeLabel.DIRECTORY,
            ContentTypeLabel.EMPTY,
            ContentTypeLabel.SYMLINK,
            ContentTypeLabel.TXT,
            ContentTypeLabel.UNKNOWN,
        }

        # A target label shows up as its overwrite if there is one, otherwise
        # as itself.
        from_model: Set[ContentTypeLabel] = {
            overwrite_map.get(label, label)
            for label in self._model_config.target_labels_space
        }

        return sorted(always_possible | from_model)

    def get_model_content_types(self) -> List[ContentTypeLabel]:
        """Return the sorted list of all possible outputs of the model.

        I.e., all possible values for `MagikaResult.prediction.dl.label`:
        the model's target labels plus `undefined`. Note that, in general,
        the "model outputs" differ from the "tool outputs": in some cases the
        model is not even used, or its output is overwritten because of a
        low-confidence score or for other reasons. This API is mostly useful
        for debugging; the vast majority of clients should use
        `get_output_content_types()`.

        Consult the documentation for more details.
        """
        labels: Set[ContentTypeLabel] = set(self._model_config.target_labels_space)
        labels.add(ContentTypeLabel.UNDEFINED)
        return sorted(labels)

    @staticmethod
    def _get_default_model_name() -> str:
        """Return the name of the model used when no `model_dir` is given.

        Being static, this can be called by external clients/tests without
        having to instantiate a Magika object first.
        """
        default_name: str = _DEFAULT_MODEL_NAME
        return default_name

    @staticmethod
    def _load_content_types_kb(
        content_types_kb_json_path: Path,
    ) -> Dict[ContentTypeLabel, ContentTypeInfo]:
        """Load the content types knowledge base from a JSON file.

        The JSON document maps each content type name to an object with the
        keys `mime_type`, `group`, `description`, `extensions`, and
        `is_text`. Null values are replaced with defaults:

        - `mime_type`: `text/plain` for textual content types,
          `application/octet-stream` otherwise;
        - `group`: `unknown`;
        - `description`: the content type name itself.

        Args:
            content_types_kb_json_path: Path to the knowledge base JSON file.

        Returns:
            A dict mapping each `ContentTypeLabel` to its `ContentTypeInfo`.
        """
        TXT_MIME_TYPE = "text/plain"
        UNKNOWN_MIME_TYPE = "application/octet-stream"
        UNKNOWN_GROUP = "unknown"

        # Decode explicitly as UTF-8: without an `encoding`, `read_text()`
        # uses the locale's preferred encoding, which would make loading this
        # file depend on the platform configuration.
        raw_kb = json.loads(content_types_kb_json_path.read_text(encoding="utf-8"))

        out: Dict[ContentTypeLabel, ContentTypeInfo] = {}
        for ct_name, ct_info in raw_kb.items():
            is_text = ct_info["is_text"]

            mime_type = ct_info["mime_type"]
            if mime_type is None:
                mime_type = TXT_MIME_TYPE if is_text else UNKNOWN_MIME_TYPE

            group = ct_info["group"]
            if group is None:
                group = UNKNOWN_GROUP

            description = ct_info["description"]
            if description is None:
                description = ct_name

            label = ContentTypeLabel(ct_name)
            out[label] = ContentTypeInfo(
                label=label,
                mime_type=mime_type,
                group=group,
                description=description,
                extensions=ct_info["extensions"],
                is_text=is_text,
            )
        return out

    @staticmethod
    def _load_model_config(model_config_path: Path) -> ModelConfig:
        """Load a model configuration from its JSON file.

        Scalar settings are copied as they are; label strings (the entries of
        `target_labels_space`, the keys of `thresholds`, and both keys and
        values of `overwrite_map`) are converted to `ContentTypeLabel`.

        Args:
            model_config_path: Path to the model's JSON config file.

        Returns:
            The parsed `ModelConfig`.

        Raises:
            KeyError: If an expected key is missing from the JSON document.
        """
        # Decode explicitly as UTF-8: without an `encoding`, `read_text()`
        # uses the locale's preferred encoding, which would make loading this
        # file depend on the platform configuration.
        config = json.loads(model_config_path.read_text(encoding="utf-8"))

        return ModelConfig(
            beg_size=config["beg_size"],
            mid_size=config["mid_size"],
            end_size=config["end_size"],
            use_inputs_at_offsets=config["use_inputs_at_offsets"],
            medium_confidence_threshold=config["medium_confidence_threshold"],
            min_file_size_for_dl=config["min_file_size_for_dl"],
            padding_token=config["padding_token"],
            block_size=config["block_size"],
            target_labels_space=[
                ContentTypeLabel(ct_str) for ct_str in config["target_labels_space"]
            ],
            thresholds={
                ContentTypeLabel(k): v for k, v in config["thresholds"].items()
            },
            overwrite_map={
                ContentTypeLabel(k): ContentTypeLabel(v)
                for k, v in config["overwrite_map"].items()
            },
        )

    def _init_onnx_session(self) -> rt.InferenceSession:
        """Create the ONNX Runtime session for the model at `self._model_path`.

        Telemetry events are disabled first, and the session is created with
        the CPU execution provider only. The time taken is logged at debug
        level.
        """
        t0 = time.time()

        rt.disable_telemetry_events()
        session = rt.InferenceSession(
            self._model_path,
            providers=["CPUExecutionProvider"],
        )

        load_ms = 1000 * (time.time() - t0)
        self._log.debug(
            f'ONNX DL model "{self._model_path}" loaded in {load_ms:.03f} ms'
        )
        return session

    def _get_ct_info(self, content_type: ContentTypeLabel) -> ContentTypeInfo:
        return self._cts_infos[content_type]

    def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaResult]:
        """Get results for a list of paths.

        Given a list of paths, returns a list of MagikaResult objects, which
        contain relevant information, such as: file path, the output of the DL
        model, the confidence score, the output of the tool, and associated
        metadata. The order of the predictions matches the order of the input
        paths.
        """
        # We do a first pass on all files: we collect features for the files
        # that need to be analyzed with the DL model, and we already determine
        # the output for the remaining ones.

        # We use a "str" instead of Path because it makes it easier later on to
        # serialize.
        all_outputs: Dict[str, MagikaResult] = {}  # {path: , ...}

        # We use a list and not the dict because that's what we need later on
        # for inference.
        all_features: List[Tuple[Path, ModelFeatures]] = []

        self._log.debug(
            f"Processing input files and extracting features for {len(paths)} samples"
        )
        start_time = time.time()
        for path in paths:
            output, features = self._get_result_or_features_from_path(path)
            if output is not None:
                all_outputs[str(path)] = output
            else:
                assert features is not None
                all_features.append((path, features))
        elapsed_time = 1000 * (time.time() - start_time)
        self._log.debug(f"First pass and features extracted in {elapsed_time:.03f} ms")

        # Get the outputs via DL for the files that need it.
        for path_str, result in self._get_results_from_features(all_features).items():
            all_outputs[path_str] = result

        # Finally, we collect the predictions in a final list, sorted by the
        # initial paths list (and not by insertion order).
        sorted_outputs = []
        for path in paths:
            sorted_outputs.append(all_outputs[str(path)])
        return sorted_outputs

    def _get_result_from_path(self, path: Path) -> MagikaResult:
        return self._get_results_from_paths([path])[0]

    def _get_result_from_seekable(self, seekable: Seekable) -> MagikaResult:
        result, features = self._get_result_or_features_from_seekable(seekable)
        if result is not None:
            return result
        assert features is not None
        return self._get_results_from_features([(Path("-"), features)])["-"]

    @staticmethod
    def _extract_features_from_seekable(
        seekable: Seekable,
        beg_size: int,
        mid_size: int,
        end_size: int,
        padding_token: int,
        block_size: int,
        use_inputs_at_offsets: bool,
    ) -> ModelFeatures:
        """Extract the `beg` and `end` model features from a seekable.

        A seekable abstracts anything that has a `size` and can be
        `read_at()` a given offset, such as a file or a buffer; working
        through it means we never need to load the whole file in memory or
        scan the whole buffer.

        What happens, at a high level:
        - At most `block_size` bytes are read from the beginning and, with a
          separate read, from the end of the content.
        - Leading whitespace is stripped from the first block and trailing
          whitespace from the last block.
        - The first `beg_size` bytes of the first block and the last
          `end_size` bytes of the last block become the `beg` and `end`
          features; when fewer bytes are available, `padding_token` fills
          the gap (after `beg`, before `end`).

        NOTE: `mid` features and `use_inputs_at_offsets` are not supported
        here: `mid_size` must be 0 and `use_inputs_at_offsets` must be False.
        All the corresponding output fields are empty lists.
        """
        assert beg_size < block_size
        assert mid_size == 0
        assert end_size < block_size
        assert not use_inputs_at_offsets

        # Never read more than one block, nor more than what is available.
        window = min(block_size, seekable.size)

        beg_ints: List[int] = []
        if beg_size > 0:
            # First block, without its leading whitespace.
            head = seekable.read_at(0, window).lstrip()
            beg_ints = Magika._get_beg_ints_with_padding(
                head, beg_size, padding_token
            )

        end_ints: List[int] = []
        if end_size > 0:
            # Last block, without its trailing whitespace.
            tail = seekable.read_at(seekable.size - window, window).rstrip()
            end_ints = Magika._get_end_ints_with_padding(
                tail, end_size, padding_token
            )

        return ModelFeatures(
            beg=beg_ints,
            mid=[],
            end=end_ints,
            offset_0x8000_0x8007=[],
            offset_0x8800_0x8807=[],
            offset_0x9000_0x9007=[],
            offset_0x9800_0x9807=[],
        )

    @staticmethod
    def _get_beg_ints_with_padding(
        beg_content: bytes, beg_size: int, padding_token: int
    ) -> List[int]:
        """Take an (already-stripped) buffer as input and extract beg ints.

        This returns a list of integers whose length is exactly beg_size. If
        the buffer is bigger than required, take only the initial portion. If
        the buffer is shorter, add padding at the end.
        """
        if beg_size < len(beg_content):
            # we don't need so many bytes
            beg_content = beg_content[0:beg_size]

        beg_ints = list(map(int, beg_content))

        if len(beg_ints) < beg_size:
            # we don't have enough ints, add padding
            beg_ints = beg_ints + ([padding_token] * (beg_size - len(beg_ints)))

        assert len(beg_ints) == beg_size

        return beg_ints

    @staticmethod
    def _get_end_ints_with_padding(
        end_content: bytes, end_size: int, padding_token: int
    ) -> List[int]:
        """Take an (already-stripped) buffer as input and extract end ints.

        This returns a list of integers whose length is exactly end_size. If the
        buffer is bigger than required, take only the last portion. If the
        buffer is shorter, add padding at the beginning.
        """
        if end_size < len(end_content):
            # we don't need so many bytes
            end_content = end_content[len(end_content) - end_size : len(end_content)]

        end_ints = list(map(int, end_content))

        if len(end_ints) < end_size:
            # we don't have enough ints, add padding
            end_ints = ([padding_token] * (end_size - len(end_ints))) + end_ints

        assert len(end_ints) == end_size

        return end_ints

    def _get_model_outputs_from_features(
        self, all_features: List[Tuple[Path, ModelFeatures]]
    ) -> List[Tuple[Path, ModelOutput]]:
        raw_preds = self._get_raw_predictions(all_features)

        outputs = []
        for (path, _), preds in zip(all_features, raw_preds):
            target_label_idx = max(range(len(preds)), key=preds.__getitem__)
            score = preds[target_label_idx]
            label = self._target_labels_space[target_label_idx]
            outputs.append(
                (path, ModelOutput(label=ContentTypeLabel(label), score=score))
            )
        return outputs

    def _get_results_from_features(
        self, all_features: List[Tuple[Path, ModelFeatures]]
    ) -> Dict[str, MagikaResult]:
        # We now do inference for those files that need it.

        if len(all_features) == 0:
            # nothing to be done
            return {}

        results: Dict[str, MagikaResult] = {}

        for path, model_output in self._get_model_outputs_from_features(all_features):
            # In additional to the content type label from the DL model, we
            # also allow for other logic to overwrite such result. For
            # debugging and information purposes, the JSON output stores
            # both the raw DL model output and the final output we return to
            # the user.

            output_label, overwrite_reason = (
                self._get_output_label_from_dl_label_and_score(
                    model_output.label, model_output.score
                )
            )

            results[str(path)] = self._get_result_from_labels_and_score(
                path=path,
                dl_label=model_output.label,
                output_label=output_label,
                score=model_output.score,
                overwrite_reason=overwrite_reason,
            )

        return results

    def _get_output_label_from_dl_label_and_score(
        self, dl_label: ContentTypeLabel, score: float
    ) -> Tuple[ContentTypeLabel, OverwriteReason]:
        """Derive the label returned to the user from the raw model output.

        Args:
            dl_label: The label predicted by the model.
            score: The score the model associated with `dl_label`.

        Returns:
            A `(output_label, overwrite_reason)` tuple. `overwrite_reason`
            explains why `output_label` differs from `dl_label`.
        """
        model_config = self._model_config
        mode = self._prediction_mode

        # First, apply the overwrite_map from the model config (if it has an
        # entry for this label).
        output_label = model_config.overwrite_map.get(dl_label, dl_label)
        overwrite_reason = (
            OverwriteReason.OVERWRITE_MAP
            if output_label != dl_label
            else OverwriteReason.NONE
        )

        # Then decide whether the score is "high enough" to trust the model;
        # what counts as high enough depends on the prediction mode.
        if mode == PredictionMode.BEST_GUESS:
            # Any score is accepted.
            score_is_trusted = True
        elif mode == PredictionMode.HIGH_CONFIDENCE:
            # Per-content-type threshold, looked up with the raw model label;
            # labels without a dedicated entry fall back to the generic
            # medium-confidence threshold.
            score_is_trusted = score >= model_config.thresholds.get(
                dl_label, model_config.medium_confidence_threshold
            )
        elif mode == PredictionMode.MEDIUM_CONFIDENCE:
            score_is_trusted = score >= model_config.medium_confidence_threshold
        else:
            score_is_trusted = False

        if score_is_trusted:
            # Keep the (potentially overwritten) model prediction.
            return output_label, overwrite_reason

        # The model cannot be trusted, so we fall back to a generic label. We
        # implicitly assume the model got at least the text vs. binary
        # category right, which lets us choose between TXT and UNKNOWN without
        # reading the file bytes again.
        if self._get_ct_info(output_label).is_text:
            output_label = ContentTypeLabel.TXT
        else:
            output_label = ContentTypeLabel.UNKNOWN

        # overwrite_reason tells clients why the output differs from the
        # model prediction; when the two coincide nothing was actually
        # overwritten, so we report NONE.
        if dl_label == output_label:
            overwrite_reason = OverwriteReason.NONE
        else:
            overwrite_reason = OverwriteReason.LOW_CONFIDENCE

        return output_label, overwrite_reason

    def _get_result_from_labels_and_score(
        self,
        path: Path,
        dl_label: ContentTypeLabel,
        output_label: ContentTypeLabel,
        score: float,
        overwrite_reason: OverwriteReason = OverwriteReason.NONE,
    ) -> MagikaResult:
        """Assemble a `MagikaResult` for `path` from labels and score.

        Args:
            path: The path the result refers to.
            dl_label: The label attributed to the model.
            output_label: The label returned to the user.
            score: The score associated with the prediction.
            overwrite_reason: Why `output_label` differs from `dl_label`.

        Returns:
            A `MagikaResult` whose prediction holds the content type infos
            looked up for both labels.
        """
        dl_info = self._get_ct_info(dl_label)
        output_info = self._get_ct_info(output_label)
        prediction = MagikaPrediction(
            dl=dl_info,
            output=output_info,
            score=score,
            overwrite_reason=overwrite_reason,
        )
        return MagikaResult(path=path, prediction=prediction)

    def _get_result_or_features_from_path(
        self, path: Path
    ) -> Tuple[Optional[MagikaResult], Optional[ModelFeatures]]:
        """Given a path, return either a MagikaResult or a ModelFeatures.

        Some paths (symlinks when not dereferencing, missing or unreadable
        files, directories, other non-regular files) do not need deep
        learning; for those we return `(result, None)` right away.

        Readable regular files are opened and handed over to
        `_get_result_or_features_from_seekable`, whose return value is passed
        through unchanged. Features are only collected at this stage, rather
        than used for inference, so that inference can be batched.
        """

        def result_without_model(label: ContentTypeLabel) -> MagikaResult:
            # Result for an input that was never fed to the model.
            return self._get_result_from_labels_and_score(
                path=path,
                dl_label=ContentTypeLabel.UNDEFINED,
                output_label=label,
                score=1.0,
            )

        if self._no_dereference and path.is_symlink():
            return result_without_model(ContentTypeLabel.SYMLINK), None

        if not path.exists():
            return MagikaResult(path=path, status=Status.FILE_NOT_FOUND_ERROR), None

        if path.is_file():
            if not os.access(path, os.R_OK):
                return MagikaResult(path=path, status=Status.PERMISSION_ERROR), None

            # No other path-specific corner cases: treat the file as a
            # stream.
            with open(path, "rb") as stream:
                return self._get_result_or_features_from_seekable(
                    Seekable(stream), path
                )

        if path.is_dir():
            fallback_label = ContentTypeLabel.DIRECTORY
        else:
            fallback_label = ContentTypeLabel.UNKNOWN
        return result_without_model(fallback_label), None

    def _get_result_or_features_from_seekable(
        self, seekable: Seekable, path: Path = Path("-")
    ) -> Tuple[Optional[MagikaResult], Optional[ModelFeatures]]:
        """Get result or features from a seekable object.

        Returns `(result, None)` when the content can be handled without deep
        learning: empty content, content smaller than
        `min_file_size_for_dl`, or content whose extracted features do not
        hold enough non-padding tokens. Returns `(None, features)` otherwise.

        Features are only collected here, rather than used for inference, so
        that inference can be batched.
        """
        model_config = self._model_config

        if seekable.size == 0:
            empty_result = self._get_result_from_labels_and_score(
                path=path,
                dl_label=ContentTypeLabel.UNDEFINED,
                output_label=ContentTypeLabel.EMPTY,
                score=1.0,
            )
            return empty_result, None

        if seekable.size < model_config.min_file_size_for_dl:
            # Too small for the model: decide from the whole content.
            whole_content = seekable.read_at(0, seekable.size)
            return self._get_result_from_few_bytes(whole_content, path=path), None

        file_features = Magika._extract_features_from_seekable(
            seekable,
            model_config.beg_size,
            model_config.mid_size,
            model_config.end_size,
            model_config.padding_token,
            model_config.block_size,
            model_config.use_inputs_at_offsets,
        )

        # If the n-th token of `beg` (n = min_file_size_for_dl) is padding,
        # then, post-stripping, there are not enough meaningful bytes for a
        # meaningful detection.
        nth_token = file_features.beg[model_config.min_file_size_for_dl - 1]
        if nth_token != model_config.padding_token:
            # Enough bytes: return the features for a model prediction.
            return None, file_features

        first_bytes = seekable.read_at(
            0, min(seekable.size, model_config.block_size)
        )
        return self._get_result_from_few_bytes(first_bytes, path=path), None

    def _get_result_from_few_bytes(
        self, content: bytes, path: Path = Path("-")
    ) -> MagikaResult:
        """Build a result for content that is not fed to the model.

        Args:
            content: The bytes to inspect; at most `4 * block_size` bytes.
            path: The path the result refers to.

        Returns:
            A `MagikaResult` with an UNDEFINED model label, a score of 1.0,
            and the output label picked by `_get_label_from_few_bytes`.
        """
        max_content_len = 4 * self._model_config.block_size
        assert len(content) <= max_content_len
        return self._get_result_from_labels_and_score(
            path=path,
            dl_label=ContentTypeLabel.UNDEFINED,
            output_label=self._get_label_from_few_bytes(content),
            score=1.0,
        )

    def _get_label_from_few_bytes(self, content: bytes) -> ContentTypeLabel:
        """Return TXT if `content` decodes as UTF-8, UNKNOWN otherwise."""
        try:
            content.decode("utf-8")
        except UnicodeDecodeError:
            return ContentTypeLabel.UNKNOWN
        return ContentTypeLabel.TXT

    def _get_raw_predictions(
        self, features: List[Tuple[Path, ModelFeatures]]
    ) -> List[List[float]]:
        """Get raw predictions from features.

        Given a list of (path, features), return a (files_num, features_size)
        matrix encoding the predictions.
        """
        start_time = time.time()
        X_bytes = []
        for _, fs in features:
            sample_bytes = []
            if self._model_config.beg_size > 0:
                sample_bytes.extend(fs.beg[: self._model_config.beg_size])
            if self._model_config.mid_size > 0:
                sample_bytes.extend(fs.mid[: self._model_config.mid_size])
            if self._model_config.end_size > 0:
                sample_bytes.extend(fs.end[-self._model_config.end_size :])
            X_bytes.append(sample_bytes)

        elapsed_time = 1000 * (time.time() - start_time)
        self._log.debug(f"DL input prepared in {elapsed_time:.03f} ms")

        raw_predictions_list = []
        samples_num = len(X_bytes)

        max_internal_batch_size = 1000
        batches_num = samples_num // max_internal_batch_size
        if samples_num % max_internal_batch_size != 0:
            batches_num += 1

        for batch_idx in range(batches_num):
            self._log.debug(
                f"Getting raw predictions for (internal) batch {batch_idx + 1}/{batches_num}"
            )
            start_idx = batch_idx * max_internal_batch_size
            end_idx = min((batch_idx + 1) * max_internal_batch_size, samples_num)

            batch_features = X_bytes[start_idx:end_idx]

            start_time = time.time()
            # onnxruntime accepts simple list of lists of ints/floats for input "bytes"
            # It returns a list of numpy arrays (usually one per output node).
            batch_raw_predictions_np = self._onnx_session.run(
                ["target_label"], {"bytes": batch_features}
            )[0]
            # Convert numpy array to list of lists of floats immediately
            batch_raw_predictions = batch_raw_predictions_np.tolist()

            elapsed_time = 1000 * (time.time() - start_time)
            self._log.debug(f"DL raw prediction in {elapsed_time:.03f} ms")

            raw_predictions_list.extend(batch_raw_predictions)
        return raw_predictions_list


================================================
FILE: python/src/magika/models/standard_v3_3/README.md
================================================
# Model documentation

## Table of Contents

1. [List of possible outputs](#list-of-possible-outputs)
1. [List of possible model's outputs](#list-of-possible-models-outputs)

## List of possible outputs

This is the full list of all the possible outputs of the tool (which differ from the possible raw outputs of the model; see the table below). E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.output.label`.

| Index   |      Content Type Label      | Description |
|----------|:-------------:|------|
| 1 | 3gp | 3GPP multimedia file |
| 2 | ace | ACE archive |
| 3 | ai | Adobe Illustrator Artwork |
| 4 | aidl | Android Interface Definition Language |
| 5 | apk | Android package |
| 6 | applebplist | Apple binary property list |
| 7 | appleplist | Apple property list |
| 8 | asm | Assembly |
| 9 | asp | ASP source |
| 10 | autohotkey | AutoHotKey script |
| 11 | autoit | AutoIt script |
| 12 | awk | Awk |
| 13 | batch | DOS batch file |
| 14 | bazel | Bazel build file |
| 15 | bib | BibTeX |
| 16 | bmp | BMP image data |
| 17 | bzip | bzip2 compressed data |
| 18 | c | C source |
| 19 | cab | Microsoft Cabinet archive data |
| 20 | cat | Windows Catalog file |
| 21 | chm | MS Windows HtmlHelp Data |
| 22 | clojure | Clojure |
| 23 | cmake | CMake build file |
| 24 | cobol | Cobol |
| 25 | coff | Intel 80386 COFF |
| 26 | coffeescript | CoffeeScript |
| 27 | cpp | C++ source |
| 28 | crt | Certificates (binary format) |
| 29 | crx | Google Chrome extension |
| 30 | cs | C# source |
| 31 | csproj | .NET project config |
| 32 | css | CSS source |
| 33 | csv | CSV document |
| 34 | dart | Dart source |
| 35 | deb | Debian binary package |
| 36 | dex | Dalvik dex file |
| 37 | dicom | DICOM |
| 38 | diff | Diff file |
| 39 | directory | A directory |
| 40 | dm | Dream Maker |
| 41 | dmg | Apple disk image |
| 42 | doc | Microsoft Word CDF document |
| 43 | dockerfile | Dockerfile |
| 44 | docx | Microsoft Word 2007+ document |
| 45 | dsstore | Application Desktop Services Store |
| 46 | dwg | Autocad Drawing |
| 47 | dxf | Autocad Drawing Exchange Format |
| 48 | elf | ELF executable |
| 49 | elixir | Elixir script |
| 50 | emf | Windows Enhanced Metafile image data |
| 51 | eml | RFC 822 mail |
| 52 | empty | Empty file |
| 53 | epub | EPUB document |
| 54 | erb | Embedded Ruby source |
| 55 | erlang | Erlang source |
| 56 | flac | FLAC audio bitstream data |
| 57 | flv | Flash Video |
| 58 | fortran | Fortran |
| 59 | gemfile | Gemfile file |
| 60 | gemspec | Gemspec file |
| 61 | gif | GIF image data |
| 62 | gitattributes | Gitattributes file |
| 63 | gitmodules | Gitmodules file |
| 64 | go | Golang source |
| 65 | gradle | Gradle source |
| 66 | groovy | Groovy source |
| 67 | gzip | gzip compressed data |
| 68 | h5 | Hierarchical Data Format v5 |
| 69 | handlebars | Handlebars source |
| 70 | haskell | Haskell source |
| 71 | hcl | HashiCorp configuration language |
| 72 | hlp | MS Windows help |
| 73 | htaccess | Apache access configuration |
| 74 | html | HTML document |
| 75 | icns | Mac OS X icon |
| 76 | ico | MS Windows icon resource |
| 77 | ics | Internet Calendaring and Scheduling |
| 78 | ignorefile | Ignorefile |
| 79 | ini | INI configuration file |
| 80 | internetshortcut | MS Windows Internet shortcut |
| 81 | ipynb | Jupyter notebook |
| 82 | iso | ISO 9660 CD-ROM filesystem data |
| 83 | jar | Java archive data (JAR) |
| 84 | java | Java source |
| 85 | javabytecode | Java compiled bytecode |
| 86 | javascript | JavaScript source |
| 87 | jinja | Jinja template |
| 88 | jp2 | jpeg2000 |
| 89 | jpeg | JPEG image data |
| 90 | json | JSON document |
| 91 | jsonl | JSONL document |
| 92 | julia | Julia source |
| 93 | kotlin | Kotlin source |
| 94 | latex | LaTeX document |
| 95 | lha | LHarc archive |
| 96 | lisp | Lisp source |
| 97 | lnk | MS Windows shortcut |
| 98 | lua | Lua |
| 99 | m3u | M3U playlist |
| 100 | m4 | GNU Macro |
| 101 | macho | Mach-O executable |
| 102 | makefile | Makefile source |
| 103 | markdown | Markdown document |
| 104 | matlab | Matlab Source |
| 105 | mht | MHTML document |
| 106 | midi | Midi |
| 107 | mkv | Matroska |
| 108 | mp3 | MP3 media file |
| 109 | mp4 | MP4 media file |
| 110 | mscompress | MS Compress archive data |
| 111 | msi | Microsoft Installer file |
| 112 | mum | Windows Update Package file |
| 113 | npy | Numpy Array |
| 114 | npz | Numpy Arrays Archive |
| 115 | nupkg | NuGet Package |
| 116 | objectivec | ObjectiveC source |
| 117 | ocaml | OCaml |
| 118 | odp | OpenDocument Presentation |
| 119 | ods | OpenDocument Spreadsheet |
| 120 | odt | OpenDocument Text |
| 121 | ogg | Ogg data |
| 122 | one | One Note |
| 123 | onnx | Open Neural Network Exchange |
| 124 | otf | OpenType font |
| 125 | outlook | MS Outlook Message |
| 126 | parquet | Apache Parquet |
| 127 | pascal | Pascal source |
| 128 | pcap | pcap capture file |
| 129 | pdb | Windows Program Database |
| 130 | pdf | PDF document |
| 131 | pebin | PE Windows executable |
| 132 | pem | PEM certificate |
| 133 | perl | Perl source |
| 134 | php | PHP source |
| 135 | pickle | Python pickle |
| 136 | png | PNG image |
| 137 | po | Portable Object (PO) for i18n |
| 138 | postscript | PostScript document |
| 139 | powershell | Powershell source |
| 140 | ppt | Microsoft PowerPoint CDF document |
| 141 | pptx | Microsoft PowerPoint 2007+ document |
| 142 | prolog | Prolog source |
| 143 | proteindb | Protein DB |
| 144 | proto | Protocol buffer definition |
| 145 | psd | Adobe Photoshop |
| 146 | python | Python source |
| 147 | pythonbytecode | Python compiled bytecode |
| 148 | pytorch | Pytorch storage file |
| 149 | qt | QuickTime |
| 150 | r | R (language) |
| 151 | rar | RAR archive data |
| 152 | rdf | Resource Description Framework document (RDF) |
| 153 | rpm | RedHat Package Manager archive (RPM) |
| 154 | rst | ReStructuredText document |
| 155 | rtf | Rich Text Format document |
| 156 | ruby | Ruby source |
| 157 | rust | Rust source |
| 158 | scala | Scala source |
| 159 | scss | SCSS source |
| 160 | sevenzip | 7-zip archive data |
| 161 | sgml | sgml |
| 162 | shell | Shell script |
| 163 | smali | Smali source |
| 164 | snap | Snap archive |
| 165 | solidity | Solidity source |
| 166 | sql | SQL source |
| 167 | sqlite | SQLITE database |
| 168 | squashfs | Squash filesystem |
| 169 | srt | SubRip Text Format |
| 170 | stlbinary | Stereolithography CAD (binary) |
| 171 | stltext | Stereolithography CAD (text) |
| 172 | sum | Checksum file |
| 173 | svg | SVG Scalable Vector Graphics image data |
| 174 | swf | Small Web File |
| 175 | swift | Swift |
| 176 | symlink | Symbolic link |
| 177 | tar | POSIX tar archive |
| 178 | tcl | Tickle |
| 179 | textproto | Text protocol buffer |
| 180 | tga | Targa image data |
| 181 | thumbsdb | Windows thumbnail cache |
| 182 | tiff | TIFF image data |
| 183 | toml | Tom's obvious, minimal language |
| 184 | torrent | BitTorrent file |
| 185 | tsv | TSV document |
| 186 | ttf | TrueType Font data |
| 187 | twig | Twig template |
| 188 | txt | Generic text document |
| 189 | typescript | TypeScript source |
| 190 | unknown | Unknown binary data |
| 191 | vba | MS Visual Basic source (VBA) |
| 192 | vcxproj | Visual Studio MSBuild project |
| 193 | verilog | Verilog source |
| 194 | vhdl | VHDL source |
| 195 | vtt | Web Video Text Tracks |
| 196 | vue | Vue source |
| 197 | wasm | Web Assembly |
| 198 | wav | Waveform Audio file (WAV) |
| 199 | webm | WebM media file |
| 200 | webp | WebP media file |
| 201 | winregistry | Windows Registry text |
| 202 | wmf | Windows metafile |
| 203 | woff | Web Open Font Format |
| 204 | woff2 | Web Open Font Format v2 |
| 205 | xar | XAR archive compressed data |
| 206 | xls | Microsoft Excel CDF document |
| 207 | xlsb | Microsoft Excel 2007+ document (binary format) |
| 208 | xlsx | Microsoft Excel 2007+ document |
| 209 | xml | XML document |
| 210 | xpi | Compressed installation archive (XPI) |
| 211 | xz | XZ compressed data |
| 212 | yaml | YAML source |
| 213 | yara | YARA rule |
| 214 | zig | Zig source |
| 215 | zip | Zip archive data |
| 216 | zlibstream | zlib compressed data |


## List of possible model's outputs

This is the full list of all the possible outputs of the model. E.g., this is the list of all possible values for Magika python module's `MagikaResult.prediction.dl.label`. Note that, in general, the list of "model outputs" is different from the list of "tool outputs", as in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score or other reasons. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table above.

| Index   |      Content Type Label      | Description |
|----------|:-------------:|------|
| 1 | 3gp | 3GPP multimedia file |
| 2 | ace | ACE archive |
| 3 | ai | Adobe Illustrator Artwork |
| 4 | aidl | Android Interface Definition Language |
| 5 | apk | Android package |
| 6 | applebplist | Apple binary property list |
| 7 | appleplist | Apple property list |
| 8 | asm | Assembly |
| 9 | asp | ASP source |
| 10 | autohotkey | AutoHotKey script |
| 11 | autoit | AutoIt script |
| 12 | awk | Awk |
| 13 | batch | DOS batch file |
| 14 | bazel | Bazel build file |
| 15 | bib | BibTeX |
| 16 | bmp | BMP image data |
| 17 | bzip | bzip2 compressed data |
| 18 | c | C source |
| 19 | cab | Microsoft Cabinet archive data |
| 20 | cat | Windows Catalog file |
| 21 | chm | MS Windows HtmlHelp Data |
| 22 | clojure | Clojure |
| 23 | cmake | CMake build file |
| 24 | cobol | Cobol |
| 25 | coff | Intel 80386 COFF |
| 26 | coffeescript | CoffeeScript |
| 27 | cpp | C++ source |
| 28 | crt | Certificates (binary format) |
| 29 | crx | Google Chrome extension |
| 30 | cs | C# source |
| 31 | csproj | .NET project config |
| 32 | css | CSS source |
| 33 | csv | CSV document |
| 34 | dart | Dart source |
| 35 | deb | Debian binary package |
| 36 | dex | Dalvik dex file |
| 37 | dicom | DICOM |
| 38 | diff | Diff file |
| 39 | dm | Dream Maker |
| 40 | dmg | Apple disk image |
| 41 | doc | Microsoft Word CDF document |
| 42 | dockerfile | Dockerfile |
| 43 | docx | Microsoft Word 2007+ document |
| 44 | dsstore | Application Desktop Services Store |
| 45 | dwg | Autocad Drawing |
| 46 | dxf | Autocad Drawing Exchange Format |
| 47 | elf | ELF executable |
| 48 | elixir | Elixir script |
| 49 | emf | Windows Enhanced Metafile image data |
| 50 | eml | RFC 822 mail |
| 51 | epub | EPUB document |
| 52 | erb | Embedded Ruby source |
| 53 | erlang | Erlang source |
| 54 | flac | FLAC audio bitstream data |
| 55 | flv | Flash Video |
| 56 | fortran | Fortran |
| 57 | gemfile | Gemfile file |
| 58 | gemspec | Gemspec file |
| 59 | gif | GIF image data |
| 60 | gitattributes | Gitattributes file |
| 61 | gitmodules | Gitmodules file |
| 62 | go | Golang source |
| 63 | gradle | Gradle source |
| 64 | groovy | Groovy source |
| 65 | gzip | gzip compressed data |
| 66 | h5 | Hierarchical Data Format v5 |
| 67 | handlebars | Handlebars source |
| 68 | haskell | Haskell source |
| 69 | hcl | HashiCorp configuration language |
| 70 | hlp | MS Windows help |
| 71 | htaccess | Apache access configuration |
| 72 | html | HTML document |
| 73 | icns | Mac OS X icon |
| 74 | ico | MS Windows icon resource |
| 75 | ics | Internet Calendaring and Scheduling |
| 76 | ignorefile | Ignorefile |
| 77 | ini | INI configuration file |
| 78 | internetshortcut | MS Windows Internet shortcut |
| 79 | ipynb | Jupyter notebook |
| 80 | iso | ISO 9660 CD-ROM filesystem data |
| 81 | jar | Java archive data (JAR) |
| 82 | java | Java source |
| 83 | javabytecode | Java compiled bytecode |
| 84 | javascript | JavaScript source |
| 85 | jinja | Jinja template |
| 86 | jp2 | jpeg2000 |
| 87 | jpeg | JPEG image data |
| 88 | json | JSON document |
| 89 | jsonl | JSONL document |
| 90 | julia | Julia source |
| 91 | kotlin | Kotlin source |
| 92 | latex | LaTeX document |
| 93 | lha | LHarc archive |
| 94 | lisp | Lisp source |
| 95 | lnk | MS Windows shortcut |
| 96 | lua | Lua |
| 97 | m3u | M3U playlist |
| 98 | m4 | GNU Macro |
| 99 | macho | Mach-O executable |
| 100 | makefile | Makefile source |
| 101 | markdown | Markdown document |
| 102 | matlab | Matlab Source |
| 103 | mht | MHTML document |
| 104 | midi | Midi |
| 105 | mkv | Matroska |
| 106 | mp3 | MP3 media file |
| 107 | mp4 | MP4 media file |
| 108 | mscompress | MS Compress archive data |
| 109 | msi | Microsoft Installer file |
| 110 | mum | Windows Update Package file |
| 111 | npy | Numpy Array |
| 112 | npz | Numpy Arrays Archive |
| 113 | nupkg | NuGet Package |
| 114 | objectivec | ObjectiveC source |
| 115 | ocaml | OCaml |
| 116 | odp | OpenDocument Presentation |
| 117 | ods | OpenDocument Spreadsheet |
| 118 | odt | OpenDocument Text |
| 119 | ogg | Ogg data |
| 120 | one | One Note |
| 121 | onnx | Open Neural Network Exchange |
| 122 | otf | OpenType font |
| 123 | outlook | MS Outlook Message |
| 124 | parquet | Apache Parquet |
| 125 | pascal | Pascal source |
| 126 | pcap | pcap capture file |
| 127 | pdb | Windows Program Database |
| 128 | pdf | PDF document |
| 129 | pebin | PE Windows executable |
| 130 | pem | PEM certificate |
| 131 | perl | Perl source |
| 132 | php | PHP source |
| 133 | pickle | Python pickle |
| 134 | png | PNG image |
| 135 | po | Portable Object (PO) for i18n |
| 136 | postscript | PostScript document |
| 137 | powershell | Powershell source |
| 138 | ppt | Microsoft PowerPoint CDF document |
| 139 | pptx | Microsoft PowerPoint 2007+ document |
| 140 | prolog | Prolog source |
| 141 | proteindb | Protein DB |
| 142 | proto | Protocol buffer definition |
| 143 | psd | Adobe Photoshop |
| 144 | python | Python source |
| 145 | pythonbytecode | Python compiled bytecode |
| 146 | pytorch | Pytorch storage file |
| 147 | qt | QuickTime |
| 148 | r | R (language) |
| 149 | randombytes | Random bytes |
| 150 | randomtxt | Random text |
| 151 | rar | RAR archive data |
| 152 | rdf | Resource Description Framework document (RDF) |
| 153 | rpm | RedHat Package Manager archive (RPM) |
| 154 | rst | ReStructuredText document |
| 155 | rtf | Rich Text Format document |
| 156 | ruby | Ruby source |
| 157 | rust | Rust source |
| 158 | scala | Scala source |
| 159 | scss | SCSS source |
| 160 | sevenzip | 7-zip archive data |
| 161 | sgml | sgml |
| 162 | shell | Shell script |
| 163 | smali | Smali source |
| 164 | snap | Snap archive |
| 165 | solidity | Solidity source |
| 166 | sql | SQL source |
| 167 | sqlite | SQLITE database |
| 168 | squashfs | Squash filesystem |
| 169 | srt | SubRip Text Format |
| 170 | stlbinary | Stereolithography CAD (binary) |
| 171 | stltext | Stereolithography CAD (text) |
| 172 | sum | Checksum file |
| 173 | svg | SVG Scalable Vector Graphics image data |
| 174 | swf | Small Web File |
| 175 | swift | Swift |
| 176 | tar | POSIX tar archive |
| 177 | tcl | Tickle |
| 178 | textproto | Text protocol buffer |
| 179 | tga | Targa image data |
| 180 | thumbsdb | Windows thumbnail cache |
| 181 | tiff | TIFF image data |
| 182 | toml | Tom's obvious, minimal language |
| 183 | torrent | BitTorrent file |
| 184 | tsv | TSV document |
| 185 | ttf | TrueType Font data |
| 186 | twig | Twig template |
| 187 | txt | Generic text document |
| 188 | typescript | TypeScript source |
| 189 | undefined | Undefined |
| 190 | vba | MS Visual Basic source (VBA) |
| 191 | vcxproj | Visual Studio MSBuild project |
| 192 | verilog | Verilog source |
| 193 | vhdl | VHDL source |
| 194 | vtt | Web Video Text Tracks |
| 195 | vue | Vue source |
| 196 | wasm | Web Assembly |
| 197 | wav | Waveform Audio file (WAV) |
| 198 | webm | WebM media file |
| 199 | webp | WebP media file |
| 200 | winregistry | Windows Registry text |
| 201 | wmf | Windows metafile |
| 202 | woff | Web Open Font Format |
| 203 | woff2 | Web Open Font Format v2 |
| 204 | xar | XAR archive compressed data |
| 205 | xls | Microsoft Excel CDF document |
| 206 | xlsb | Microsoft Excel 2007+ document (binary format) |
| 207 | xlsx | Microsoft Excel 2007+ document |
| 208 | xml | XML document |
| 209 | xpi | Compressed installation archive (XPI) |
| 210 | xz | XZ compressed data |
| 211 | yaml | YAML source |
| 212 | yara | YARA rule |
| 213 | zig | Zig source |
| 214 | zip | Zip archive data |
| 215 | zlibstream | zlib compressed data |

================================================
FILE: python/src/magika/models/standard_v3_3/config.min.json
================================================
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"crt":0.9,"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.75,"ocaml":0.9,"pascal":0.95,"r":0.9,"rst":0.9,"sql":
0.9,"tsv":0.9,"zig":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"protection":"none","aes_key_hex":"","version_major":3}


================================================
FILE: python/src/magika/models/standard_v3_3/metadata.json
================================================
{"epoch_num":"91"}


================================================
FILE: python/src/magika/py.typed
================================================


================================================
FILE: python/src/magika/types/__init__.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D104


from magika.types.content_type_info import ContentTypeInfo  # noqa: F401
from magika.types.content_type_label import ContentTypeLabel  # noqa: F401
from magika.types.magika_error import MagikaError  # noqa: F401
from magika.types.magika_prediction import MagikaPrediction  # noqa: F401
from magika.types.magika_result import MagikaResult  # noqa: F401
from magika.types.model import (  # noqa: F401
    ModelConfig,
    ModelFeatures,
    ModelOutput,
)
from magika.types.overwrite_reason import OverwriteReason  # noqa: F401
from magika.types.prediction_mode import PredictionMode  # noqa: F401
from magika.types.seekable import Seekable  # noqa: F401
from magika.types.status import Status  # noqa: F401

# Names exported by `from magika.types import *`; each entry matches one of
# the names imported above.
__all__ = [
    "ContentTypeInfo",
    "ContentTypeLabel",
    "MagikaError",
    "MagikaPrediction",
    "MagikaResult",
    "ModelConfig",
    "ModelFeatures",
    "ModelOutput",
    "OverwriteReason",
    "PredictionMode",
    "Seekable",
    "Status",
]


================================================
FILE: python/src/magika/types/content_type_info.py
================================================
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module defining the ContentTypeInfo dataclass."""

import warnings
from dataclasses import dataclass
from typing import List

from magika.logger import get_logger
from magika.types.content_type_label import ContentTypeLabel


@dataclass(frozen=True)
class ContentTypeInfo:
    """Immutable record describing a single content type.

    Attributes:
        label: The ContentTypeLabel enum value identifying the content type.
        mime_type: The mime type associated with the content type.
        group: A high-level category for the content type (e.g., "document",
            "image").
        description: A human-readable description.
        extensions: A list of common file extensions.
        is_text: Whether the content type is text-based.

    The `ct_label`, `score` and `magic` properties only exist for backward
    compatibility: the first and last emit a DeprecationWarning, while `score`
    always raises.
    """

    label: ContentTypeLabel
    mime_type: str
    group: str
    description: str
    extensions: List[str]
    is_text: bool

    @property
    def ct_label(self) -> str:
        """DEPRECATED: The content type label rendered as a plain string.

        Warns:
            DeprecationWarning: Always emitted; use `.label` instead.
        """
        deprecation_msg = "`.ct_label` is deprecated and will be removed in a future version. Use `.label` instead. Consult the documentation for more information."
        # stacklevel=2 attributes the warning to the code accessing the
        # property rather than to this module.
        warnings.warn(deprecation_msg, category=DeprecationWarning, stacklevel=2)
        return str(self.label)

    @property
    def score(self) -> float:
        """UNSUPPORTED: Accessing this property always fails.

        Raises:
            AttributeError: Always raised (after logging the same message as
                an error); the score is now available on the MagikaResult
                object.
        """
        unsupported_msg = "Unsupported field error: `.score.` is not stored anymore in the `dl` or `output` objects; it is now stored in `MagikaResult`. Consult the documentation for more information."
        get_logger().error(unsupported_msg)
        raise AttributeError(unsupported_msg)

    @property
    def magic(self) -> str:
        """DEPRECATED: Alias of `.description`.

        Warns:
            DeprecationWarning: Always emitted; use `.description` instead.
        """
        deprecation_msg = "`.magic` is deprecated and will be removed in a future version. Use `.description` instead. Consult the documentation for more information."
        warnings.warn(deprecation_msg, category=DeprecationWarning, stacklevel=2)
        return self.description


================================================
FILE: python/src/magika/types/content_type_label.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Enumeration of all known content type labels."""

from magika.types.strenum import StrEnum

# NOTE: DO NOT EDIT --- This file is automatically generated.


# This is the list of all possible content types we know about; however, models
# support a smaller subset of them. See model's README.md for details.
class ContentTypeLabel(StrEnum):
    """A string-based enumeration of all possible content type labels.

    This enum provides a standardized set of identifiers for content types
    recognized by Magika.

    Each member's value is the lower-case label string. Labels that start
    with a digit (e.g., "3gp") are exposed under a member name prefixed with
    an underscore (e.g., `_3GP`), since Python identifiers cannot start with
    a digit.
    """

    _3DS = "3ds"
    _3DSM = "3dsm"
    _3DSX = "3dsx"
    _3GP = "3gp"
    _3MF = "3mf"
    ABNF = "abnf"
    ACE = "ace"
    ADA = "ada"
    AFF = "aff"
    AI = "ai"
    AIDL = "aidl"
    ALGOL68 = "algol68"
    ANI = "ani"
    APK = "apk"
    APPLEBPLIST = "applebplist"
    APPLEDOUBLE = "appledouble"
    APPLEPLIST = "appleplist"
    APPLESINGLE = "applesingle"
    AR = "ar"
    ARC = "arc"
    ARJ = "arj"
    ARROW = "arrow"
    ASC = "asc"
    ASD = "asd"
    ASF = "asf"
    ASM = "asm"
    ASP = "asp"
    AU = "au"
    AUTOHOTKEY = "autohotkey"
    AUTOIT = "autoit"
    AVI = "avi"
    AVIF = "avif"
    AVRO = "avro"
    AWK = "awk"
    AX = "ax"
    BATCH = "batch"
    BAZEL = "bazel"
    BCAD = "bcad"
    BIB = "bib"
    BMP = "bmp"
    BPG = "bpg"
    BPL = "bpl"
    BRAINFUCK = "brainfuck"
    BRF = "brf"
    BZIP = "bzip"
    BZIP3 = "bzip3"
    C = "c"
    CAB = "cab"
    CAD = "cad"
    CAT = "cat"
    CDF = "cdf"
    CHM = "chm"
    CLOJURE = "clojure"
    CMAKE = "cmake"
    COBOL = "cobol"
    COFF = "coff"
    COFFEESCRIPT = "coffeescript"
    COM = "com"
    CPL = "cpl"
    CPP = "cpp"
    CRT = "crt"
    CRX = "crx"
    CS = "cs"
    CSPROJ = "csproj"
    CSS = "css"
    CSV = "csv"
    CTL = "ctl"
    DART = "dart"
    DEB = "deb"
    DEX = "dex"
    DEY = "dey"
    DICOM = "dicom"
    DIFF = "diff"
    DIRECTORY = "directory"
    DJANGO = "django"
    DLL = "dll"
    DM = "dm"
    DMG = "dmg"
    DMIGD = "dmigd"
    DMSCRIPT = "dmscript"
    DOC = "doc"
    DOCKERFILE = "dockerfile"
    DOCX = "docx"
    DOSMBR = "dosmbr"
    DOTX = "dotx"
    DSSTORE = "dsstore"
    DWG = "dwg"
    DXF = "dxf"
    DYLIB = "dylib"
    EBML = "ebml"
    ELF = "elf"
    ELIXIR = "elixir"
    EMF = "emf"
    EML = "eml"
    EMPTY = "empty"
    EPUB = "epub"
    ERB = "erb"
    ERLANG = "erlang"
    ESE = "ese"
    EXE = "exe"
    EXP = "exp"
    FLAC = "flac"
    FLUTTER = "flutter"
    FLV = "flv"
    FORTRAN = "fortran"
    FPX = "fpx"
    GEMFILE = "gemfile"
    GEMSPEC = "gemspec"
    GIF = "gif"
    GITATTRIBUTES = "gitattributes"
    GITMODULES = "gitmodules"
    GLEAM = "gleam"
    GO = "go"
    GPX = "gpx"
    GRADLE = "gradle"
    GROOVY = "groovy"
    GZIP = "gzip"
    H = "h"
    H5 = "h5"
    HANDLEBARS = "handlebars"
    HASKELL = "haskell"
    HCL = "hcl"
    HEIF = "heif"
    HFS = "hfs"
    HLP = "hlp"
    HPP = "hpp"
    HTA = "hta"
    HTACCESS = "htaccess"
    HTML = "html"
    HVE = "hve"
    HWP = "hwp"
    ICC = "icc"
    ICNS = "icns"
    ICO = "ico"
    ICS = "ics"
    IGNOREFILE = "ignorefile"
    IMG = "img"
    INI = "ini"
    INTERNETSHORTCUT = "internetshortcut"
    IOSAPP = "iosapp"
    IPYNB = "ipynb"
    ISO = "iso"
    JAR = "jar"
    JAVA = "java"
    JAVABYTECODE = "javabytecode"
    JAVASCRIPT = "javascript"
    JINJA = "jinja"
    JNG = "jng"
    JNLP = "jnlp"
    JP2 = "jp2"
    JPEG = "jpeg"
    JSON = "json"
    JSONC = "jsonc"
    JSONL = "jsonl"
    JSX = "jsx"
    JULIA = "julia"
    JXL = "jxl"
    KO = "ko"
    KOTLIN = "kotlin"
    KS = "ks"
    LATEX = "latex"
    LATEXAUX = "latexaux"
    LESS = "less"
    LHA = "lha"
    LICENSE = "license"
    LISP = "lisp"
    LITCS = "litcs"
    LNK = "lnk"
    LOCK = "lock"
    LRZ = "lrz"
    LUA = "lua"
    LZ = "lz"
    LZ4 = "lz4"
    LZX = "lzx"
    M3U = "m3u"
    M4 = "m4"
    MACHO = "macho"
    MAFF = "maff"
    MAKEFILE = "makefile"
    MARKDOWN = "markdown"
    MATLAB = "matlab"
    MHT = "mht"
    MIDI = "midi"
    MKV = "mkv"
    MP2 = "mp2"
    MP3 = "mp3"
    MP4 = "mp4"
    MPEGTS = "mpegts"
    MSCOMPRESS = "mscompress"
    MSI = "msi"
    MSIX = "msix"
    MST = "mst"
    MUI = "mui"
    MUM = "mum"
    MUN = "mun"
    NIM = "nim"
    NPY = "npy"
    NPZ = "npz"
    NULL = "null"
    NUPKG = "nupkg"
    OBJECT = "object"
    OBJECTIVEC = "objectivec"
    OCAML = "ocaml"
    OCX = "ocx"
    ODEX = "odex"
    ODIN = "odin"
    ODP = "odp"
    ODS = "ods"
    ODT = "odt"
    OGG = "ogg"
    OLE = "ole"
    ONE = "one"
    ONNX = "onnx"
    OOXML = "ooxml"
    OTF = "otf"
    OUTLOOK = "outlook"
    PALMOS = "palmos"
    PARQUET = "parquet"
    PASCAL = "pascal"
    PBM = "pbm"
    PCAP = "pcap"
    PDB = "pdb"
    PDF = "pdf"
    PEBIN = "pebin"
    PEM = "pem"
    PERL = "perl"
    PGP = "pgp"
    PHP = "php"
    PICKLE = "pickle"
    PNG = "png"
    PO = "po"
    POSTSCRIPT = "postscript"
    POWERSHELL = "powershell"
    PPT = "ppt"
    PPTX = "pptx"
    PRINTFOX = "printfox"
    PROLOG = "prolog"
    PROTEINDB = "proteindb"
    PROTO = "proto"
    PROTOBUF = "protobuf"
    PSD = "psd"
    PUB = "pub"
    PYTHON = "python"
    PYTHONBYTECODE = "pythonbytecode"
    PYTHONPAR = "pythonpar"
    PYTORCH = "pytorch"
    QOI = "qoi"
    QT = "qt"
    R = "r"
    RANDOMASCII = "randomascii"
    RANDOMBYTES = "randombytes"
    RANDOMTXT = "randomtxt"
    RAR = "rar"
    RDF = "rdf"
    RDP = "rdp"
    RIFF = "riff"
    RLIB = "rlib"
    RLL = "rll"
    RPM = "rpm"
    RST = "rst"
    RTF = "rtf"
    RUBY = "ruby"
    RUST = "rust"
    RZIP = "rzip"
    SCALA = "scala"
    SCHEME = "scheme"
    SCR = "scr"
    SCRIPTWSF = "scriptwsf"
    SCSS = "scss"
    SEVENZIP = "sevenzip"
    SGML = "sgml"
    SH3D = "sh3d"
    SHELL = "shell"
    SMALI = "smali"
    SNAP = "snap"
    SO = "so"
    SOLIDITY = "solidity"
    SQL = "sql"
    SQLITE = "sqlite"
    SQUASHFS = "squashfs"
    SRT = "srt"
    STLBINARY = "stlbinary"
    STLTEXT = "stltext"
    SUM = "sum"
    SVD = "svd"
    SVG = "svg"
    SWF = "swf"
    SWIFT = "swift"
    SYMLINK = "symlink"
    SYMLINKTEXT = "symlinktext"
    SYS = "sys"
    TAR = "tar"
    TCL = "tcl"
    TEXTPROTO = "textproto"
    TGA = "tga"
    THUMBSDB = "thumbsdb"
    TIFF = "tiff"
    TMDX = "tmdx"
    TOML = "toml"
    TORRENT = "torrent"
    TROFF = "troff"
    TSV = "tsv"
    TSX = "tsx"
    TTF = "ttf"
    TWIG = "twig"
    TXT = "txt"
    TXTASCII = "txtascii"
    TXTUTF16 = "txtutf16"
    TXTUTF8 = "txtutf8"
    TYPESCRIPT = "typescript"
    UDF = "udf"
    UNDEFINED = "undefined"
    UNIXCOMPRESS = "unixcompress"
    UNKNOWN = "unknown"
    VBA = "vba"
    VBE = "vbe"
    VCARD = "vcard"
    VCS = "vcs"
    VCXPROJ = "vcxproj"
    VERILOG = "verilog"
    VHD = "vhd"
    VHDL = "vhdl"
    VISIO = "visio"
    VTT = "vtt"
    VUE = "vue"
    WAD = "wad"
    WASM = "wasm"
    WAV = "wav"
    WEBM = "webm"
    WEBP = "webp"
    WEBTEMPLATE = "webtemplate"
    WIM = "wim"
    WINREGISTRY = "winregistry"
    WMA = "wma"
    WMF = "wmf"
    WMV = "wmv"
    WOFF = "woff"
    WOFF2 = "woff2"
    XAR = "xar"
    XCF = "xcf"
    XLS = "xls"
    XLSB = "xlsb"
    XLSX = "xlsx"
    XML = "xml"
    XPI = "xpi"
    XSD = "xsd"
    XZ = "xz"
    YAML = "yaml"
    YARA = "yara"
    ZIG = "zig"
    ZIP = "zip"
    ZLIBSTREAM = "zlibstream"
    ZST = "zst"

    def __repr__(self) -> str:
        """Return the same text as `str(self)` rather than the default Enum repr."""
        return str(self)


================================================
FILE: python/src/magika/types/magika_error.py
================================================
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101


class MagikaError(Exception):
    """Exception type defined by the magika package."""


================================================
FILE: python/src/magika/types/magika_prediction.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module defining the MagikaPrediction dataclass."""

from __future__ import annotations

from dataclasses import dataclass

from magika.types.content_type_info import ContentTypeInfo
from magika.types.overwrite_reason import OverwriteReason


@dataclass(frozen=True)
class MagikaPrediction:
    """Encodes the detailed result of Magika's content type inference.

    This dataclass holds both the raw Deep Learning model's prediction and the
    final, potentially modified, output prediction. Instances are immutable
    (the dataclass is frozen).

    Attributes:
        dl: The raw prediction from the Deep Learning (DL) model.
        output: The final, consolidated content type prediction, which may
            differ from `dl` due to heuristics or post-processing.
        score: The confidence score (0.0 to 1.0) associated with the final
            prediction.
        overwrite_reason: The reason the `output` might have overridden the
            raw `dl` prediction (see `OverwriteReason` for the possible
            values).
    """

    dl: ContentTypeInfo
    output: ContentTypeInfo
    score: float
    overwrite_reason: OverwriteReason


================================================
FILE: python/src/magika/types/magika_result.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Defines MagikaResult, which encodes the result of a scan."""

import dataclasses
from pathlib import Path
from typing import Any, Dict, Optional

from magika.types.content_type_info import ContentTypeInfo
from magika.types.magika_prediction import MagikaPrediction
from magika.types.status import Status


class MagikaResult:
    """Encodes the result of a content type inference scan.

    A result always carries the analyzed path and a status. A prediction is
    present if and only if the status is `Status.OK`; this invariant is
    checked when the object is constructed.
    """

    def __init__(
        self,
        *,
        path: Path,
        status: Status = Status.OK,
        prediction: Optional[MagikaPrediction] = None,
    ):
        """Initializes a new MagikaResult object.

        Args:
            path: The file path that was analyzed.
            status: The status of the analysis operation (e.g., OK, FILE_NOT_FOUND_ERROR).
            prediction: The prediction details if the analysis was successful; None otherwise.

        Raises:
            ValueError: If `prediction` is missing while `status` is OK, or
                if `prediction` is provided while `status` is not OK.
        """
        self._path = path
        self._status = status
        self._prediction = prediction
        # This is a plain class, not a dataclass, so `__post_init__` is never
        # invoked automatically: call it explicitly, otherwise the
        # consistency checks below would be dead code.
        self.__post_init__()

    def __post_init__(self) -> None:
        """Validates that status and prediction are consistent.

        Raises:
            ValueError: If the prediction is missing when status is OK, or
                present when status is not OK.
        """
        assert self._path is not None
        if self._status == Status.OK:
            if self._prediction is None:
                raise ValueError("prediction must be set when status == OK")
        else:
            if self._prediction is not None:
                raise ValueError("prediction cannot be set when status != OK")

    @property
    def path(self) -> Path:
        """The file path that was analyzed.

        Returns:
            The Path object representing the analyzed file.
        """
        return self._path

    @property
    def ok(self) -> bool:
        """Checks if the analysis was successful.

        Returns:
            True if status is OK, False otherwise.
        """
        return self._status == Status.OK

    @property
    def status(self) -> Status:
        """The operational status of the analysis.

        Returns:
            The Status enumeration value (e.g., Status.OK, Status.FILE_NOT_FOUND_ERROR).
        """
        return self._status

    @property
    def prediction(self) -> MagikaPrediction:
        """The detailed content type prediction result.

        Returns:
            The prediction object containing content type information.

        Raises:
            ValueError: If the status is not OK.
        """
        if self.ok:
            assert self._prediction is not None
            return self._prediction
        raise ValueError("prediction is not set when status != OK")

    # In the vast majority of cases, Magika would return with status == OK (for
    # `identify_bytes()` there is not even a code path that would return an
    # error). To optimize for such frequent scenario, we add the following
    # properties to forward the underlying value. Clients that want to make use
    # of the full power of the absl-like StatusOr pattern can still do so, but
    # we do not force all clients, regardless of their complexity or
    # criticality, to use the more verbose `mr.prediction.output`.
    @property
    def dl(self) -> ContentTypeInfo:
        """The predicted content type from the Deep Learning (dl) model.

        Note: This is a convenience property, equivalent to `self.prediction.dl`.

        Returns:
            The ContentTypeInfo for the dl model's prediction.

        Raises:
            ValueError: If the status is not OK.
        """
        return self.prediction.dl

    @property
    def output(self) -> ContentTypeInfo:
        """The final, consolidated output content type prediction.

        Note: This is a convenience property, equivalent to `self.prediction.output`.

        Returns:
            The ContentTypeInfo for the final prediction.

        Raises:
            ValueError: If the status is not OK.
        """
        return self.prediction.output

    @property
    def score(self) -> float:
        """The confidence score of the final prediction.

        Note: This is a convenience property, equivalent to `self.prediction.score`.

        Returns:
            The confidence score as a float (0.0 to 1.0).

        Raises:
            ValueError: If the status is not OK.
        """
        return self.prediction.score

    def asdict(self) -> Dict:
        """Serializes the MagikaResult object into a dictionary.

        The dictionary includes the file path (as a string), the status, and
        the full prediction details (under the "prediction" key) if the
        status is OK.

        Returns:
            A dictionary representation of the result.
        """
        out: Dict[str, Any] = {
            "path": str(self.path),
            "status": self.status,
        }
        if self.ok:
            out["prediction"] = dataclasses.asdict(self.prediction)
        return out

    def __repr__(self) -> str:
        """Returns the same text as `str(self)`."""
        return str(self)

    def __str__(self) -> str:
        """Returns a readable summary; the prediction is included only when status is OK."""
        if self.ok:
            return f"MagikaResult(path={self.path}, status={self.status}, prediction={self.prediction})"
        else:
            return f"MagikaResult(path={self.path}, status={self.status})"


================================================
FILE: python/src/magika/types/model.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101, D102, D103, D107


from dataclasses import dataclass
from typing import Dict, List

from magika.types.content_type_label import ContentTypeLabel


@dataclass(frozen=True)
class ModelFeatures:
    """Immutable container for the integer features extracted from a content.

    NOTE(review): the per-field notes below are inferred from the field names
    and the pre-existing inline comments; confirm against the features
    extraction code.
    """

    # Features associated with the beginning, middle and end of the content
    # (presumably byte values and padding tokens -- TODO confirm).
    beg: List[int]
    mid: List[int]
    end: List[int]
    # for ISO
    # (presumably the values found at the hexadecimal offsets in the field
    # names -- TODO confirm)
    offset_0x8000_0x8007: List[int]
    offset_0x8800_0x8807: List[int]
    offset_0x9000_0x9007: List[int]
    # for UDF
    offset_0x9800_0x9807: List[int]


@dataclass(frozen=True)
class ModelOutput:
    """Immutable pair of a content type label and its associated score.

    NOTE(review): presumably the raw output of the model for one sample --
    confirm against the inference code.
    """

    label: ContentTypeLabel
    score: float


@dataclass(frozen=True)
class ModelConfig:
    """Immutable set of configuration values associated with a model.

    NOTE(review): the per-field notes below are inferred from the field names
    only; confirm against the model's config file and the code consuming it.
    """

    # Sizes of the beg/mid/end features (presumably matching the lengths of
    # the corresponding `ModelFeatures` lists -- TODO confirm).
    beg_size: int
    mid_size: int
    end_size: int
    # presumably enables the `offset_*` features of `ModelFeatures` -- verify
    use_inputs_at_offsets: bool
    medium_confidence_threshold: float
    min_file_size_for_dl: int
    padding_token: int
    block_size: int
    # Labels the model can output, and per-label values keyed by label.
    target_labels_space: List[ContentTypeLabel]
    thresholds: Dict[ContentTypeLabel, float]
    overwrite_map: Dict[ContentTypeLabel, ContentTypeLabel]


================================================
FILE: python/src/magika/types/overwrite_reason.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100


import enum

from magika.types.strenum import LowerCaseStrEnum


class OverwriteReason(LowerCaseStrEnum):
    """Enum to represent possible reasons for overriding the model's prediction.

    Member values are generated with `enum.auto()` on a `LowerCaseStrEnum`
    base, i.e., each value is the lower-cased member name.

    Consult the documentation for additional context.
    """

    # presumably: the model's prediction was not overridden -- confirm
    NONE = enum.auto()
    # presumably: overridden because the model's confidence was too low -- confirm
    LOW_CONFIDENCE = enum.auto()
    # presumably: overridden according to the model's overwrite map -- confirm
    OVERWRITE_MAP = enum.auto()


================================================
FILE: python/src/magika/types/prediction_mode.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100

from __future__ import annotations

import enum
from typing import List

from magika.types.strenum import LowerCaseStrEnum


class PredictionMode(LowerCaseStrEnum):
    """Enumeration of the prediction modes supported by Magika.

    Member values are generated with `enum.auto()`.

    Consult the documentation for additional context.
    """

    BEST_GUESS = enum.auto()
    MEDIUM_CONFIDENCE = enum.auto()
    HIGH_CONFIDENCE = enum.auto()

    @staticmethod
    def get_valid_prediction_modes() -> List[str]:
        """Return all the prediction modes, in definition order."""
        return list(PredictionMode)


================================================
FILE: python/src/magika/types/seekable.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101, D102, D103, D107

import io
from typing import BinaryIO


class Seekable:
    """Thin wrapper around a binary stream offering sized, random-access reads.

    The size is measured once, at construction time, by seeking to the end of
    the stream.
    """

    def __init__(self, stream: BinaryIO) -> None:
        """Wrap `stream` and record its total size in bytes."""
        stream.seek(0, io.SEEK_END)
        end_position = stream.tell()
        self._stream = stream
        self._size = end_position

    @property
    def size(self) -> int:
        """Total size of the stream, as measured at construction time."""
        return self._size

    def read_at(self, offset: int, size: int) -> bytes:
        """Read `size` bytes starting at absolute position `offset`.

        A zero-sized read returns an empty bytes object without touching the
        stream. Otherwise the requested range must end within the stream
        (checked with an assertion).
        """
        if size != 0:
            end = offset + size
            assert end <= self.size
            self._stream.seek(offset)
            return self._stream.read(size)
        return b""


================================================
FILE: python/src/magika/types/status.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100

import enum

from magika.types.strenum import LowerCaseStrEnum


class Status(LowerCaseStrEnum):
    """Enum to represent the possible status outcomes of a content type inference result.

    Member values are generated with `enum.auto()` on a `LowerCaseStrEnum`
    base, i.e., each value is the lower-cased member name.
    """

    # Used when the analysis completed successfully.
    OK = enum.auto()

    # Used when a file path does not exist.
    FILE_NOT_FOUND_ERROR = enum.auto()

    # Used when a file path exists, but there are permission issues, e.g., can't
    # read file.
    PERMISSION_ERROR = enum.auto()

    # Represents a generic error-like unknown status.
    UNKNOWN = enum.auto()


================================================
FILE: python/src/magika/types/strenum.py
================================================
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa: D100, D101, D102, D103, D107

"""Backport of StrEnum.

StrEnum was introduced in python 3.11, but we do not rely on it because we aim
at supporting python since version 3.8.

The following code has been taken (and adapted) from:
https://github.com/irgeek/StrEnum/blob/master/strenum/__init__.py#L21
"""

from __future__ import annotations

import enum
from typing import Union


class StrEnum(str, enum.Enum):
    """A Python ``enum.Enum`` whose members are also ``str`` instances.

    Member values must be strings (or ``auto()``). With ``auto()``, the
    generated value is the member name, unchanged; see ``LowerCaseStrEnum``
    for a variant that lower-cases it, mirroring `enum.StrEnum` (available
    from Python 3.11).
    """

    def __new__(cls, value: Union[str, StrEnum], *args, **kwargs):  # type: ignore[no-untyped-def]
        # Accept only strings (and `auto` placeholders); anything else is a
        # definition error.
        if isinstance(value, (str, enum.auto)):
            return super().__new__(cls, value, *args, **kwargs)
        raise TypeError(
            f"Values of StrEnums must be strings: {value!r} is a {type(value)}"
        )

    def __str__(self) -> str:
        # Render as the plain value rather than `ClassName.MEMBER`.
        member_value = self.value
        return str(member_value)

    def _generate_next_value_(name, *_):  # type: ignore[no-untyped-def,override]
        # Hook used by `enum.auto()`: the member name becomes its value.
        return name


class LowerCaseStrEnum(StrEnum):
    """StrEnum variant whose ``auto()`` values are the lower-cased member names."""

    def _generate_next_value_(name, *_unused):  # type: ignore[no-untyped-def,override]
        lowered = name.lower()
        return lowered


================================================
FILE: python/tests/__init__.py
================================================


================================================
FILE: python/tests/test_features_extraction_vs_reference.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import base64
import io
import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import List, Tuple

import click
import dacite
from tqdm import tqdm

from magika import Magika
from magika.types import ModelFeatures, Seekable

try:
    from tests import utils as test_utils
except ImportError:
    # Hack to support both `uv run pytest tests/` and
    # `uv run ./tests/test_...`.
    import sys

    sys.path.append(str(Path(__file__).parent.parent))
    from tests import utils as test_utils


# Root click command group; the commands below register themselves on it via
# `@cli.command()`.
@click.group()
def cli():
    pass


# CLI command: runs the features-extraction-vs-reference check. The
# `--debug/--no-debug` option defaults to debug enabled.
@cli.command()
@click.option("--debug/--no-debug", is_flag=True, default=True)
def run_tests(debug: bool) -> None:
    test_features_extraction_vs_reference(debug=debug)


# CLI command: generates the reference features extraction examples. With
# `--test-mode`, the examples are generated but not written to file.
@cli.command()
@click.option("--test-mode", is_flag=True)
def generate_tests(test_mode: bool) -> None:
    _generate_reference_features_extraction(test_mode=test_mode)


def test_features_extraction_vs_reference(debug: bool = False) -> None:
    """Check features extraction against every stored reference example.

    For each example, the content is decoded from base64, features are
    extracted with the example's own arguments, and the result is compared
    with the features stored in the example. When `debug` is set, a progress
    bar and extra output are shown.
    """
    reference_examples = _get_examples_from_reference()
    if debug:
        print(f"Loaded {len(reference_examples)} tests cases")

    for reference in tqdm(reference_examples, disable=not debug):
        raw_content = base64.b64decode(reference.content_base64)
        seekable = Seekable(io.BytesIO(raw_content))
        extraction_args = reference.args

        extracted_features = Magika._extract_features_from_seekable(
            seekable,
            beg_size=extraction_args.beg_size,
            mid_size=extraction_args.mid_size,
            end_size=extraction_args.end_size,
            padding_token=extraction_args.padding_token,
            block_size=extraction_args.block_size,
            use_inputs_at_offsets=extraction_args.use_inputs_at_offsets,
        )
        _check_features_vs_reference_example_features(
            extracted_features, reference.features, debug=debug
        )


def test_reference_generation() -> None:
    """Exercise the reference generation code path in test mode."""
    _generate_reference_features_extraction(test_mode=True)


def _generate_reference_features_extraction(test_mode: bool) -> None:
    """Generate the reference test cases and hand them over for dumping.

    `test_mode` is forwarded as-is to the dump step.
    """
    print("Generating reference features extraction tests cases...")
    generated_cases = _generate_reference_features_extraction_tests_cases()
    num_generated = len(generated_cases)
    print(f"Generated {num_generated} tests cases")
    _dump_reference_features_extraction_examples(generated_cases, test_mode=test_mode)


def _dump_reference_features_extraction_examples(
    examples: List[FeaturesExtractionExample],
    test_mode: bool,
) -> None:
    """Write the examples to the reference file as gzip-compressed JSON.

    In test mode nothing is written; only a warning is printed.
    """
    output_path = test_utils.get_reference_features_extraction_examples_path()

    if test_mode:
        print('WARNING: running in "test_mode", not writing examples to file')
        return

    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps([asdict(example) for example in examples])
    compressed = test_utils.gzip_compress(serialized.encode("ascii"))
    output_path.write_bytes(compressed)
    print(f"Wrote tests cases to {output_path}")


def _generate_reference_features_extraction_tests_cases() -> List[
    FeaturesExtractionExample
]:
    """Build one reference example per generated (args, metadata, content) input.

    Each example stores the arguments, the metadata, the base64-encoded
    content, and the features extracted from that content.
    """

    def _build_example(
        args: FeaturesExtractionExampleArgs,
        metadata: FeaturesExtractionExampleMetadata,
        content: bytes,
    ) -> FeaturesExtractionExample:
        # Extract the features first, then package everything together.
        extracted = Magika._extract_features_from_seekable(
            Seekable(io.BytesIO(content)),
            args.beg_size,
            args.mid_size,
            args.end_size,
            args.padding_token,
            args.block_size,
            args.use_inputs_at_offsets,
        )
        return FeaturesExtractionExample(
            args=args,
            metadata=metadata,
            content_base64=base64.b64encode(content).decode("ascii"),
            features=extracted,
        )

    inputs: List[
        Tuple[FeaturesExtractionExampleArgs, FeaturesExtractionExampleMetadata, bytes]
    ] = _generate_reference_features_extraction_tests_cases_inputs()

    return [
        _build_example(case_args, case_metadata, case_content)
        for case_args, case_metadata, case_content in inputs
    ]


def _generate_reference_features_extraction_tests_cases_inputs() -> List[
    Tuple[FeaturesExtractionExampleArgs, FeaturesExtractionExampleMetadata, bytes]
]:
    """Enumerate (args, metadata, content) triples covering size boundaries.

    The same list of sizes is used for the core content size and for the
    number of leading/trailing whitespaces; every combination of the three is
    generated. The sizes sit around the boundaries that matter for features
    extraction: beg/end sizes, their sum, and multiples of the block size.
    """
    beg_size, mid_size, end_size = 128, 0, 64
    block_size = 512
    padding_token = 256
    use_inputs_at_offsets = False

    # Sanity checks on the fixed configuration used for the reference.
    assert mid_size == 0
    assert use_inputs_at_offsets is False
    assert beg_size < block_size
    assert mid_size < block_size
    assert end_size < block_size

    # A few tiny sizes, then (boundary - 1, boundary, boundary + 1) for each
    # interesting boundary.
    boundaries = (
        beg_size,
        end_size,
        beg_size + end_size,
        block_size,
        2 * block_size,
        4 * block_size,
    )
    ws_num_options = [0, 1, 10]
    for boundary in boundaries:
        ws_num_options.extend((boundary - 1, boundary, boundary + 1))

    content_size_options = list(ws_num_options)

    def _make_input(
        core_size: int, left_ws: int, right_ws: int
    ) -> Tuple[
        FeaturesExtractionExampleArgs, FeaturesExtractionExampleMetadata, bytes
    ]:
        args = FeaturesExtractionExampleArgs(
            beg_size=beg_size,
            mid_size=mid_size,
            end_size=end_size,
            block_size=block_size,
            padding_token=padding_token,
            use_inputs_at_offsets=use_inputs_at_offsets,
        )
        metadata = FeaturesExtractionExampleMetadata(
            core_content_size=core_size,
            left_ws_num=left_ws,
            right_ws_num=right_ws,
        )
        return args, metadata, _generate_content_from_metadata(metadata)

    return [
        _make_input(core_size, left_ws, right_ws)
        for core_size in content_size_options
        for left_ws in ws_num_options
        for right_ws in ws_num_options
    ]


def _generate_content_from_metadata(
    test_info: FeaturesExtractionExampleMetadata,
) -> bytes:
    """Generate a test content from its metadata.

    The content is a printable pattern of `core_content_size` bytes, preceded
    by `left_ws_num` and followed by `right_ws_num` whitespaces. When the core
    is at least 5 bytes long, its first two and last two bytes are replaced
    with a NUL byte and a space (e.g., "\\x00 ... \\x00"), to check that
    implementations do not strip characters they are not supposed to strip.
    """
    core_size = test_info.core_content_size
    core = bytearray(test_utils.generate_pattern(core_size, only_printable=True))

    if core_size >= 5:
        # inject characters that other implementations may mistakenly strip
        core[:2] = b"\x00 "
        core[-2:] = b" \x00"

    leading_ws = test_utils.generate_whitespaces(test_info.left_ws_num)
    middle = bytes(core)
    trailing_ws = test_utils.generate_whitespaces(test_info.right_ws_num)
    return leading_ws + middle + trailing_ws


def _get_examples_from_reference() -> List[FeaturesExtractionExample]:
    """Load the reference examples from the gzip-compressed JSON file."""
    reference_path = test_utils.get_reference_features_extraction_examples_path()
    raw_examples = json.loads(test_utils.gzip_decompress(reference_path.read_bytes()))

    parsed_examples = []
    for raw_example in raw_examples:
        parsed_examples.append(
            dacite.from_dict(FeaturesExtractionExample, raw_example)
        )
    return parsed_examples


def _check_features_vs_reference_example_features(
    features: ModelFeatures, example_features: ModelFeatures, debug: bool = False
) -> None:
    with_error = False
    if features.beg != example_features.beg:
        with_error = True
        if debug:
            print("beg does not match")
    if features.mid != example_features.mid:
        with_error = True
        if debug:
            print("mid does not match")
    if features.end != example_features.end:
        with_error = True
        if debug:
            print("end does not match")
    try:
        assert features == example_features
    except AssertionError:
        with_error = True
        if debug:
            print("other fields do not match")

    if with_error:
        raise Exception


@dataclass
class FeaturesExtractionExample:
    """Data model for features_extraction_examples.json.gz.

    One reference example: the extraction arguments, the metadata describing
    how the content was synthesized, the content itself, and the features
    extracted from it.
    """

    # Arguments forwarded to the features extraction for this example.
    args: FeaturesExtractionExampleArgs
    # Core content size and number of surrounding whitespaces.
    metadata: FeaturesExtractionExampleMetadata
    # The input content, base64-encoded (ASCII text).
    content_base64: str
    # Features returned by `Magika._extract_features_from_seekable`.
    features: ModelFeatures


@dataclass
class FeaturesExtractionExampleArgs:
    """Arguments passed to `Magika._extract_features_from_seekable` when
    generating a reference example."""

    beg_size: int
    mid_size: int
    end_size: int
    block_size: int
    # NOTE(review): presumably the value used to pad features when the content
    # is shorter than the requested size -- confirm against the extractor.
    padding_token: int
    use_inputs_at_offsets: bool


@dataclass
class FeaturesExtractionExampleMetadata:
    """Describes how a synthetic test content is built; see
    `_generate_content_from_metadata`."""

    # Size in bytes of the generated pattern, without surrounding whitespaces.
    core_content_size: int
    # Number of whitespaces placed before the core content.
    left_ws_num: int
    # Number of whitespaces placed after the core content.
    right_ws_num: int


# Invoke the `cli` entry point when this file is executed as a script.
if __name__ == "__main__":
    cli()


================================================
FILE: python/tests/test_inference_vs_reference.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import base64
import enum
import json
import random
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, Generator, List, Optional, Set, Tuple

import click
import dacite
import pytest
from tqdm import tqdm

from magika import ContentTypeLabel, Magika, PredictionMode
from magika.types import MagikaResult, OverwriteReason
from magika.types.status import Status

try:
    from tests import utils as test_utils
except ImportError:
    # Hack to support both `uv run pytest tests/` and `uv run ./tests/test_...
    # `
    import sys

    sys.path.append(str(Path(__file__).parent.parent))
    from tests import utils as test_utils


@click.group()
def cli() -> None:
    # Root click group; the commands below are registered on it with
    # `@cli.command()`. A `#` comment is used instead of a docstring so that
    # the click help output is left unchanged.
    pass


@cli.command()
@click.option("--debug/--no-debug", is_flag=True, default=True)
def run_tests(debug: bool) -> None:
    # Run the inference-vs-reference test outside of pytest; debug output is
    # enabled by default here (default=True), unlike when pytest calls the test.
    test_inference_vs_reference(debug=debug)


@cli.command()
@click.option("--test-mode", is_flag=True)
def generate_tests(test_mode: bool) -> None:
    # Regenerate the reference examples for inference; with --test-mode the
    # generation is exercised but nothing is written to disk.
    _generate_reference_for_inference(test_mode=test_mode)


def test_inference_vs_reference(debug: bool = False) -> None:
    """Check that inference results match the stored reference examples.

    Every example is checked via all three APIs (`identify_path`,
    `identify_bytes`, `identify_stream`), using a Magika instance configured
    with the prediction mode recorded in the example.
    """
    repo_root_dir = test_utils.get_repo_root_dir()
    # Path reported by Magika for results not obtained via `identify_path`.
    no_path = Path("-")

    magika_by_prediction_mode: Dict[PredictionMode, Magika] = {
        mode: Magika(prediction_mode=mode)
        for mode in (
            PredictionMode.HIGH_CONFIDENCE,
            PredictionMode.MEDIUM_CONFIDENCE,
            PredictionMode.BEST_GUESS,
        )
    }
    model_name = magika_by_prediction_mode[
        PredictionMode.HIGH_CONFIDENCE
    ].get_model_name()

    # Examples that reference test files stored in the repository.
    path_examples = _get_examples_by_path(model_name)
    if debug:
        print(f"Loaded {len(path_examples)} examples by path")
    for path_example in tqdm(path_examples, disable=not debug):
        detector = magika_by_prediction_mode[path_example.prediction_mode]
        abs_path = repo_root_dir / path_example.path
        expected = (path_example.status, path_example.prediction)

        _check_result_vs_reference_example(
            detector.identify_path(abs_path), abs_path, *expected
        )
        _check_result_vs_reference_example(
            detector.identify_bytes(abs_path.read_bytes()), no_path, *expected
        )
        with open(abs_path, "rb") as stream:
            _check_result_vs_reference_example(
                detector.identify_stream(stream), no_path, *expected
            )

    # Examples that embed their own (base64-encoded) content.
    content_examples = _get_examples_by_content(model_name)
    if debug:
        print(f"Loaded {len(content_examples)} examples by content")
    for content_example in tqdm(content_examples, disable=not debug):
        detector = magika_by_prediction_mode[content_example.prediction_mode]
        raw_content = base64.b64decode(content_example.content_base64)
        expected = (content_example.status, content_example.prediction)

        _check_result_vs_reference_example(
            detector.identify_bytes(raw_content), no_path, *expected
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir) / "file.bin"
            tmp_path.write_bytes(raw_content)

            _check_result_vs_reference_example(
                detector.identify_path(tmp_path), tmp_path, *expected
            )

            with open(tmp_path, "rb") as stream:
                _check_result_vs_reference_example(
                    detector.identify_stream(stream), no_path, *expected
                )


def test_reference_generation() -> None:
    """Exercise the reference generation code without writing any file."""
    # This is useful to exercise the various paths to make sure the reference
    # generation stays up to date. With test_mode=True the dump helpers only
    # print a warning, and corner-case collection is cut short.
    _generate_reference_for_inference(test_mode=True)


def _get_examples_by_path(model_name: str) -> List[ExampleByPath]:
    """Load the by-path reference examples for the given model.

    Enum-typed fields are stored as plain values in the JSON, hence the
    `cast` configuration passed to dacite.
    """
    reference_path = test_utils.get_reference_for_inference_examples_by_path_path(
        model_name
    )
    raw_entries = json.loads(test_utils.gzip_decompress(reference_path.read_bytes()))

    examples = []
    for raw_entry in raw_entries:
        cast_config = dacite.Config(
            cast=[ContentTypeLabel, OverwriteReason, PredictionMode, Status]
        )
        examples.append(
            dacite.from_dict(ExampleByPath, raw_entry, config=cast_config)
        )
    return examples


def _get_examples_by_content(model_name: str) -> List[ExampleByContent]:
    """Load the by-content reference examples for the given model.

    Enum-typed fields are stored as plain values in the JSON, hence the
    `cast` configuration passed to dacite.
    """
    reference_path = test_utils.get_reference_for_inference_examples_by_content_path(
        model_name
    )
    raw_entries = json.loads(test_utils.gzip_decompress(reference_path.read_bytes()))

    examples = []
    for raw_entry in raw_entries:
        cast_config = dacite.Config(
            cast=[ContentTypeLabel, OverwriteReason, PredictionMode, Status]
        )
        examples.append(
            dacite.from_dict(ExampleByContent, raw_entry, config=cast_config)
        )
    return examples


def _generate_reference_for_inference(test_mode: bool) -> None:
    """Generate and dump both kinds of reference examples for the default model.

    The by-path examples are generated and dumped first, then the by-content
    ones. With `test_mode=True` nothing is written to disk.
    """
    default_model_name = Magika._get_default_model_name()

    path_examples = _generate_examples_by_path(default_model_name)
    _dump_examples_by_path(default_model_name, path_examples, test_mode=test_mode)

    content_examples = _generate_examples_by_content(
        default_model_name, test_mode=test_mode
    )
    _dump_examples_by_content(
        default_model_name, content_examples, test_mode=test_mode
    )


def _generate_examples_by_path(
    model_name: str,
) -> List[ExampleByPath]:
    """Generate reference examples from the basic test files.

    Every test file is identified once per prediction mode. The path stored
    in each example is always relative to the repository root, so that the
    reference does not depend on where the repository is checked out.

    Args:
        model_name: expected name of the model used by `Magika`.

    Returns:
        One example per (prediction mode, test file) pair; `prediction` is
        None when the result is not ok.
    """
    print(f'Generating examples by path for model "{model_name}"...')

    repo_root_dir = test_utils.get_repo_root_dir()
    tests_paths = test_utils.get_basic_test_files_paths()
    examples_by_path = []

    for prediction_mode in [
        PredictionMode.HIGH_CONFIDENCE,
        PredictionMode.MEDIUM_CONFIDENCE,
        PredictionMode.BEST_GUESS,
    ]:
        m = Magika(prediction_mode=prediction_mode)
        assert m.get_model_name() == model_name

        for test_path in tqdm(tests_paths):
            result = m.identify_path(test_path)

            # Use the repo-relative path for both ok and non-ok results; the
            # test joins it with the repo root when loading the reference.
            relative_path = str(test_path.resolve().relative_to(repo_root_dir))

            prediction: Optional[Prediction] = None
            if result.ok:
                prediction = Prediction(
                    dl=result.prediction.dl.label,
                    output=result.prediction.output.label,
                    score=result.prediction.score,
                    overwrite_reason=result.prediction.overwrite_reason,
                )

            examples_by_path.append(
                ExampleByPath(
                    prediction_mode=prediction_mode,
                    path=relative_path,
                    status=result.status,
                    prediction=prediction,
                )
            )

    return examples_by_path


def _generate_examples_by_content(
    model_name: str, test_mode: bool
) -> List[ExampleByContent]:
    """Generate reference examples from synthetic contents.

    Contents are built in two steps: fixed-size patterns around the model's
    size boundaries, then fuzzing-like candidates selected by
    `CornerCaseCollector` until every tracked corner case has an example.
    Each content is then identified once per prediction mode.

    Args:
        model_name: expected name of the model used by `Magika`.
        test_mode: when True, stop inspecting candidates early and tolerate
            missing corner cases.

    Returns:
        One example per (prediction mode, content) pair; `prediction` is None
        when the result is not ok.

    Raises:
        SystemExit: if not in test mode and some corner cases are missing.
    """
    # `sys` is imported at module level only in the ImportError fallback, so
    # import it here to make sure `sys.exit` below is always available.
    import sys

    # Fixed seed, so that the generated candidates are reproducible.
    random.seed(42)

    print(f'Generating examples by content for model "{model_name}"...')

    # First we generate corner cases examples by content, without caring about
    # the prediction mode. In fact, at the example generation phase, we only
    # care about the model prediction, which is not affected by the prediction
    # mode. Then, we generate the reference by looping over possible prediction
    # modes and all the corner case examples.

    magika = Magika()
    assert magika.get_model_name() == model_name

    content_list = []
    content_list.append(b"")
    for size in [
        magika._model_config.min_file_size_for_dl - 1,
        magika._model_config.min_file_size_for_dl,
        magika._model_config.min_file_size_for_dl + 1,
        magika._model_config.beg_size - 1,
        magika._model_config.beg_size,
        magika._model_config.beg_size + 1,
        magika._model_config.end_size - 1,
        magika._model_config.end_size,
        magika._model_config.end_size + 1,
        magika._model_config.beg_size + magika._model_config.end_size - 1,
        magika._model_config.beg_size + magika._model_config.end_size,
        magika._model_config.beg_size + magika._model_config.end_size + 1,
        magika._model_config.block_size - 1,
        magika._model_config.block_size,
        magika._model_config.block_size + 1,
    ]:
        content_list.append(test_utils.generate_pattern(size, only_printable=True))
        content_list.append(test_utils.generate_pattern(size, only_printable=False))

    # We now generate additional examples to check for additional corner cases,
    # related to prediction mode, thresholds, and overwrite map. We use a
    # fuzzing-like approach to generate weird samples at random, we then check
    # each of them to fill what we need for the test suite.
    collector = CornerCaseCollector(magika)
    generator = collector.get_corner_case_candidates_generator()
    for candidate_idx, (source_info, content) in enumerate(generator):
        is_useful, result, cc_info = collector.inspect_content(content)
        if is_useful:
            print(
                source_info,
                result.dl.label,
                result.score,
                result.output.label,
                cc_info,
                collector.get_missing_examples_num(),
            )

            content_list.append(content)

            if collector.is_complete():
                break

        if test_mode:
            if candidate_idx >= 100:
                # In "test_mode", we exit early (once the candidate index
                # reaches 100), even if we are not done
                break

    if not collector.is_complete():
        if test_mode:
            print(
                'WARNING: running in "test_mode", exiting corner cases generation early'
            )
        else:
            print(
                f"ERROR: Missing {collector.get_missing_examples_num()} corner cases:"
            )
            for corner_case_info in collector.get_missing_examples():
                print(f"\t{corner_case_info}")
            sys.exit(1)

    examples_by_content = []
    for prediction_mode in [
        PredictionMode.HIGH_CONFIDENCE,
        PredictionMode.MEDIUM_CONFIDENCE,
        PredictionMode.BEST_GUESS,
    ]:
        magika = Magika(prediction_mode=prediction_mode)
        for content in content_list:
            result = magika.identify_bytes(content)
            if result.ok:
                example = ExampleByContent(
                    prediction_mode=prediction_mode,
                    content_base64=base64.b64encode(content).decode("ascii"),
                    status=result.status,
                    prediction=Prediction(
                        dl=result.prediction.dl.label,
                        output=result.prediction.output.label,
                        score=result.prediction.score,
                        overwrite_reason=result.prediction.overwrite_reason,
                    ),
                )
            else:
                example = ExampleByContent(
                    prediction_mode=prediction_mode,
                    content_base64=base64.b64encode(content).decode("ascii"),
                    status=result.status,
                    prediction=None,
                )
            examples_by_content.append(example)

    return examples_by_content


def _dump_examples_by_path(
    model_name: str,
    examples_by_path: List[ExampleByPath],
    test_mode: bool,
) -> None:
    """Write the by-path examples as compact, gzip-compressed JSON.

    In test mode nothing is written; only a warning is printed.
    """
    output_path = test_utils.get_reference_for_inference_examples_by_path_path(
        model_name
    )

    if test_mode:
        print(
            f'WARNING: running in "test_mode", not writing examples by path to {output_path}'
        )
        return

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Compact separators keep the serialized reference small.
    serialized = json.dumps(
        [asdict(example) for example in examples_by_path],
        separators=(",", ":"),
    )
    output_path.write_bytes(test_utils.gzip_compress(serialized.encode("ascii")))
    print(f"Wrote {len(examples_by_path)} examples by path to {output_path}")


def _dump_examples_by_content(
    model_name: str,
    examples_by_content: List[ExampleByContent],
    test_mode: bool,
) -> None:
    """Write the by-content examples as compact, gzip-compressed JSON.

    In test mode nothing is written; only a warning is printed.
    """
    output_path = test_utils.get_reference_for_inference_examples_by_content_path(
        model_name
    )

    if test_mode:
        print(
            f'WARNING: running in "test_mode", not writing examples by content to {output_path}'
        )
        return

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Compact separators keep the serialized reference small.
    serialized = json.dumps(
        [asdict(example) for example in examples_by_content],
        separators=(",", ":"),
    )
    output_path.write_bytes(test_utils.gzip_compress(serialized.encode("ascii")))
    print(f"Wrote {len(examples_by_content)} examples by content to {output_path}")


@dataclass(frozen=True)
class CornerCaseInfo:
    """Identifies one corner case of the prediction logic.

    Frozen, so that instances are hashable and can be stored in a set.
    """

    # Generic/non-generic and text/binary category of the model's label.
    label_category: LabelCategory
    # Whether the model's label has a dedicated threshold.
    with_threshold: bool
    # Whether the model's label has an entry in the overwrite map.
    with_overwrite: bool
    # Where the score falls with respect to 0.50 and the threshold, if any.
    score_range: ScoreRange

    def __repr__(self) -> str:
        """Compact representation, e.g. `CornerCaseInfo(<category>,TH,NO_OW,<range>)`."""
        threshold_tag = "TH" if self.with_threshold else "NO_TH"
        overwrite_tag = "OW" if self.with_overwrite else "NO_OW"
        parts = ",".join(
            [
                f"{self.label_category}",
                threshold_tag,
                overwrite_tag,
                f"{self.score_range}",
            ]
        )
        return f"{self.__class__.__name__}({parts})"


class LabelCategory(enum.Enum):
    """Category of a content type label along two axes: generic vs
    non-generic, and text vs binary (see
    `CornerCaseCollector._get_label_category`)."""

    # Generic labels are TXT and UNKNOWN (see `CornerCaseCollector._is_generic`).
    GENERIC_TEXT = enum.auto()
    GENERIC_BINARY = enum.auto()
    NON_GENERIC_TEXT = enum.auto()
    NON_GENERIC_BINARY = enum.auto()


class ScoreRange(enum.Enum):
    """Bucket of a prediction score, as computed by
    `CornerCaseCollector._get_score_range`."""

    # score < 0.50
    LT_050 = enum.auto()
    # score >= 0.50, for labels without a dedicated threshold
    GE_050 = enum.auto()
    # 0.50 <= score < threshold, for labels with a dedicated threshold
    GE_050_LT_T = enum.auto()
    # score >= 0.50 and score >= threshold, for labels with a dedicated threshold
    GE_T = enum.auto()


class CornerCaseCollector:
    """Tracks which prediction corner cases still lack a reference example.

    The collector starts with a set of wanted `CornerCaseInfo`; each inspected
    content whose prediction falls into a still-missing corner case removes it
    from the set. It also provides a generator of candidate contents.
    """

    def __init__(self, magika: Magika):
        """Initialize the set of corner cases that need an example.

        Args:
            magika: instance used to classify candidates and to read the
                model configuration (thresholds, overwrite map, sizes).
        """
        self._magika = magika
        self._missing_corner_cases: Set[CornerCaseInfo] = set()
        # fmt: off
        self._missing_corner_cases.update({
            # NON_GENERIC_TEXT
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, False, False, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, False, False, ScoreRange.GE_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, True, False, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, True, False, ScoreRange.GE_050_LT_T),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, True, False, ScoreRange.GE_T),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, False, True, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_TEXT, False, True, ScoreRange.GE_050),
            # NON_GENERIC_BINARY
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, False, False, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, False, False, ScoreRange.GE_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, True, False, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, True, False, ScoreRange.GE_050_LT_T),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, True, False, ScoreRange.GE_T),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, False, True, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.NON_GENERIC_BINARY, False, True, ScoreRange.GE_050),
        })
        self._missing_corner_cases.update({
            CornerCaseInfo(LabelCategory.GENERIC_TEXT, False, False, ScoreRange.LT_050),
            CornerCaseInfo(LabelCategory.GENERIC_TEXT, False, False, ScoreRange.GE_050),
            # No GENERIC_BINARY (aka UNKNOWN) because the model would never output that
        })
        # fmt: on

    def inspect_content(
        self, content: bytes
    ) -> Tuple[bool, MagikaResult, CornerCaseInfo]:
        """Classify `content` and check whether it covers a missing corner case.

        Returns:
            A tuple `(is_useful, result, corner_case)`. `is_useful` is True
            only if the corner case was still missing; in that case it is
            removed from the missing set.
        """
        res = self._magika.identify_bytes(content)
        cce = self._get_cornern_case_example(res.dl.label, res.score)
        if cce in self._missing_corner_cases:
            self._missing_corner_cases.remove(cce)
            return True, res, cce
        return False, res, cce

    def is_complete(self) -> bool:
        """Return True when no corner case is missing anymore."""
        return self.get_missing_examples_num() == 0

    def get_missing_examples(self) -> Set[CornerCaseInfo]:
        """Return the (live, not copied) set of still-missing corner cases."""
        return self._missing_corner_cases

    def get_missing_examples_num(self) -> int:
        """Return how many corner cases are still missing."""
        return len(self._missing_corner_cases)

    def _get_cornern_case_example(
        self, dl_label: ContentTypeLabel, score: float
    ) -> CornerCaseInfo:
        """Map a model prediction (label and score) to its corner case."""
        return CornerCaseInfo(
            label_category=self._get_label_category(dl_label),
            with_threshold=self._has_threshold(dl_label),
            with_overwrite=self._has_overwrite(dl_label),
            score_range=self._get_score_range(dl_label, score),
        )

    def _get_label_category(self, dl_label: ContentTypeLabel) -> LabelCategory:
        """Return the generic/non-generic, text/binary category of a label."""
        m = {
            # is_generic, is_text
            (True, True): LabelCategory.GENERIC_TEXT,
            (True, False): LabelCategory.GENERIC_BINARY,
            (False, True): LabelCategory.NON_GENERIC_TEXT,
            (False, False): LabelCategory.NON_GENERIC_BINARY,
        }
        return m[
            self._is_generic(dl_label),
            self._is_text(dl_label),
        ]

    def _is_generic(self, dl_label: ContentTypeLabel) -> bool:
        """Return True for the generic labels, TXT and UNKNOWN."""
        return dl_label in [ContentTypeLabel.TXT, ContentTypeLabel.UNKNOWN]

    def _is_text(self, dl_label: ContentTypeLabel) -> bool:
        """Return the `is_text` flag of the label's content type info."""
        return self._magika._cts_infos[dl_label].is_text

    def _has_threshold(self, dl_label: ContentTypeLabel) -> bool:
        """Return True if the model config has a threshold for this label."""
        return dl_label in self._magika._model_config.thresholds.keys()

    def _get_threshold(self, dl_label: ContentTypeLabel) -> float:
        """Return the threshold for this label (KeyError if there is none)."""
        return self._magika._model_config.thresholds[dl_label]

    def _has_overwrite(self, dl_label: ContentTypeLabel) -> bool:
        """Return True if the label has an entry in the overwrite map."""
        return dl_label in self._magika._model_config.overwrite_map.keys()

    def _get_score_range(self, dl_label: ContentTypeLabel, score: float) -> ScoreRange:
        """Bucket the score with respect to 0.50 and the label's threshold."""
        if score < 0.50:
            return ScoreRange.LT_050
        else:
            if self._has_threshold(dl_label):
                if score < self._get_threshold(dl_label):
                    return ScoreRange.GE_050_LT_T
                else:
                    return ScoreRange.GE_T
            else:
                return ScoreRange.GE_050

    def get_corner_case_candidates_generator(
        self,
    ) -> Generator[Tuple[str, bytes], None, None]:
        """Yield `(source_info, content)` candidates for corner cases.

        Candidates come from three sources, in order: short random contents;
        the basic test files (truncated to their first `beg_size` and last
        `end_size` bytes when longer than that); and patched variants of the
        base contents, where the first/last `n` bytes are replaced with a
        generated pattern.
        """
        beg_size = self._magika._model_config.beg_size
        end_size = self._magika._model_config.end_size

        print("Using random bytes")
        for _ in range(1_000):
            if random.random() < 0.5:
                yield (
                    "randomtxt",
                    test_utils.get_random_ascii_bytes(
                        random.randrange(8, beg_size + end_size)
                    ),
                )
            else:
                yield (
                    "randombytes",
                    test_utils.get_random_bytes(
                        random.randrange(8, beg_size + end_size)
                    ),
                )

        base_examples = []
        base_examples.append(
            ("randomtxt", test_utils.get_random_ascii_bytes(beg_size + end_size))
        )
        base_examples.append(
            ("randombytes", test_utils.get_random_bytes(beg_size + end_size))
        )
        for example_path in test_utils.get_basic_test_files_paths():
            example_content = example_path.read_bytes()
            if len(example_content) < beg_size + end_size:
                base_content = example_content
            else:
                # Keep only the beginning and the end of the file. The guards
                # matter: `example_content[-0:]` would be the whole content.
                base_content = b""
                if beg_size > 0:
                    base_content += example_content[:beg_size]
                if end_size > 0:
                    base_content += example_content[-end_size:]
            base_example = (str(example_path), base_content)
            yield base_example
            base_examples.append(base_example)

        for base_source, base_content in base_examples:
            print(f"Using {base_source} as base")
            for only_printable in [True, False]:
                for n in range(
                    0,
                    min(
                        beg_size,
                        end_size,
                        len(base_content),
                    ),
                ):
                    patched_content = bytearray(base_content[:])
                    patched_content[0:n] = test_utils.generate_pattern(
                        n, only_printable=only_printable
                    )
                    yield (f"base_{base_source}_beg_{n}", bytes(patched_content))

                    # The same buffer is patched again, so the "end" candidate
                    # carries both the beginning and the end patch.
                    patched_content[len(base_content) - n : len(base_content)] = (
                        test_utils.generate_pattern(n, only_printable=only_printable)
                    )
                    yield (f"base_{base_source}_end_{n}", bytes(patched_content))


def _check_result_vs_reference_example(
    result: MagikaResult,
    expected_path: Path,
    expected_status: Status,
    expected_prediction: Prediction,
) -> None:
    assert result.path == expected_path
    assert result.status == expected_status
    if result.ok:
        assert result.prediction.dl.label == expected_prediction.dl
        assert result.prediction.output.label == expected_prediction.output
        assert result.prediction.score == pytest.approx(
            expected_prediction.score, abs=1e-5
        )
        assert (
            result.prediction.overwrite_reason == expected_prediction.overwrite_reason
        )


@dataclass
class ExampleByPath:
    """Data model for -inference_examples_by_path.json.gz."""

    # Prediction mode the Magika instance was configured with.
    prediction_mode: PredictionMode
    # Path of the test file; the test joins it with the repository root.
    path: str
    # Status of the result obtained at generation time.
    status: Status
    # Expected prediction; None when the result was not ok.
    prediction: Optional[Prediction]


@dataclass
class ExampleByContent:
    """Data model for -inference_examples_by_content.json.gz."""

    # Prediction mode the Magika instance was configured with.
    prediction_mode: PredictionMode
    # The content to identify, base64-encoded (ASCII text).
    content_base64: str
    # Status of the result obtained at generation time.
    status: Status
    # Expected prediction; None when the result was not ok.
    prediction: Optional[Prediction]


@dataclass
class Prediction:
    """Expected prediction stored in a reference example; fields mirror the
    values read from `result.prediction` at generation time."""

    # Label of `result.prediction.dl`.
    dl: ContentTypeLabel
    # Label of `result.prediction.output`.
    output: ContentTypeLabel
    # Prediction score; compared with an absolute tolerance of 1e-5.
    score: float
    # Value of `result.prediction.overwrite_reason`.
    overwrite_reason: OverwriteReason


# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()


================================================
FILE: python/tests/test_magika_python_module.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import signal
import tempfile
from pathlib import Path
from typing import Any, List, Optional

import pytest

from magika import Magika, PredictionMode
from magika.types import (
    ContentTypeInfo,
    ContentTypeLabel,
    MagikaPrediction,
    MagikaResult,
    Status,
)
from magika.types.overwrite_reason import OverwriteReason
from tests import utils


@pytest.mark.smoketest
def test_magika_module_check_version() -> None:
    """The module exposes a string version, which `Magika` reports as well."""
    import magika as magika_module

    assert isinstance(magika_module.__version__, str)

    detector = Magika()
    reported_version = detector.get_module_version()
    assert reported_version == magika_module.__version__

    # Without an explicit `model_dir`, Magika must use the default model.
    default_model_name = detector._get_default_model_name()
    assert detector.get_model_name() == default_model_name


@pytest.mark.smoketest
def test_magika_module_with_one_test_file() -> None:
    """Smoke test: every identify_* API runs on one test file."""
    sample_path = utils.get_one_basic_test_file_path()
    detector = Magika()

    # Results are intentionally ignored: we only check that nothing raises.
    detector.identify_path(sample_path)
    detector.identify_paths([sample_path])
    detector.identify_bytes(sample_path.read_bytes())
    with open(sample_path, "rb") as stream:
        detector.identify_stream(stream)


@pytest.mark.smoketest
def test_magika_module_with_explicit_model_dir() -> None:
    """Smoke test: every identify_* API runs with an explicit `model_dir`."""
    explicit_model_dir = utils.get_default_model_dir()
    sample_path = utils.get_one_basic_test_file_path()
    detector = Magika(model_dir=explicit_model_dir)

    # Results are intentionally ignored: we only check that nothing raises.
    detector.identify_path(sample_path)
    detector.identify_paths([sample_path])
    detector.identify_bytes(sample_path.read_bytes())
    with open(sample_path, "rb") as stream:
        detector.identify_stream(stream)


def test_magika_module_with_basic_tests_by_paths() -> None:
    """Identify all the basic test files with one `identify_paths` call."""
    sample_paths = utils.get_basic_test_files_paths()
    detector = Magika()
    check_results_vs_expected_results(
        sample_paths, detector.identify_paths(sample_paths)
    )


def test_magika_module_with_basic_tests_by_path() -> None:
    """Identify the basic test files one at a time with `identify_path`."""
    sample_paths = utils.get_basic_test_files_paths()
    detector = Magika()

    for sample_path in sample_paths:
        check_result_vs_expected_result(
            sample_path, detector.identify_path(sample_path)
        )


def test_magika_module_with_basic_tests_by_bytes() -> None:
    """Identify the basic test files from their bytes with `identify_bytes`."""
    sample_paths = utils.get_basic_test_files_paths()
    detector = Magika()

    for sample_path in sample_paths:
        detection = detector.identify_bytes(sample_path.read_bytes())
        # Results obtained from bytes report "-" as their path.
        check_result_vs_expected_result(
            sample_path, detection, expected_result_path=Path("-")
        )


def test_magika_module_with_basic_tests_by_stream() -> None:
    """Identify the basic test files from open streams with `identify_stream`."""
    sample_paths = utils.get_basic_test_files_paths()
    detector = Magika()

    for sample_path in sample_paths:
        with open(sample_path, "rb") as stream:
            detection = detector.identify_stream(stream)
        # Results obtained from a stream report "-" as their path.
        check_result_vs_expected_result(
            sample_path, detection, expected_result_path=Path("-")
        )


def test_magika_module_with_all_models() -> None:
    """Check the basic test files against every model in the models dir."""
    sample_paths = utils.get_basic_test_files_paths()

    for candidate_model_dir in utils.get_models_dir().iterdir():
        detector = Magika(model_dir=candidate_model_dir)
        for sample_path in sample_paths:
            check_result_vs_expected_result(
                sample_path, detector.identify_path(sample_path)
            )


def test_magika_module_with_previously_missdetected_samples() -> None:
    """Check the previously misdetected samples with the default model dir."""
    default_model_dir = utils.get_default_model_dir()
    sample_paths = utils.get_previously_missdetected_files_paths()

    detector = Magika(model_dir=default_model_dir)
    check_results_vs_expected_results(
        sample_paths, detector.identify_paths(sample_paths)
    )


def test_magika_module_with_empty_content() -> None:
    """Empty input must be reported as EMPTY through the bytes, path and stream APIs."""
    m = Magika()

    no_bytes = b""

    def assert_reported_as_empty(res: MagikaResult, expected_path: Path) -> None:
        # The asserted outcome: model label UNDEFINED, output label EMPTY, score 1.0.
        assert res.ok
        assert res.path == expected_path
        assert res.prediction.dl.label == ContentTypeLabel.UNDEFINED
        assert res.prediction.output.label == ContentTypeLabel.EMPTY
        assert res.prediction.score == 1.0

    # via bytes
    assert_reported_as_empty(m.identify_bytes(no_bytes), Path("-"))

    # via path
    with tempfile.TemporaryDirectory() as tmp_dir:
        empty_file_path = Path(tmp_dir) / "empty.dat"
        empty_file_path.write_bytes(no_bytes)
        assert_reported_as_empty(m.identify_path(empty_file_path), empty_file_path)

    # via stream
    assert_reported_as_empty(m.identify_stream(io.BytesIO(b"")), Path("-"))


def test_magika_module_with_short_content() -> None:
    """Three-byte inputs must resolve to TXT or UNKNOWN with the model label UNDEFINED."""
    m = Magika()

    cases = [
        (b"asd", ContentTypeLabel.TXT),
        (b"\x80\x80\x80", ContentTypeLabel.UNKNOWN),
    ]

    def assert_short_result(
        res: MagikaResult, expected_path: Path, expected_label: ContentTypeLabel
    ) -> None:
        assert res.path == expected_path
        assert res.ok
        assert res.prediction.dl.label == ContentTypeLabel.UNDEFINED
        assert res.prediction.output.label == expected_label
        assert res.prediction.score == 1.0

    for payload, expected_ct_label in cases:
        with tempfile.TemporaryDirectory() as tmp_dir:
            # prediction via path
            file_path = Path(tmp_dir) / "file.txt"
            file_path.write_bytes(payload)
            assert_short_result(m.identify_path(file_path), file_path, expected_ct_label)

            # prediction via bytes
            assert_short_result(m.identify_bytes(payload), Path("-"), expected_ct_label)

            # prediction via stream
            assert_short_result(
                m.identify_stream(io.BytesIO(payload)), Path("-"), expected_ct_label
            )


def test_magika_module_with_python_and_non_python_content() -> None:
    """A small Python snippet must yield PYTHON; a plain sentence must yield TXT."""
    cases = [
        (
            b"import flask\nimport requests\n\ndef foo(a):\n    print(f'Test {a}')\n",
            ContentTypeLabel.PYTHON,
        ),
        (b"clearly not python", ContentTypeLabel.TXT),
    ]

    m = Magika()

    for payload, expected_label in cases:
        outcome = m.identify_bytes(payload)
        assert outcome.ok
        assert outcome.prediction.output.label == expected_label


def test_magika_module_identify_stream_does_not_alter_position() -> None:
    """identify_stream must leave the stream at the offset it had before the call."""
    m = Magika()

    payloads = (b"", b"short", b"A" * 100, b"A" * 1000, b"A" * 10000)
    for payload in payloads:
        stream = io.BytesIO(payload)
        # Start from an offset that is neither the beginning nor (for non-trivial
        # payloads) the end of the stream.
        initial_offset = min(2, len(payload))
        stream.seek(initial_offset)
        outcome = m.identify_stream(stream)
        assert outcome.ok
        assert stream.tell() == initial_offset


def test_magika_module_with_whitespaces() -> None:
    """Whitespace-only inputs of many boundary sizes must be reported as TXT.

    The sizes are picked around the model-config values read below
    (min_file_size_for_dl, beg_size, end_size, their sum, and multiples of
    block_size), each with its -1 / +1 neighbours. Every size is checked
    through the bytes, stream and path APIs.
    """
    m = Magika()
    cfg = m._model_config

    ws_nums = sorted(
        {
            1,
            cfg.min_file_size_for_dl - 1,
            cfg.min_file_size_for_dl,
            cfg.min_file_size_for_dl + 1,
            cfg.beg_size - 1,
            cfg.beg_size,
            cfg.beg_size + 1,
            cfg.end_size - 1,
            cfg.end_size,
            cfg.end_size + 1,
            cfg.beg_size + cfg.end_size - 1,
            cfg.beg_size + cfg.end_size,
            cfg.beg_size + cfg.end_size + 1,
            cfg.block_size - 1,
            cfg.block_size,
            cfg.block_size + 1,
            2 * cfg.block_size - 1,
            2 * cfg.block_size,
            2 * cfg.block_size + 1,
            4 * cfg.block_size - 1,
            4 * cfg.block_size,
            4 * cfg.block_size + 1,
        }
    )

    def assert_generic_text(res: MagikaResult) -> None:
        # Separate assertions so a failure points at the exact condition.
        assert res.ok
        assert res.dl.label == ContentTypeLabel.UNDEFINED
        assert res.output.label == ContentTypeLabel.TXT

    for ws_num in ws_nums:
        print(f"Calling identify_bytes with {ws_num} whitespaces")
        content = b" " * ws_num

        assert_generic_text(m.identify_bytes(content))
        assert_generic_text(m.identify_stream(io.BytesIO(content)))

        with tempfile.TemporaryDirectory() as td:
            tf_path = Path(td) / "test.bin"
            tf_path.write_bytes(content)
            assert_generic_text(m.identify_path(tf_path))


def test_magika_module_with_different_prediction_modes() -> None:
    """Check how each prediction mode turns a PYTHON model label and score into an output.

    BEST_GUESS is expected to keep PYTHON for every score; MEDIUM_CONFIDENCE and
    HIGH_CONFIDENCE are expected to fall back to TXT with LOW_CONFIDENCE below
    their respective thresholds. A Magika built without an explicit mode must
    behave like HIGH_CONFIDENCE.
    """
    model_dir = utils.get_default_model_dir()
    python_label = ContentTypeLabel.PYTHON
    kept_as_python = (python_label, OverwriteReason.NONE)
    demoted_to_txt = (ContentTypeLabel.TXT, OverwriteReason.LOW_CONFIDENCE)

    def resolve(magika: Magika, score: float) -> Any:
        return magika._get_output_label_from_dl_label_and_score(python_label, score)

    def check_high_confidence_behavior(magika: Magika) -> None:
        # Per-label threshold, with the medium-confidence threshold as fallback.
        threshold = magika._model_config.thresholds.get(
            python_label, magika._model_config.medium_confidence_threshold
        )
        assert resolve(magika, 0.01) == demoted_to_txt
        assert resolve(magika, threshold - 0.01) == demoted_to_txt
        assert resolve(magika, threshold + 0.01) == kept_as_python
        assert resolve(magika, 0.99) == kept_as_python

    best_guess = Magika(model_dir=model_dir, prediction_mode=PredictionMode.BEST_GUESS)
    for score in (0.01, 0.40, 0.60, 0.99):
        assert resolve(best_guess, score) == kept_as_python

    medium = Magika(
        model_dir=model_dir, prediction_mode=PredictionMode.MEDIUM_CONFIDENCE
    )
    assert resolve(medium, 0.01) == demoted_to_txt
    assert (
        resolve(medium, medium._model_config.medium_confidence_threshold - 0.01)
        == demoted_to_txt
    )
    assert resolve(medium, 0.60) == kept_as_python
    assert resolve(medium, 0.99) == kept_as_python

    check_high_confidence_behavior(
        Magika(model_dir=model_dir, prediction_mode=PredictionMode.HIGH_CONFIDENCE)
    )

    # test that the default is HIGH_CONFIDENCE
    check_high_confidence_behavior(Magika(model_dir=model_dir))


def test_magika_module_overwrite_reason() -> None:
    """Check the overwrite reason reported with the output label.

    Covers plain threshold handling in the three prediction modes, every entry
    of the model's overwrite map, and the generic TXT / UNKNOWN labels, which
    are expected to stay unchanged even below the threshold.
    """
    high = Magika(prediction_mode=PredictionMode.HIGH_CONFIDENCE)
    medium = Magika(prediction_mode=PredictionMode.MEDIUM_CONFIDENCE)
    best = Magika(prediction_mode=PredictionMode.BEST_GUESS)

    python_label = ContentTypeLabel.PYTHON
    kept_as_python = (python_label, OverwriteReason.NONE)
    demoted_to_txt = (ContentTypeLabel.TXT, OverwriteReason.LOW_CONFIDENCE)

    high_cfg = high._model_config
    python_high_threshold = high_cfg.thresholds.get(
        python_label, high_cfg.medium_confidence_threshold
    )
    medium_threshold = medium._model_config.medium_confidence_threshold

    assert (
        high._get_output_label_from_dl_label_and_score(
            python_label, python_high_threshold + 0.01
        )
        == kept_as_python
    )
    assert (
        high._get_output_label_from_dl_label_and_score(
            python_label, python_high_threshold - 0.01
        )
        == demoted_to_txt
    )

    assert (
        medium._get_output_label_from_dl_label_and_score(
            python_label, medium_threshold + 0.01
        )
        == kept_as_python
    )
    assert (
        medium._get_output_label_from_dl_label_and_score(
            python_label, medium_threshold - 0.01
        )
        == demoted_to_txt
    )

    # BEST_GUESS keeps the model label on both sides of the medium threshold.
    assert (
        best._get_output_label_from_dl_label_and_score(
            python_label, medium_threshold + 0.01
        )
        == kept_as_python
    )
    assert (
        best._get_output_label_from_dl_label_and_score(
            python_label, medium_threshold - 0.01
        )
        == kept_as_python
    )

    for source_label in sorted(high_cfg.overwrite_map.keys()):
        target_label = high_cfg.overwrite_map[source_label]
        # Below the threshold the expected fallback depends on whether the
        # overwrite target is a textual content type.
        if high._cts_infos[target_label].is_text:
            fallback_label = ContentTypeLabel.TXT
        else:
            fallback_label = ContentTypeLabel.UNKNOWN
        source_threshold = high_cfg.thresholds.get(
            source_label, high_cfg.medium_confidence_threshold
        )
        assert high._get_output_label_from_dl_label_and_score(
            source_label, source_threshold + 0.01
        ) == (target_label, OverwriteReason.OVERWRITE_MAP)
        assert high._get_output_label_from_dl_label_and_score(
            source_label, source_threshold - 0.01
        ) == (fallback_label, OverwriteReason.LOW_CONFIDENCE)

    for generic_label in [ContentTypeLabel.TXT, ContentTypeLabel.UNKNOWN]:
        unchanged = (generic_label, OverwriteReason.NONE)
        generic_high_threshold = high_cfg.thresholds.get(
            generic_label,
            high_cfg.medium_confidence_threshold,
        )
        assert (
            high._get_output_label_from_dl_label_and_score(
                generic_label, generic_high_threshold - 0.01
            )
            == unchanged
        )
        assert (
            medium._get_output_label_from_dl_label_and_score(
                generic_label, medium_threshold - 0.01
            )
            == unchanged
        )


def test_magika_module_with_directory() -> None:
    """A directory path must be reported as DIRECTORY with score 1.0."""
    m = Magika()

    with tempfile.TemporaryDirectory() as tmp_dir:
        dir_path = Path(tmp_dir)
        outcome = m.identify_path(dir_path)
        assert outcome.path == dir_path
        assert outcome.ok
        prediction = outcome.prediction
        assert prediction.dl.label == ContentTypeLabel.UNDEFINED
        assert prediction.output.label == ContentTypeLabel.DIRECTORY
        assert prediction.score == 1.0


def test_magika_module_multiple_copies_of_the_same_file() -> None:
    """Passing the same path several times must yield one result per occurrence."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        sample_path = Path(tmp_dir) / "test.txt"
        sample_path.write_text("test")

        repeated_paths = [sample_path, sample_path, sample_path]

        m = Magika()
        outcomes = m.identify_paths(repeated_paths)
        assert len(outcomes) == len(repeated_paths)
        for outcome in outcomes:
            assert outcome.path == sample_path
            assert outcome.ok
            assert outcome.prediction.output.label == ContentTypeLabel.TXT


def test_magika_module_with_symlink() -> None:
    """Symlinks are followed by default and reported as SYMLINK with no_dereference=True."""

    def assert_output_label(
        magika: Magika, target: Path, expected_label: ContentTypeLabel
    ) -> None:
        outcome = magika.identify_path(target)
        assert outcome.path == target
        assert outcome.ok
        assert outcome.prediction.output.label == expected_label

    with tempfile.TemporaryDirectory() as tmp_dir:
        regular_path = Path(tmp_dir) / "test.txt"
        regular_path.write_text("test")

        link_path = Path(tmp_dir) / "symlink-test.txt"
        link_path.symlink_to(regular_path)

        # Default behavior: the link resolves to the text file it points to.
        following = Magika()
        assert_output_label(following, regular_path, ContentTypeLabel.TXT)
        assert_output_label(following, link_path, ContentTypeLabel.TXT)

        # With no_dereference the link itself is what gets reported.
        not_following = Magika(no_dereference=True)
        assert_output_label(not_following, regular_path, ContentTypeLabel.TXT)
        assert_output_label(not_following, link_path, ContentTypeLabel.SYMLINK)


def test_magika_module_with_non_existing_file() -> None:
    """A path that does not exist must produce a non-ok FILE_NOT_FOUND_ERROR result."""
    m = Magika()

    with tempfile.TemporaryDirectory() as tmp_dir:
        missing_path = Path(tmp_dir).joinpath("non_existing.txt")

        outcome = m.identify_path(missing_path)
        assert outcome.path == missing_path
        assert not outcome.ok
        assert outcome.status == Status.FILE_NOT_FOUND_ERROR


def test_magika_module_with_permission_error() -> None:
    """Files without read permission must be reported as PERMISSION_ERROR.

    Two cases are exercised: a file with content and an empty file.
    """
    import os

    # A privileged user bypasses file permission checks, so chmod(0o000) would
    # not make the file unreadable and the assertions below could not hold.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("file permissions are not enforced for the root user")

    m = Magika()

    # The empty-content case checks that an empty, non-accessible file is marked
    # as "permission error". Note that on some file-systems, one can read the
    # file size even without read permission, and it would thus be possible to
    # return "empty" (this is what we were actually doing in the past). However,
    # returning "permission_error" makes the expected behavior consistent across
    # file systems and it simplifies the implementation.
    for content in ["text", ""]:
        with tempfile.TemporaryDirectory() as td:
            unreadable_test_path = Path(td) / "test.txt"
            unreadable_test_path.write_text(content)

            unreadable_test_path.chmod(0o000)
            try:
                res = m.identify_path(unreadable_test_path)
                assert res.path == unreadable_test_path
                assert not res.ok
                assert res.status == Status.PERMISSION_ERROR
            finally:
                # Give the permissions back so the temporary directory cleanup
                # does not have to deal with a permission-less file.
                unreadable_test_path.chmod(0o600)


@pytest.mark.skip
def test_magika_module_with_really_many_files() -> None:
    """Identify the same basic test file many times in one batch (skipped by default)."""
    sample_path = utils.get_one_basic_test_file_path()

    m = Magika()

    for copies in [10000]:
        outcomes = m.identify_paths([sample_path] * copies)
        for outcome in outcomes:
            assert outcome.ok
            # TODO: add more checks


@pytest.mark.slow
def test_magika_module_with_big_file() -> None:
    """Check that identify_path finishes within a timeout for increasingly large files.

    A SIGALRM-based alarm bounds each identify_path call. The alarm is always
    cancelled and the previous SIGALRM handler is restored on exit, so a
    failure here cannot leave a pending alarm or a stale handler behind.
    """

    def signal_handler(signum: int, frame: Any) -> None:
        raise Exception("Timeout")

    # It should take much less than this, but pytest weird scheduling sometimes
    # creates unexpected slow downs.
    timeout = 2

    m = Magika()

    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    try:
        for sample_size in [1000, 10000, 1_000_000, 1_000_000_000, 10_000_000_000]:
            with tempfile.TemporaryDirectory() as td:
                sample_path = Path(td) / "sample.dat"
                # The file is created before the alarm is armed, so only the
                # identification itself is timed.
                utils.write_random_file_with_size(sample_path, sample_size)
                print(f"Starting running Magika with a timeout of {timeout}")
                signal.alarm(timeout)
                try:
                    res = m.identify_path(sample_path)
                finally:
                    # Cancel the alarm even when identify_path raises.
                    signal.alarm(0)
                assert res.ok
                print("Done running Magika")
    finally:
        # signal.signal() may return None when the previous handler was not
        # installed from Python; in that case there is nothing to restore.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)


def test_api_call_with_bad_types() -> None:
    """Check which argument types the identify_* methods accept or reject.

    Calls with supported types are only expected not to raise (their results
    are discarded); calls with unsupported types must raise TypeError.
    """
    m = Magika()

    # identify_path: Path and str are accepted, bytes are rejected.
    _ = m.identify_path(Path("/non_existing.txt"))
    _ = m.identify_path("/non_existing.txt")
    with pytest.raises(TypeError):
        _ = m.identify_path(b"/non_existing.txt")  # type: ignore[arg-type]

    # identify_paths: lists of Path and/or str are accepted; a bare Path or a
    # list containing bytes is rejected.
    _ = m.identify_paths([Path("/non_existing.txt")])
    _ = m.identify_paths(["/non_existing.txt"])
    _ = m.identify_paths([Path("/non_existing.txt"), Path("/not_existing2.txt")])
    _ = m.identify_paths([Path("/non_existing.txt"), "/not_existing2.txt"])
    _ = m.identify_paths(["/non_existing.txt", "/not_existing2.txt"])
    with pytest.raises(TypeError):
        _ = m.identify_paths(Path("/non_existing.txt"))  # type: ignore[arg-type]
    with pytest.raises(TypeError):
        _ = m.identify_paths([b"/non_existing.txt"])  # type: ignore[list-item]
    with pytest.raises(TypeError):
        _ = m.identify_paths([Path("/non_existing.txt"), b"/not_existing2.txt"])  # type: ignore[list-item]

    # identify_bytes: bytes are accepted, str is rejected.
    _ = m.identify_bytes(b"bytes content")
    with pytest.raises(TypeError):
        _ = m.identify_bytes("str content")  # type: ignore[arg-type]

    # identify_stream: a binary stream is accepted; a text stream, raw bytes
    # and str are rejected.
    _ = m.identify_stream(io.BytesIO(b"bytes stream content"))
    with pytest.raises(TypeError):
        _ = m.identify_stream(io.StringIO("str stream content"))  # type: ignore[arg-type]
    with pytest.raises(TypeError):
        _ = m.identify_stream(b"bytes content")  # type: ignore[arg-type]
    with pytest.raises(TypeError):
        _ = m.identify_stream("str content")  # type: ignore[arg-type]


def test_access_magika_result_and_prediction() -> None:
    """Check the attribute surface of MagikaResult for an ok and a failed result.

    For an ok result, the result fields, the prediction fields and the
    forwarded dl/output/score properties must have the expected types, and
    unknown attributes must raise AttributeError. For a failed result (a
    non-existing path), accessing the prediction or any forwarded property
    must raise ValueError.
    """
    m = Magika()

    # Successful identification: everything is accessible.
    res = m.identify_bytes(b"text")
    assert isinstance(res, MagikaResult)
    assert isinstance(res.path, Path)
    assert isinstance(res.ok, bool)
    assert isinstance(res.status, Status)
    assert isinstance(res.prediction, MagikaPrediction)
    assert isinstance(res.prediction.dl, ContentTypeInfo)
    assert isinstance(res.prediction.output, ContentTypeInfo)
    assert isinstance(res.prediction.score, float)
    # test access to forwarded properties
    assert isinstance(res.dl, ContentTypeInfo)
    assert isinstance(res.output, ContentTypeInfo)
    assert isinstance(res.score, float)
    # test access to non-existing properties
    with pytest.raises(AttributeError):
        _ = res.foo  # type: ignore[attr-defined]
    with pytest.raises(AttributeError):
        _ = res.prediction.foo  # type: ignore[attr-defined]

    # Failed identification: path/ok/status remain accessible, while the
    # prediction (and anything reached through it) raises ValueError.
    res = m.identify_path(Path("/non_existing.txt"))
    assert isinstance(res, MagikaResult)
    assert isinstance(res.path, Path)
    assert isinstance(res.ok, bool)
    assert isinstance(res.status, Status)
    with pytest.raises(ValueError):
        _ = res.prediction
    with pytest.raises(ValueError):
        _ = res.prediction.dl
    with pytest.raises(ValueError):
        _ = res.prediction.output
    with pytest.raises(ValueError):
        _ = res.prediction.score
    with pytest.raises(ValueError):
        _ = res.dl
    with pytest.raises(ValueError):
        _ = res.output
    with pytest.raises(ValueError):
        _ = res.score
    # An unknown attribute on the result itself still raises AttributeError...
    with pytest.raises(AttributeError):
        _ = res.foo  # type: ignore[attr-defined]
    # ...whereas going through res.prediction is expected to raise ValueError.
    with pytest.raises(ValueError):
        _ = res.prediction.foo  # type: ignore[attr-defined]


def test_access_backward_compatibility_layer() -> None:
    """Check the legacy attribute names still exposed on the forwarded dl/output objects.

    ct_label and magic must emit a DeprecationWarning and match the label and
    description of the prediction; score is not available on those objects
    and must raise AttributeError.
    """
    m = Magika()

    outcome = m.identify_bytes(b"text")
    assert isinstance(outcome, MagikaResult)
    assert isinstance(outcome.path, Path)
    assert isinstance(outcome.ok, bool)
    assert isinstance(outcome.status, Status)
    prediction = outcome.prediction
    assert isinstance(prediction, MagikaPrediction)
    assert isinstance(outcome.prediction.dl, ContentTypeInfo)
    assert isinstance(outcome.prediction.output, ContentTypeInfo)
    assert isinstance(outcome.prediction.score, float)

    with pytest.warns(DeprecationWarning):
        legacy_dl_label = outcome.dl.ct_label
        assert legacy_dl_label == outcome.prediction.dl.label
    with pytest.warns(DeprecationWarning):
        legacy_output_label = outcome.output.ct_label
        assert legacy_output_label == outcome.prediction.output.label

    with pytest.raises(AttributeError):
        _ = outcome.dl.score
    with pytest.raises(AttributeError):
        _ = outcome.output.score

    with pytest.warns(DeprecationWarning):
        legacy_dl_magic = outcome.dl.magic
        assert legacy_dl_magic == outcome.prediction.dl.description
    with pytest.warns(DeprecationWarning):
        legacy_output_magic = outcome.output.magic
        assert legacy_output_magic == outcome.prediction.output.description


def test_get_model_and_output_content_types() -> None:
    """Sanity-check the lists returned by get_output_content_types / get_model_content_types."""
    m = Magika()
    output_cts = m.get_output_content_types()
    output_cts_set = set(output_cts)
    model_cts = m.get_model_content_types()
    model_cts_set = set(model_cts)

    # Both must be non-empty lists.
    for listed_cts in (output_cts, model_cts):
        assert isinstance(listed_cts, List)
        assert len(listed_cts) > 0

    assert all(isinstance(ct, ContentTypeLabel) for ct in output_cts)

    # Check for no duplicates
    assert len(output_cts) == len(output_cts_set)

    # Check basic properties about special ContentTypeLabel entries
    output_only_specials = {
        ContentTypeLabel.DIRECTORY,
        ContentTypeLabel.EMPTY,
        ContentTypeLabel.SYMLINK,
        ContentTypeLabel.TXT,
        ContentTypeLabel.UNKNOWN,
    }
    model_only_specials = {ContentTypeLabel.UNDEFINED}
    assert output_only_specials <= output_cts_set
    assert not (model_only_specials <= output_cts_set)
    assert model_only_specials <= model_cts_set
    assert not (output_only_specials <= model_cts_set)

    # Spot check for popular content types
    popular_cts = {ContentTypeLabel.ELF, ContentTypeLabel.PDF}
    for known_cts in (output_cts_set, model_cts_set):
        assert popular_cts <= known_cts


def test_magika_imports() -> None:
    """Check what `from magika import *` exposes.

    Every public class must be present and identical to the object obtained
    through an explicit import; internal classes must be absent.
    """
    imported_modules = utils.get_imported_objects_after_wildcard()

    # Check that Magika and other public classes are correctly imported
    from magika import (
        ContentTypeInfo,
        ContentTypeLabel,
        Magika,
        MagikaError,
        MagikaPrediction,
        MagikaResult,
        OverwriteReason,
        PredictionMode,
        Status,
    )

    expected_public_objects = {
        "ContentTypeInfo": ContentTypeInfo,
        "ContentTypeLabel": ContentTypeLabel,
        "Magika": Magika,
        "MagikaError": MagikaError,
        "MagikaPrediction": MagikaPrediction,
        "MagikaResult": MagikaResult,
        "OverwriteReason": OverwriteReason,
        "PredictionMode": PredictionMode,
        "Status": Status,
    }
    for public_name, public_object in expected_public_objects.items():
        # The name is used as the assertion message to pinpoint the failing export.
        assert imported_modules.get(public_name) == public_object, public_name

    # Check that internal classes are not imported
    for internal_name in ("ModelFeatures", "ModelOutput"):
        assert imported_modules.get(internal_name) is None, internal_name


def get_expected_content_type_label_from_test_file_path(
    test_path: Path,
) -> ContentTypeLabel:
    """Return the expected label of a test file: the name of the directory containing it."""
    containing_dir_name = test_path.parent.name
    return ContentTypeLabel(containing_dir_name)


def check_result_vs_expected_result(
    file_path: Path, result: MagikaResult, expected_result_path: Optional[Path] = None
) -> None:
    """Assert that `result` is ok and carries the path and label expected for `file_path`.

    Args:
        file_path: test file the result refers to; its parent directory name
            gives the expected content type label.
        result: the result to validate.
        expected_result_path: path the result is expected to report; defaults
            to `file_path` when None.
    """
    path_to_match = file_path if expected_result_path is None else expected_result_path
    assert result.path == path_to_match
    assert result.ok
    wanted_label = get_expected_content_type_label_from_test_file_path(file_path)
    assert result.prediction.output.label == wanted_label


def check_results_vs_expected_results(
    files_paths: List[Path], results: List[MagikaResult]
) -> None:
    """Validate a batch of results against the test files they were produced from.

    Args:
        files_paths: the test files, in the order they were submitted.
        results: the results, expected to be aligned one-to-one with `files_paths`.
    """
    # zip() stops at the shorter sequence, so a missing (or extra) result would
    # otherwise go unnoticed.
    assert len(results) == len(files_paths)
    for file_path, result in zip(files_paths, results):
        check_result_vs_expected_result(file_path, result)


================================================
FILE: python/tests/test_python_magika_client.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess
from pathlib import Path


def test_python_magika_client() -> None:
    """Smoke-test the Python client script by running it as a subprocess."""
    python_root_dir = Path(__file__).parent.parent
    client_script = python_root_dir.joinpath(
        "src", "magika", "cli", "magika_client.py"
    ).resolve()

    invocations = [
        # quick test to check there are no obvious problems
        ["--help"],
        # quick test to check there are no crashes (the script scans itself)
        [str(client_script)],
    ]
    for extra_args in invocations:
        subprocess.run(
            [str(client_script), *extra_args], capture_output=True, check=True
        )


================================================
FILE: python/tests/utils.py
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gzip
import math
import random
import string
from pathlib import Path
from typing import Any, Dict, List

import pytest


def get_repo_root_dir() -> Path:
    """Return the resolved directory three levels above this file."""
    location = Path(__file__)
    for _ in range(3):
        location = location.parent
    return location.resolve()


def get_tests_data_dir() -> Path:
    """Return the `tests_data` directory.

    It is looked up under the repo root first and, failing that, next to the
    repo root (one level above), where it is asserted to exist.
    """
    repo_root = get_repo_root_dir()
    primary = repo_root / "tests_data"
    if not primary.is_dir():
        fallback = repo_root.parent / "tests_data"
        assert fallback.is_dir()
        return fallback
    return primary


def get_basic_tests_files_dir() -> Path:
    """Return the `basic` sub-directory of the tests data dir, asserting it exists."""
    basic_dir = get_tests_data_dir().joinpath("basic")
    assert basic_dir.is_dir()
    return basic_dir


def get_mitra_tests_files_dir() -> Path:
    """Return the `mitra` sub-directory of the tests data dir, asserting it exists."""
    mitra_dir = get_tests_data_dir().joinpath("mitra")
    assert mitra_dir.is_dir()
    return mitra_dir


def get_previously_missdetected_files_dir() -> Path:
    """Return the `previous_missdetections` sub-directory of the tests data dir.

    The directory is asserted to exist.
    """
    missdetections_dir = get_tests_data_dir().joinpath("previous_missdetections")
    assert missdetections_dir.is_dir()
    return missdetections_dir


def get_basic_test_files_paths() -> List[Path]:
    """Return the sorted paths of all regular files found recursively in the basic dir."""
    root = get_basic_tests_files_dir()
    return sorted(entry for entry in root.rglob("*") if entry.is_file())


def get_mitra_test_files_paths() -> List[Path]:
    """Return the sorted paths of all regular files found recursively in the mitra dir."""
    root = get_mitra_tests_files_dir()
    return sorted(entry for entry in root.rglob("*") if entry.is_file())


def get_previously_missdetected_files_paths() -> List[Path]:
    """Return the sorted paths of all regular files under the previous-missdetections dir."""
    root = get_previously_missdetected_files_dir()
    return sorted(entry for entry in root.rglob("*") if entry.is_file())


def get_reference_features_extraction_examples_path() -> Path:
    """Return the path of the gzipped JSON with the features-extraction reference examples."""
    reference_dir = get_tests_data_dir() / "reference"
    return reference_dir / "features_extraction_examples.json.gz"


def get_reference_for_inference_examples_by_path_path(model_name: str) -> Path:
    """Return the path of the by-path inference reference file for `model_name`."""
    reference_file_name = f"{model_name}-inference_examples_by_path.json.gz"
    reference_dir = get_tests_data_dir() / "reference"
    return reference_dir / reference_file_name


def get_reference_for_inference_examples_by_content_path(model_name: str) -> Path:
    """Return the path of the by-content inference reference file for `model_name`."""
    reference_file_name = f"{model_name}-inference_examples_by_content.json.gz"
    reference_dir = get_tests_data_dir() / "reference"
    return reference_dir / reference_file_name


def get_one_basic_test_file_path() -> Path:
    """Return the first basic test file path (in sorted order)."""
    all_basic_paths = get_basic_test_files_paths()
    return all_basic_paths[0]


def get_random_ascii_bytes(size: int) -> bytes:
    """Return `size` random bytes drawn from the first 62 characters of string.printable."""
    alphabet = string.printable[:62].encode("ascii")
    return bytes(random.choice(alphabet) for _ in range(size))


def get_random_bytes(size: int) -> bytes:
    """Return `size` random bytes, each drawn from the full 0-255 range."""
    byte_values = range(256)
    return bytes(random.choice(byte_values) for _ in range(size))


def get_lines_from_stream(stream: str) -> List[str]:
    """Split `stream` on newlines and return the stripped, non-empty lines in order."""
    stripped_candidates = (raw_line.strip() for raw_line in stream.split("\n"))
    return [line for line in stripped_candidates if line != ""]


def write_random_file_with_size(sample_path: Path, sample_size: int) -> None:
    """Create `sample_path` with `sample_size` bytes of content.

    Despite the name, the content is not random: the file is filled with the
    byte b"A". Data is written in chunks of at most 1GB. The target must not
    already exist as a file.
    """
    print(f"Writing random file at {str(sample_path)} with size {sample_size}")
    assert not sample_path.is_file()
    block_size = 1024 * 1024 * 1024  # 1GB
    with open(sample_path, "wb") as out:
        remaining = sample_size
        while remaining > 0:
            chunk_len = min(remaining, block_size)
            out.write(b"A" * chunk_len)
            remaining -= chunk_len
    print("Random file created")


def get_models_dir() -> Path:
    """Return `src/magika/models` located two levels above this file."""
    package_root = Path(__file__).parent.parent
    return package_root.joinpath("src", "magika", "models")


def get_default_model_dir() -> Path:
    """Return the models sub-directory named after Magika's default model."""
    # Imported here, at call time, as in the original implementation.
    from magika.magika import Magika

    default_model_name = Magika._get_default_model_name()
    return get_models_dir() / default_model_name


def generate_whitespaces(size: int) -> bytes:
    """Return `size` bytes cycling through the characters of string.whitespace."""
    cycle = string.whitespace.encode("ascii")
    cycle_len = len(cycle)
    return bytes(cycle[position % cycle_len] for position in range(size))


def generate_pattern(size: int, only_printable: bool) -> bytes:
    """Generate a repeating byte pattern of exactly `size` bytes for features-extraction tests.

    With `only_printable` the repeated unit is the first 62 characters of
    string.printable (10 + 26 * 2); otherwise it is every byte value from 0 to 255.
    """
    unit = (
        string.printable[: 10 + 26 * 2].encode("ascii")
        if only_printable
        else bytes(range(256))
    )

    # Repeat the unit enough times to cover `size`, then cut to the exact length.
    repetitions = int(math.ceil(size / len(unit)))
    result = (unit * repetitions)[:size]
    assert len(result) == size
    return result


def gzip_compress(content: bytes) -> bytes:
    """Gzip-compress `content` with the header timestamp set to 0."""
    compressed = gzip.compress(content, mtime=0.0)
    return compressed


def gzip_decompress(content: bytes) -> bytes:
    """Return the decompressed payload of the gzip-compressed `content`."""
    decompressed = gzip.decompress(content)
    return decompressed


def get_imported_objects_after_wildcard() -> Dict[str, Any]:
    """Return the names bound by ``from magika import *``.

    The statement is executed with a fresh, empty dict as its local
    namespace, so the returned mapping holds only what the wildcard import
    introduced. An ``ImportError`` is reported through ``pytest.fail``.
    """
    imported: Dict[str, Any] = {}
    try:
        exec("from magika import *", globals(), imported)
    except ImportError as exc:
        pytest.fail(f"Could not import module 'magika': {exc}")
    return imported


================================================
FILE: rust/.gitignore
================================================
/target/


================================================
FILE: rust/README.md
================================================
This directory contains the Rust crates and their tools. It has the following structure:
- The `cli` directory contains the Magika Rust CLI. It is published on crates.io as `magika-cli`. It
  can be compiled with `cargo build --release` from the `cli` directory. The output binary will be
  `target/release/magika`.
- The `lib` directory contains the Magika Rust library. It is published on crates.io as `magika`.
- The `gen` directory is for maintainers when a new model is available.
- The `test.sh` script tests the crates listed above. It runs as part of the GitHub continuous
  integration.
- The `sync.sh` script updates the library when a new model is available using the `gen` crate.
- The `publish.sh` script prepares the library and the CLI for publishing to crates.io. It generates
  a commit that must be merged first.
- The `color.sh` file is a shell library for the scripts above.
- The remaining files have the usual meaning associated with their names.


================================================
FILE: rust/changelog.sh
================================================
#!/bin/sh
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort as soon as any command fails.
set -e
# Source the shared shell helpers. The `info` and `error` commands used below are not defined in
# this script, so they are presumably provided by color.sh.
. ./color.sh

# Report a changelog problem for one crate.
#   $1: kind of problem (stale, format, or diff)
#   $2: crate directory, used to build the CHANGELOG.md path of the CI annotation
# When $CI is empty or unset the message is passed to `error`. Otherwise it is printed as a
# `::warning` annotation pointing at line 3 of the crate's CHANGELOG.md.
fail() {
  kind=$1
  dir=$2
  case $kind in
    stale)
      message="Some changes have not been logged."
      ;;
    format)
      message="This line should be an H2 version."
      ;;
    diff)
      message="This version differs from the Cargo.toml file."
      ;;
    *)
      error "Unsupported kind '$kind'"
      ;;
  esac
  if [ -n "$CI" ]; then
    echo "::warning file=rust/$dir/CHANGELOG.md,line=3::$message"
  else
    error "$message"
  fi
}

# Check the changelog of each crate. Every iteration runs in a subshell so that the `cd` does not
# affect the next one.
for dir in lib cli; do
  ( cd "$dir"
    info "Checking $dir"
    # Most recent commit that is not on origin/main and touches CHANGELOG.md; fall back to
    # origin/main itself when there is none.
    ref=$(git log -n1 --pretty=format:%H origin/main.. -- CHANGELOG.md)
    [ -n "$ref" ] || ref=origin/main
    # Cargo.toml and src must not differ from that reference, otherwise a change is unlogged.
    git diff --quiet "$ref" -- Cargo.toml src || fail stale "$dir"
    # Line 3 of the changelog must be an H2 heading; its text is the changelog version.
    cver="$(sed -n '3s/^## //p' CHANGELOG.md)"
    [ -n "$cver" ] || fail format "$dir"
    # Version declared in the [package] section of Cargo.toml (the section ends at a blank line).
    pver="$(sed -n '/^\[package]$/,/^$/{s/^version = "\(.*\)"$/\1/p}' Cargo.toml)"
    [ "$pver" = "$cver" ] || fail diff "$dir"
  )
done


================================================
FILE: rust/cli/CHANGELOG.md
================================================
# Changelog

## 1.0.3-dev

### Patch

- Update dependencies
- Fix new clippy lints

## 1.0.2

### Patch

- Update dependencies
- Enable full LTO for the release profile

## 1.0.1

### Patch

- Update dependencies

## 1.0.0

### Patch

- Change description
- Update dependencies

## 0.1.4

### Minor

- Use true colors when available

### Patch

- Dissociate repository from published content (see `publish.sh` script)
- Remove `package.metadata.deb`

## 0.1.3

### Minor

- Change performance tuning configuration

### Patch

- Add `package.metadata.deb` for `cargo-deb` customization
- Update dependencies

## 0.1.2

### Minor

- Use the `standard_v3_3` model instead of `standard_v3_2` (see [model changelog])
- Do not print the low-confidence warning if the content type was simply overwritten

### Patch

- Update dependencies

## 0.1.1

### Minor

- Use the `standard_v3_2` model instead of `standard_v3_1` (see [model changelog])

## 0.1.0

No changes.

## 0.1.0-rc.5

### Minor

- Use the `standard_v3_1` model instead of `standard_v3_0` (see [model changelog])

## 0.1.0-rc.4

### Minor

- Update the model thresholds

## 0.1.0-rc.3

### Minor

- Use the `standard_v3_0` model instead of `standard_v2_1` (see [model changelog])

### Patch

- Update dependencies

## 0.1.0-rc.2

### Minor

- Exit with non-zero code if at least one error was encountered (fixes #780)

### Patch

- Update dependencies

## 0.1.0-rc.1

### Minor

- Print model version with `--version`
- Change model from `standard_v2_0` to `standard_v2_1`

### Patch

- Fix running on MacOS by defaulting `--intra-threads` to 4
- Fix the `--version` binary name from `magika-cli` to `magika`
- Make sure ONNX Runtime telemetry is disabled
- Change the default of the hidden flag `--num-tasks` from 1 to the number of CPUs

## 0.1.0-rc.0

This version is the initial implementation and should be considered unstable. In particular, it
ships a new model in comparison to the Python binary and we would love feedback on
[GitHub](https://github.com/google/magika/issues).

## 0.0.0

This version is a placeholder and does not expose anything.

[model changelog]: https://github.com/google/magika/blob/main/assets/models/CHANGELOG.md


================================================
FILE: rust/cli/Cargo.toml
================================================
[package]
name = "magika"
version = "1.0.3-dev"
authors = ["Magika Developers <magika-dev@google.com>"]
license = "Apache-2.0"
edition = "2021"
description = "Determines file content types using AI"
repository = "https://github.com/google/magika"
homepage = "https://securityresearch.google/magika"
keywords = ["cli", "file", "magic"]
categories = ["command-line-utilities", "filesystem", "parser-implementations"]
include = ["/LICENSE", "/src"]

[dependencies]
anyhow = "1.0.86"
async-channel = "2.3.1"
clap = { version = "4.5.9", features = ["cargo", "derive", "string"] }
colored = "3.0.0"
magika_lib = { package = "magika", version = "=1.1.0-dev", path = "../lib", features = ["serde"] }
num_cpus = "1.16.0"
ort = "=2.0.0-rc.12"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
tokio = { version = "1.43.1", features = ["full"] }

[profile.release]
codegen-units = 1
lto = true

[profile.dist]
inherits = "release"


================================================
FILE: rust/cli/README.md
================================================
# Magika CLI

This binary crate implements a command-line interface (CLI) to the library crate
[magika](https://crates.io/crates/magika) which provides file content type detection using AI.

## Disclaimer

This project is not an official Google project. It is not supported by Google and Google
specifically disclaims all warranties as to its quality, merchantability, or fitness for a
particular purpose.

The `magika` library and this `magika-cli` binary are still unstable (as indicated by the major
version of zero) and new versions might introduce breaking changes (all changes will follow [cargo
semver compatibility](https://doc.rust-lang.org/cargo/reference/semver.html)). In particular,
version 0.1.0-rc.0 ships a new model in comparison to the Python binary and we would love feedback
on [GitHub](https://github.com/google/magika/issues).

## Installation

You can install the latest version from the `magika` Python package:

```shell
pipx install magika
```

You can install the latest version from a shell using `curl`:

```shell
curl -LsSf https://securityresearch.google/magika/install.sh | sh
```

You can install the latest version from a shell using `wget`:

```shell
wget -qO- https://securityresearch.google/magika/install.sh | sh
```

You can install the latest version from PowerShell:

```shell
powershell -ExecutionPolicy Bypass -c "irm https://securityresearch.google/magika/install.ps1 | iex"
```

You can install the latest version from crates.io:

```shell
cargo install --locked magika-cli
```

It is also possible to install from the git repository, in which case the version (accessible with
`magika --version`) will be suffixed by `-dev` (e.g. `0.1.0-dev`) to indicate that the binary is the
development version of the version prefix (e.g. `0.1.0` for the previous example).

To install the latest version from the git repository:

```shell
cargo install --locked --git=https://github.com/google/magika.git magika-cli
```

To install from a local clone of the git repository (possibly with custom changes):

```shell
git clone https://github.com/google/magika.git
cd magika
cargo install --locked --path=rust/cli
```

## Examples

```shell
% cd tests_data/basic && magika -r * | head
asm/code.asm: Assembly (code)
batch/simple.bat: DOS batch file (code)
c/code.c: C source (code)
css/code.css: CSS source (code)
csv/magika_test.csv: CSV document (code)
dockerfile/Dockerfile: Dockerfile (code)
docx/doc.docx: Microsoft Word 2007+ document (document)
docx/magika_test.docx: Microsoft Word 2007+ document (document)
eml/sample.eml: RFC 822 mail (text)
empty/empty_file: Empty file (inode)
```

```shell
% magika ./tests_data/basic/python/code.py --json
[
  {
    "path": "./tests_data/basic/python/code.py",
    "result": {
      "status": "ok",
      "value": {
        "dl": {
          "description": "Python source",
          "extensions": [
            "py",
            "pyi"
          ],
          "group": "code",
          "is_text": true,
          "label": "python",
          "mime_type": "text/x-python"
        },
        "output": {
          "description": "Python source",
          "extensions": [
            "py",
            "pyi"
          ],
          "group": "code",
          "is_text": true,
          "label": "python",
          "mime_type": "text/x-python"
        },
        "score": 0.996999979019165
      }
    }
  }
]
```

```shell
% cat tests_data/basic/ini/doc.ini | magika -
-: INI configuration file (text)
```

```shell
% magika --help
Determines file content types using AI

Usage: magika [OPTIONS] [PATH]...

Arguments:
  [PATH]...
          List of paths to the files to analyze.

          Use a dash (-) to read from standard input (can only be used once).

Options:
  -r, --recursive
          Identifies files within directories instead of identifying the directory itself

      --no-dereference
          Identifies symbolic links as is instead of identifying their content by following them

      --colors
          Prints with colors regardless of terminal support

      --no-colors
          Prints without colors regardless of terminal support

  -s, --output-score
          Prints the prediction score in addition to the content type

  -i, --mime-type
          Prints the MIME type instead of the content type description

  -l, --label
          Prints a simple label instead of the content type description

      --json
          Prints in JSON format

      --jsonl
          Prints in JSONL format

      --format <CUSTOM>
          Prints using a custom format (use --help for details).

          The following placeholders are supported:

            %p  The file path
            %l  The unique label identifying the content type
            %d  The description of the content type
            %g  The group of the content type
            %m  The MIME type of the content type
            %e  Possible file extensions for the content type
            %s  The score of the content type for the file
            %S  The score of the content type for the file in percent
            %b  The model output if overruled (empty otherwise)
            %%  A literal %

  -h, --help
          Print help (see a summary with '-h')

  -V, --version
          Print version
```


See the [docs on Magika's core
concepts](https://securityresearch.google/magika/core-concepts/how-magika-works/) for more details
about the output format and other important aspects.


================================================
FILE: rust/cli/output
================================================
+ magika rust/code.rs
rust/code.rs: Rust source (code)
+ env -u COLORTERM magika rust/code.rs --colors
rust/code.rs: Rust source (code)
+ magika rust/code.rs --output-score
rust/code.rs: Rust source (code) 57%
+ magika rust/code.rs --json
[
  {
    "path": "rust/code.rs",
    "result": {
      "status": "ok",
      "value": {
        "dl": {
          "description": "Rust source",
          "extensions": [
            "rs"
          ],
          "group": "code",
          "is_text": true,
          "label": "rust",
          "mime_type": "application/x-rust"
        },
        "output": {
          "description": "Rust source",
          "extensions": [
            "rs"
          ],
          "group": "code",
          "is_text": true,
          "label": "rust",
          "mime_type": "application/x-rust"
        },
        "score": 0.5759999752044678
      }
    }
  }
]
+ magika rust/code.rs python/code.py --json
[
  {
    "path": "rust/code.rs",
    "result": {
      "status": "ok",
      "value": {
        "dl": {
          "description": "Rust source",
          "extensions": [
            "rs"
          ],
          "group": "code",
          "is_text": true,
          "label": "rust",
          "mime_type": "application/x-rust"
        },
        "output": {
          "description": "Rust source",
          "extensions": [
            "rs"
          ],
          "group": "code",
          "is_text": true,
          "label": "rust",
          "mime_type": "application/x-rust"
        },
        "score": 0.5759999752044678
      }
    }
  },
  {
    "path": "python/code.py",
    "result": {
      "status": "ok",
      "value": {
        "dl": {
          "description": "Python source",
          "extensions": [
            "py",
            "pyi"
          ],
          "group": "code",
          "is_text": true,
          "label": "python",
          "mime_type": "text/x-python"
        },
        "output": {
          "description": "Python source",
          "extensions": [
            "py",
            "pyi"
          ],
          "group": "code",
          "is_text": true,
          "label": "python",
          "mime_type": "text/x-python"
        },
        "score": 0.996999979019165
      }
    }
  }
]
+ magika rust/code.rs --jsonl
{"path":"rust/code.rs","result":{"status":"ok","value":{"dl":{"description":"Rust source","extensions":["rs"],"group":"code","is_text":true,"label":"rust","mime_type":"application/x-rust"},"output":{"description":"Rust source","extensions":["rs"],"group":"code","is_text":true,"label":"rust","mime_type":"application/x-rust"},"score":0.5759999752044678}}}
+ magika rust/code.rs python/code.py --jsonl
{"path":"rust/code.rs","result":{"status":"ok","value":{"dl":{"description":"Rust source","extensions":["rs"],"group":"code","is_text":true,"label":"rust","mime_type":"application/x-rust"},"output":{"description":"Rust source","extensions":["rs"],"group":"code","is_text":true,"label":"rust","mime_type":"application/x-rust"},"score":0.5759999752044678}}}
{"path":"python/code.py","result":{"status":"ok","value":{"dl":{"description":"Python source","extensions":["py","pyi"],"group":"code","is_text":true,"label":"python","mime_type":"text/x-python"},"output":{"description":"Python source","extensions":["py","pyi"],"group":"code","is_text":true,"label":"python","mime_type":"text/x-python"},"score":0.996999979019165}}}
+ magika rust/code.rs --mime-type
rust/code.rs: application/x-rust


================================================
FILE: rust/cli/publish.sh
================================================
#!/bin/sh
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort as soon as any command fails.
set -e
# Source the shared shell helpers. The `info` and `error` commands used below are not defined in
# this script, so they are presumably provided by color.sh.
. ../color.sh

# This script renames the package to magika-cli and publishes to crates.io.

# Refuse to run with uncommitted changes: the patches below are committed with `git commit -a` and
# later discarded with `git reset --hard`.
[ -z "$(git status -s)" ] || error "Repository is not clean"

info "Patch Cargo.toml"
# On line 2 (the package name), insert `-cli` before the closing quote. On the `magika_lib`
# dependency line, drop the `package = "magika"` rename so the dependency key becomes `magika`.
sed -i '2s/"$/-cli"/;s/^magika_lib = { package = "magika",/magika = {/' Cargo.toml
# Append an explicit [[bin]] section so the binary is named `magika` and built from src/main.rs.
echo >> Cargo.toml
echo '[[bin]]' >> Cargo.toml
echo 'name = "magika"' >> Cargo.toml
echo 'path = "src/main.rs"' >> Cargo.toml

info "Patch src/main.rs"
# Import the library under its own name and remove the `self as magika,` alias.
sed -i 's/^use magika_lib/use magika/;s/self as magika,//' src/main.rs
# Reformat the file after the textual substitution.
cargo fmt -- src/main.rs

info "Run tests"
./test.sh

info "Create a temporary commit"
git commit -aqm'Temporary commit to publish magika-cli'

info "Publish to crates.io"
cargo publish

info "Revert the temporary commit"
# Drop the temporary commit and restore the working tree to the previous commit.
git reset --hard HEAD~


================================================
FILE: rust/cli/src/main.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::{bail, ensure, Result};
use clap::{Args, Parser};
use colored::ColoredString;
use magika_lib::{
    self as magika, ContentType, Features, FeaturesOrRuled, FileType, InferredType,
    OverwriteReason, Session, TypeInfo,
};
use ort::session::builder::GraphOptimizationLevel;
use serde::Serialize;
use tokio::fs::File;
use tokio::io::AsyncReadExt;

/// Determines file content types using AI.
#[derive(Parser)]
#[command(name = "magika", version = Version, arg_required_else_help = true)]
struct Flags {
    /// List of paths to the files to analyze.
    ///
    /// Use a dash (-) to read from standard input (can only be used once).
    path: Vec,

    /// Identifies files within directories instead of identifying the directory itself.
    #[arg(short, long)]
    recursive: bool,

    /// Identifies symbolic links as is instead of identifying their content by following them.
    #[arg(long)]
    no_dereference: bool,

    #[clap(flatten)]
    colors: Colors,

    #[clap(flatten)]
    modifiers: Modifiers,

    #[clap(flatten)]
    format: Format,

    #[clap(flatten)]
    experimental: Experimental,
}

/// Marker type used as the `version` of the `Flags` command.
///
/// It resolves to the crate version followed by the model name, separated by a space.
struct Version;
impl clap::builder::IntoResettable<clap::builder::Str> for Version {
    fn into_resettable(self) -> clap::builder::Resettable<clap::builder::Str> {
        let binary = clap::crate_version!();
        let model = magika::MODEL_NAME;
        clap::builder::Resettable::Value(format!("{binary} {model}").into())
    }
}

// Color overrides. `multiple = false` means at most one of the two flags may be given. The `///`
// comments below are the help text shown by clap.
#[derive(Args)]
#[group(multiple = false)]
struct Colors {
    /// Prints with colors regardless of terminal support.
    #[arg(long = "colors")]
    enable: bool,

    /// Prints without colors regardless of terminal support.
    #[arg(long = "no-colors")]
    disable: bool,
}

// Modifiers of the default output. The whole group conflicts with the "format" group, and
// `--label` additionally conflicts with `--mime-type`. The `///` comments below are the help text
// shown by clap.
#[derive(Args)]
#[group(conflicts_with = "format")]
struct Modifiers {
    /// Prints the prediction score in addition to the content type.
    #[arg(short = 's', long)]
    output_score: bool,

    /// Prints the MIME type instead of the content type description.
    #[arg(short = 'i', long)]
    mime_type: bool,

    /// Prints a simple label instead of the content type description.
    #[arg(short, long, conflicts_with = "mime_type")]
    label: bool,
}

// Output format selection. The group id is "format" and `multiple = false` means at most one of
// these flags may be given. The `///` comments below are the help text shown by clap.
#[derive(Args)]
#[group(id = "format", multiple = false)]
struct Format {
    /// Prints in JSON format.
    #[arg(long)]
    json: bool,

    /// Prints in JSONL format.
    #[arg(long)]
    jsonl: bool,

    /// Prints using a custom format (use --help for details).
    ///
    /// The following placeholders are supported:
    ///
    ///   %p  The file path
    ///   %l  The unique label identifying the content type
    ///   %d  The description of the content type
    ///   %g  The group of the content type
    ///   %m  The MIME type of the content type
    ///   %e  Possible file extensions for the content type
    ///   %s  The score of the content type for the file
    ///   %S  The score of the content type for the file in percent
    ///   %b  The model output if overruled (empty otherwise)
    ///   %%  A literal %
    #[arg(long = "format", verbatim_doc_comment)]
    custom: Option<String>,
}

// Performance tuning flags. All of them are hidden from the help output (`hide = true`).
#[derive(Args)]
struct Experimental {
    /// Number of files to identify in a single inference.
    #[arg(hide = true, long, default_value = "1")]
    batch_size: usize,

    /// Number of tasks for batch parallelism.
    #[arg(hide = true, long)]
    num_tasks: Option<usize>,

    /// Number of threads for graph parallelism (ONNX Runtime configuration).
    ///
    /// This has no effect if --parallel-execution is false or unset.
    #[arg(hide = true, long)]
    inter_threads: Option<usize>,

    /// Number of threads for node parallelism (ONNX Runtime configuration).
    #[arg(hide = true, long)]
    intra_threads: Option<usize>,

    /// Graph optimization level, from 0 to 3 (ONNX Runtime configuration).
    #[arg(hide = true, long)]
    optimization_level: Option<usize>,

    /// Whether to enable parallel execution (ONNX Runtime configuration).
    #[arg(hide = true, long)]
    parallel_execution: Option<bool>,
}

#[tokio::main]
async fn main() -> Result<()> {
    let mut flags = Flags::parse();
    ensure!(0 < flags.experimental.batch_size, "--batch-size cannot be zero");
    // If --num-tasks is set, we don't do any guessing.
    let num_tasks = flags.experimental.num_tasks.unwrap_or_else(|| {
        // Otherwise, if --intra-thread is set, we use a single task.
        if flags.experimental.intra_threads.is_some() {
            return 1;
        }
        // Otherwise, we use the minimum number of intra threads (which is 2).
        flags.experimental.intra_threads = Some(2);
        // And as many tasks as physical CPUs with a minimum of 2.
        std::cmp::max(2, num_cpus::get_physical())
    });
    ensure!(0 < num_tasks, "--num-tasks cannot be zero");
    ensure!(
        flags.path.iter().filter(|x| x.to_str() == Some("-")).count() <= 1,
        "only one path can be the standard input"
    );
    let flags = Arc::new(flags);
    if flags.colors.enable {
        colored::control::set_override(true);
    }
    if flags.colors.disable {
        colored::control::set_override(false);
    }
    let (result_sender, mut result_receiver) =
        tokio::sync::mpsc::channel::>(num_tasks * flags.experimental.batch_size);
    let (batch_sender, batch_receiver) = async_channel::bounded::(num_tasks);
    tokio::spawn({
        let flags = flags.clone();
        let result_sender = result_sender.clone();
        async move {
            if let Err(e) = extract_features(&flags, &batch_sender, &result_sender).await {
                result_sender.send(Err(e)).await.unwrap();
            }
        }
    });
    for _ in 0..num_tasks {
        let mut magika = build_session(&flags)?;
        tokio::spawn({
            let batch_receiver = batch_receiver.clone();
            let result_sender = result_sender.clone();
            async move {
                if let Err(e) = infer_batch(&mut magika, &batch_receiver, &result_sender).await {
                    result_sender.send(Err(e)).await.unwrap();
                }
            }
        });
    }
    drop(result_sender);
    if flags.format.json {
        print!("[");
    }
    let mut reorder = Reorder::default();
    let mut errors = false;
    while let Some(response) = result_receiver.recv().await {
        reorder.push(response?);
        while let Some(response) = reorder.pop() {
            errors |= response.result.is_err();
            if flags.format.json {
                if reorder.next != 1 {
                    print!(",");
                }
                for line in serde_json::to_string_pretty(&response.json()?)?.lines() {
                    print!("\n  {line}");
                }
            } else {
                println!("{}", response.format(&flags)?);
            }
        }
    }
    debug_assert!(reorder.is_empty());
    if flags.format.json {
        if reorder.next != 0 {
            println!();
        }
        println!("]");
    }
    if errors {
        std::process::exit(1);
    }
    Ok(())
}

async fn extract_features(
    flags: &Flags, batch_sender: &async_channel::Sender,
    result_sender: &tokio::sync::mpsc::Sender>,
) -> Result<()> {
    let mut paths = Vec::new();
    let mut features = Vec::new();
    let mut flags_paths = flags.path.clone();
    flags_paths.reverse();
    let mut order = 0;
    while let Some(path) = flags_paths.pop() {
        let mut result = None;
        match process_path(flags, &mut flags_paths, &path).await {
            Err(x) => result = Some(Err(x)),
            Ok(ProcessPath::Recursive) => continue,
            Ok(ProcessPath::Ruled(x)) => result = Some(Ok(x)),
            Ok(ProcessPath::Features(x)) => features.push(x),
        };
        match result {
            Some(result) => result_sender.send(Ok(Response { order, path, result })).await?,
            None => paths.push((order, path)),
        }
        order += 1;
        if features.len() == flags.experimental.batch_size {
            batch_sender.send(Batch { paths, features }).await?;
            paths = Vec::new();
            features = Vec::new();
        }
    }
    if !paths.is_empty() {
        batch_sender.send(Batch { paths, features }).await?;
    }
    Ok(())
}

/// Outcome of processing a single path.
enum ProcessPath {
    /// The path was a directory and its entries were queued for processing.
    Recursive,
    /// Features were extracted and must go through inference.
    Features(Features),
    /// The file type was determined without inference.
    Ruled(FileType),
}

impl From for ProcessPath {
    fn from(value: FeaturesOrRuled) -> Self {
        match value {
            FeaturesOrRuled::Features(x) => ProcessPath::Features(x),
            FeaturesOrRuled::Ruled(x) => ProcessPath::Ruled(FileType::Ruled(x)),
        }
    }
}

async fn process_path(
    flags: &Flags, paths: &mut Vec, path: &Path,
) -> magika::Result {
    if path.to_str() == Some("-") {
        let mut stdin = Vec::new();
        tokio::io::stdin().read_to_end(&mut stdin).await?;
        return Ok(FeaturesOrRuled::extract_sync(&stdin[..])?.into());
    }
    let metadata = if flags.no_dereference {
        tokio::fs::symlink_metadata(&path).await?
    } else {
        tokio::fs::metadata(&path).await?
    };
    if metadata.is_dir() {
        return Ok(if flags.recursive {
            let mut entries = tokio::fs::read_dir(&path).await?;
            let mut dir_paths = Vec::new();
            while let Some(entry) = entries.next_entry().await? {
                dir_paths.push(entry.path());
            }
            dir_paths.sort();
            while let Some(path) = dir_paths.pop() {
                paths.push(path);
            }
            ProcessPath::Recursive
        } else {
            ProcessPath::Ruled(FileType::Directory)
        });
    }
    if metadata.is_symlink() {
        return Ok(ProcessPath::Ruled(FileType::Symlink));
    }
    let file = File::open(&path).await?;
    Ok(FeaturesOrRuled::extract_async(file).await?.into())
}

fn build_session(flags: &Flags) -> Result {
    ort::init().with_telemetry(false).commit();
    let mut magika = Session::builder();
    if let Some(inter_threads) = flags.experimental.inter_threads {
        magika = magika.with_inter_threads(inter_threads);
    }
    // Apparently, SetIntraOpNumThreads must be called on MacOS, otherwise we get the following
    // error: intra op thread pool must have at least one thread for RunAsync.
    let intra_threads_default = cfg!(target_os = "macos").then_some(4);
    if let Some(intra_threads) = flags.experimental.intra_threads.or(intra_threads_default) {
        magika = magika.with_intra_threads(intra_threads);
    }
    if let Some(opt_level) = flags.experimental.optimization_level {
        let opt_level = match opt_level {
            0 => GraphOptimizationLevel::Disable,
            1 => GraphOptimizationLevel::Level1,
            2 => GraphOptimizationLevel::Level2,
            3 => GraphOptimizationLevel::Level3,
            _ => bail!("--optimization-level must be 0, 1, 2, or 3"),
        };
        magika = magika.with_optimization_level(opt_level);
    }
    if let Some(parallel_execution) = flags.experimental.parallel_execution {
        magika = magika.with_parallel_execution(parallel_execution);
    }
    Ok(magika.build()?)
}

async fn infer_batch(
    magika: &mut Session, receiver: &async_channel::Receiver,
    sender: &tokio::sync::mpsc::Sender>,
) -> Result<()> {
    while let Ok(Batch { paths, features }) = receiver.recv().await {
        let batch = magika.identify_features_batch_async(&features).await?;
        assert_eq!(batch.len(), paths.len());
        for ((order, path), output) in paths.into_iter().zip(batch) {
            let result = Ok(output);
            sender.send(Ok(Response { order, path, result })).await?;
        }
    }
    Ok(())
}

#[derive(Debug, Default)]
struct Reorder {
    next: usize,
    todo: HashMap,
}

impl Reorder {
    fn is_empty(&self) -> bool {
        self.todo.is_empty()
    }

    fn push(&mut self, response: Response) {
        debug_assert!(self.next <= response.order);
        let prev = self.todo.insert(response.order, response);
        debug_assert!(prev.is_none());
    }

    fn pop(&mut self) -> Option {
        let result = self.todo.remove(&self.next)?;
        self.next += 1;
        Some(result)
    }
}

struct Batch {
    paths: Vec<(usize, PathBuf)>,
    features: Vec,
}

#[derive(Debug)]
struct Response {
    order: usize,
    path: PathBuf,
    result: Result,
}

/// Error categories as serialized in the JSON output (in snake case).
#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
enum JsonError {
    Unknown,
    FileDoesNotExist,
    PermissionError,
}

/// Successful result as serialized in the JSON output, borrowing the type information.
#[derive(Serialize)]
struct JsonResult<'a> {
    dl: &'a TypeInfo,
    output: &'a TypeInfo,
    score: f32,
}

impl From for JsonError {
    fn from(value: magika::Error) -> Self {
        match value {
            magika::Error::IOError(x) => match x.kind() {
                ErrorKind::NotFound => JsonError::FileDoesNotExist,
                ErrorKind::PermissionDenied => JsonError::PermissionError,
                _ => JsonError::Unknown,
            },
            _ => JsonError::Unknown,
        }
    }
}

impl Response {
    /// Renders the response as a line of text according to `flags`.
    ///
    /// The format string is the custom one if provided, otherwise a default one built from the
    /// modifier flags. With the JSONL flag (and no custom format), the JSON serialization of the
    /// response is returned directly instead. The following placeholders are expanded:
    ///
    /// - `%p`: the path
    /// - `%l`: the label
    /// - `%d`: the description
    /// - `%g`: the group
    /// - `%m`: the MIME type
    /// - `%e`: the extensions, comma-separated within brackets
    /// - `%s`: the score with 2 decimals
    /// - `%S`: the score as a truncated percentage followed by `%`
    /// - `%b`: a note with the model best guess, only when it was overwritten for low confidence
    ///
    /// Any other character following `%` is output as-is (without the `%`), and a `%` at the very
    /// end of the format is dropped. The expanded text is colored with `color()`.
    fn format(self, flags: &Flags) -> Result {
        let mut result = String::new();
        let format = match &flags.format.custom {
            Some(x) => x.clone(),
            // NOTE(review): presumably the JSON format is handled by the caller before formatting
            // individual responses; confirm.
            None if flags.format.json => unreachable!(),
            None if flags.format.jsonl => {
                return Ok(serde_json::to_string(&self.json()?)?.into());
            }
            None => {
                // Default format: path, then the representation selected by the modifiers, then
                // the optional low-confidence note and score.
                let mut format = "%p: ".to_string();
                format.push_str(match () {
                    () if flags.modifiers.mime_type => "%m",
                    () if flags.modifiers.label => "%l",
                    () => "%d (%g)",
                });
                format.push_str("%b");
                format.push_str(if flags.modifiers.output_score { " %S" } else { "" });
                format
            }
        };
        let mut format = format.chars();
        loop {
            match format.next() {
                Some('%') => match format.next() {
                    Some('p') => write!(&mut result, "{}", self.path.display())?,
                    Some('l') => write!(&mut result, "{}", self.label())?,
                    Some('d') => write!(&mut result, "{}", self.description())?,
                    Some('g') => write!(&mut result, "{}", self.group())?,
                    Some('m') => write!(&mut result, "{}", self.mime_type())?,
                    Some('e') => write!(&mut result, "{}", join(self.extensions()))?,
                    Some('s') => write!(&mut result, "{:.2}", self.score())?,
                    Some('S') => write!(&mut result, "{}%", (100. * self.score()).trunc())?,
                    Some('b') => {
                        // Nothing is output unless the inferred type was overwritten because of a
                        // low confidence.
                        if let Ok(FileType::Inferred(InferredType {
                            content_type: Some((_, OverwriteReason::LowConfidence)),
                            inferred_type,
                            score,
                        })) = &self.result
                        {
                            write!(
                                &mut result,
                                " [Low-confidence model best-guess: {} ({}), score={:.3}]",
                                inferred_type.info().description,
                                inferred_type.info().group,
                                score,
                            )?;
                        }
                    }
                    // Unknown placeholder (including `%%`): output the character itself.
                    Some(c) => result.push(c),
                    None => break,
                },
                Some(c) => result.push(c),
                None => break,
            }
        }
        Ok(self.color(result.into()))
    }

    /// Converts the response to a JSON object with a `path` and a `result` field.
    ///
    /// On success, `result` has the status `ok` and a value made of the type inferred by the model
    /// (`dl`, the undefined type unless the result was inferred), the final type (`output`), and
    /// the score truncated to 3 decimals. On failure, `result` only has the error as status.
    fn json(self) -> Result {
        let path = self.path.to_path_buf();
        let result = match self.result {
            Ok(x) => {
                let dl = match &x {
                    FileType::Inferred(x) => x.inferred_type.info(),
                    _ => ContentType::Undefined.info(),
                };
                let output = x.info();
                let score = (x.score() * 1000.).trunc() / 1000.;
                let value = serde_json::to_value(JsonResult { dl, output, score })?;
                serde_json::json!({ "status": "ok", "value": value })
            }
            Err(error) => serde_json::json!({ "status": JsonError::from(error) }),
        };
        Ok(serde_json::json!({ "path": path, "result": result }))
    }

    /// Returns the label of the file type, or `error` if identification failed.
    fn label(&self) -> &str {
        match &self.result {
            Err(_) => "error",
            Ok(x) => x.info().label,
        }
    }

    /// Returns the description of the file type, or the error message if identification failed.
    fn description(&self) -> Cow<'_, str> {
        match &self.result {
            Err(e) => e.to_string().into(),
            Ok(x) => x.info().description.into(),
        }
    }

    /// Returns the group of the file type, or `error` if identification failed.
    fn group(&self) -> &str {
        match &self.result {
            Err(_) => "error",
            Ok(x) => x.info().group,
        }
    }

    /// Returns the MIME type of the file type, or `error` if identification failed.
    fn mime_type(&self) -> &str {
        match &self.result {
            Err(_) => "error",
            Ok(x) => x.info().mime_type,
        }
    }

    /// Returns the extensions of the file type, or nothing if identification failed.
    fn extensions(&self) -> &[&str] {
        match &self.result {
            Err(_) => &[],
            Ok(x) => x.info().extensions,
        }
    }

    /// Returns the score of the result (errors are given a score of 1).
    fn score(&self) -> f32 {
        match &self.result {
            Err(_) => 1.0,
            Ok(x) => x.score(),
        }
    }

    /// Colors `result` according to the group of the file type (errors are bold red).
    fn color(&self, result: ColoredString) -> ColoredString {
        use colored::Colorize as _;
        // We only use true colors (except for errors). If the terminal doesn't support true colors,
        // the colored crate will automatically choose the closest one.
        match &self.result {
            Err(_) => result.bold().red(),
            Ok(x) => match x.info().group {
                // Tailwind Colors
                "application" => result.truecolor(0xf4, 0x3f, 0x5e), // Rose 500
                "archive" => result.truecolor(0xf5, 0x9e, 0x0b),     // Amber 500
                "audio" => result.truecolor(0x84, 0xcc, 0x16),       // Lime 500
                "code" => result.truecolor(0x8b, 0x5c, 0xf6),        // Violet 500
                "document" => result.truecolor(0x3b, 0x82, 0xf6),    // Blue 500
                "executable" => result.truecolor(0xec, 0x48, 0x99),  // Pink 500
                "image" => result.truecolor(0x06, 0xb6, 0xd4),       // Cyan 500
                "video" => result.truecolor(0x10, 0xb9, 0x81),       // Emerald 500
                // Any other group is bold light gray.
                _ => result.bold().truecolor(0xcc, 0xcc, 0xcc),
            },
        }
    }
}

/// Formats the items as a comma-separated list (without spaces) enclosed in brackets.
///
/// For example, the items `a` and `b` give `[a,b]`, and no items give `[]`.
fn join>(xs: impl IntoIterator) -> String {
    let mut result = String::new();
    result.push('[');
    for (i, x) in xs.into_iter().enumerate() {
        // The separator goes before every item but the first.
        if i != 0 {
            result.push(',');
        }
        result.push_str(x.as_ref());
    }
    result.push(']');
    result
}


================================================
FILE: rust/cli/test.sh
================================================
#!/bin/sh
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
. ../color.sh

x cargo check
x cargo build --release
x cargo fmt -- --check
x cargo clippy -- --deny=warnings

# Make the release binary built above take precedence over any other magika in the PATH. The
# quotes keep a working directory containing spaces in one piece.
PATH="$(dirname "$PWD")/target/release:$PATH"

# The name of the directory containing a test file is the label magika is expected to output.
TEST_SUITES='basic previous_missdetections'
info "Test against the test suites: $TEST_SUITES"
( cd ../../tests_data
  # $TEST_SUITES is deliberately unquoted to pass one argument per test suite. Lines are read
  # verbatim (IFS= and -r) such that backslashes and surrounding spaces in paths are preserved.
  magika --format='%p: %l' --recursive $TEST_SUITES | while IFS= read -r line; do
    file=${line%: *}
    directory=${file%/*}
    expected=${directory##*/}
    actual=${line#*: }
    [ "$expected" = "$actual" ] || error "$file is detected as $actual"
  done
)

# We rely below on the fact that we don't have permission on /etc/shadow.
[ $(id -u) -eq 0 ] && success "No more tests in Docker"

info "Test exit code with at least one error"
# Checks that magika exits with code 1 and prints exactly the expected output.
#
# $1: space-separated list of files to give to magika
# $2: expected standard output
test_error() {
  files="$1"
  expected="$2"
  # Disable errexit in a subshell, otherwise the failing magika invocation would abort the script
  # before its exit code and output can be checked.
  ( set +e
    # $files is deliberately unquoted to pass one argument per file.
    actual="$(magika $files)"
    code=$?
    [ $code -eq 1 ] || error "invalid exit code for magika $files"
    [ "$actual" = "$expected" ] || error "invalid output for magika $files"
  )
}
test_error '/etc/shadow' "\
/etc/shadow: Permission denied (os error 13) (error)"
test_error 'non_existent src/main.rs' "\
non_existent: No such file or directory (os error 2) (error)
src/main.rs: Rust source (code)"


================================================
FILE: rust/color.sh
================================================
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Runs the given command in a subshell with tracing enabled, such that the command is printed
# before being executed and tracing does not leak to the caller.
x() { ( set -x; "$@"; ); }

color() { echo "[$1m$2: $3"; }
# Message helpers built on color(). Note that success and error terminate the current shell (with
# exit code 0 and 1 respectively), while info and todo simply return.
info() { color '1;36' Info "$*"; }
todo() { color '1;33' Todo "$*"; }
success() { color '1;32' Done "$*"; exit 0; }
error() { color '1;31' Error "$*"; exit 1; }


================================================
FILE: rust/gen/Cargo.toml
================================================
[package]
name = "gen"
version = "0.0.0"
edition = "2021"
publish = false

[dependencies]
anyhow = "1.0.82"
serde = { version = "1.0.197", features = ["derive"] }
serde_json = "1.0.114"


================================================
FILE: rust/gen/README.md
================================================
This crate is for maintenance purposes only. It is used to update the Rust library to a new model.
There are 3 files in the Rust library that depend on the model:

- The model itself, `rust/lib/src/model.onnx`, which is a symbolic link to some model under
  `assets/models`, controlled by the `rust/gen/model` symbolic link. Publishing the crate will
  dereference this symbolic link.
- The labels describing the model output, `rust/lib/src/model.rs`, which is generated from the model
  configuration, `rust/gen/model/config.min.json`.
- The list of possible file types, `rust/lib/src/content.rs`, which is generated from the knowledge
  base of content types, `assets/content_types_kb.min.json`.

The purpose of this crate is to generate the last two files. There is a test to make sure that they
are up-to-date. If the test fails, one simply needs to run `./sync.sh` from the `rust` directory to
regenerate them.

An alternative design to generating the files before publishing the crate, would be to publish the
model and Magika configurations and use a build script to generate the files during compilation.
This has a few disadvantages:

- We need to publish the model and Magika configurations which contain more information than needed
  to use the library (and the CLI).
- We need to use a build script, which is frowned upon for security reasons, as the entity compiling
  the library or CLI now needs to trust the build script, which can run arbitrary code. This only
  matters when the entity compiling the library or CLI is not the same as the one running the
  library or CLI (e.g. Debian maintainers), since the library and CLI too can run arbitrary code.
- Using a build script also increases compilation time (and compilation complexity) instead of
  having it factored before publishing.


================================================
FILE: rust/gen/content_types
================================================
3gp
ace
ai
aidl
apk
applebplist
appleplist
asm
asp
autohotkey
autoit
awk
batch
bazel
bib
bmp
bzip
c
cab
cat
chm
clojure
cmake
cobol
coff
coffeescript
cpp
crt
crx
cs
csproj
css
csv
dart
deb
dex
dicom
diff
directory
dm
dmg
doc
dockerfile
docx
dsstore
dwg
dxf
elf
elixir
emf
eml
empty
epub
erb
erlang
flac
flv
fortran
gemfile
gemspec
gif
gitattributes
gitmodules
go
gradle
groovy
gzip
h5
handlebars
haskell
hcl
hlp
htaccess
html
icns
ico
ics
ignorefile
ini
internetshortcut
ipynb
iso
jar
java
javabytecode
javascript
jinja
jp2
jpeg
json
jsonl
julia
kotlin
latex
lha
lisp
lnk
lua
m3u
m4
macho
makefile
markdown
matlab
mht
midi
mkv
mp3
mp4
mscompress
msi
mum
npy
npz
nupkg
objectivec
ocaml
odp
ods
odt
ogg
one
onnx
otf
outlook
parquet
pascal
pcap
pdb
pdf
pebin
pem
perl
php
pickle
png
po
postscript
powershell
ppt
pptx
prolog
proteindb
proto
psd
python
pythonbytecode
pytorch
qt
r
randombytes
randomtxt
rar
rdf
rpm
rst
rtf
ruby
rust
scala
scss
sevenzip
sgml
shell
smali
snap
solidity
sql
sqlite
squashfs
srt
stlbinary
stltext
sum
svg
swf
swift
symlink
tar
tcl
textproto
tga
thumbsdb
tiff
toml
torrent
tsv
ttf
twig
txt
typescript
undefined
unknown
vba
vcxproj
verilog
vhdl
vtt
vue
wasm
wav
webm
webp
winregistry
wmf
woff
woff2
xar
xls
xlsb
xlsx
xml
xpi
xz
yaml
yara
zig
zip
zlibstream


================================================
FILE: rust/gen/src/main.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, BTreeSet};
use std::fs::File;
use std::io::Write;
use std::path::Path;

use anyhow::{ensure, Context, Result};
use serde::Deserialize;

/// Regenerates the files of the Rust library that depend on the model.
///
/// All paths are relative to the current directory.
// NOTE(review): presumably meant to be run from `rust/gen` (e.g. through `./sync.sh`); confirm.
fn main() -> Result<()> {
    let content_types: BTreeMap =
        serde_json::from_reader(File::open("../../assets/content_types_kb.min.json")?)?;
    // The model name is the last path component of the target of the `model` symbolic link.
    let model_name = std::fs::read_link("model")?;
    let model_name = model_name
        .components()
        .next_back()
        .context("model link")?
        .as_os_str()
        .to_str()
        .context("model name")?;
    let model_config = serde_json::from_reader(File::open("model/config.min.json")?)?;
    // The model configuration is generated against the content types that were actually exposed.
    let content_types = generate_content_types(content_types, model_name, &model_config)?;
    generate_model_config(&content_types, model_config)?;
    Ok(())
}

/// Generates `../lib/src/content.rs` and updates the `content_types` file.
///
/// The `content_types` file is rewritten with its previous lines plus the labels of the model. The
/// knowledge base is then restricted to those labels. Each remaining content type gets a
/// `TypeInfo` static, and all of them except `directory` and `symlink` get a `ContentType` variant.
///
/// Returns the labels of the generated `ContentType` variants, in declaration order.
fn generate_content_types(
    mut content_types: BTreeMap, model_name: &str, model_config: &ModelConfig,
) -> Result> {
    // We only want to generate content types that are already exposed or that are model labels.
    // This is a conservative approach to avoid exposing the whole knowledge base if it contains
    // experimental content types that won't ever be exposed in the future.
    let content_types_content = std::fs::read_to_string("content_types")?;
    let mut labels = content_types_content.lines().collect::>();
    labels.extend(model_config.target_labels_space.iter().map(|x| x.as_str()));
    // The file was fully read above, so it can be truncated and rewritten.
    let mut content_types_file = File::create("content_types")?;
    for label in &labels {
        writeln!(&mut content_types_file, "{label}")?;
    }
    content_types.retain(|x, _| labels.contains(x.as_str()));
    let mut output = create_generated_file("../lib/src/content.rs")?;
    writeln!(output, "use crate::file::TypeInfo;\n")?;
    writeln!(output, "/// Model name (only comparable with equality).")?;
    writeln!(output, "pub const MODEL_NAME: &str = {model_name:?};\n")?;
    writeln!(output, "/// Model major version.")?;
    writeln!(output, "pub const MODEL_MAJOR_VERSION: u32 = {};\n", model_config.version_major)?;
    struct Variant {
        label: String,
        doc: String,
    }
    let mut variants = Vec::new();
    for (label, info) in content_types {
        let ContentType { mime_type, group, description, extensions, is_text } = info.clone();
        // Missing fields of the knowledge base get a default value.
        let mime_type = mime_type.unwrap_or_else(|| {
            if is_text { "text/plain" } else { "application/octet-stream" }.to_string()
        });
        let group = group.unwrap_or_else(|| "unknown".to_string());
        let description = description.unwrap_or_else(|| label.clone());
        // Directories and symbolic links get a `TypeInfo` below but no `ContentType` variant.
        if !matches!(label.as_str(), "directory" | "symlink") {
            variants.push(Variant { label: label.clone(), doc: description.clone() });
        }
        writeln!(output, "pub(crate) static {}: TypeInfo = TypeInfo {{", const_name(&label))?;
        writeln!(output, "    label: {label:?},")?;
        writeln!(output, "    mime_type: {mime_type:?},")?;
        writeln!(output, "    group: {group:?},")?;
        writeln!(output, "    description: {description:?},")?;
        writeln!(output, "    extensions: &{extensions:?},")?;
        writeln!(output, "    is_text: {is_text:?},")?;
        writeln!(output, "}};\n")?;
    }
    writeln!(output, "/// Content types for regular files.")?;
    writeln!(output, "#[derive(Debug, Copy, Clone, PartialEq, Eq)]")?;
    writeln!(output, "#[non_exhaustive]")?;
    writeln!(output, "pub enum ContentType {{")?;
    for Variant { label, doc } in &variants {
        writeln!(output, "    /// {doc}")?;
        writeln!(output, "    {},", enum_name(label))?;
    }
    writeln!(output, "}}\n")?;
    writeln!(output, "impl ContentType {{")?;
    writeln!(output, "    pub(crate) const SIZE: usize = {};\n", variants.len())?;
    writeln!(output, "    /// Returns the content type information.")?;
    writeln!(output, "    pub fn info(self) -> &'static TypeInfo {{")?;
    writeln!(output, "        match self {{")?;
    for Variant { label, .. } in &variants {
        writeln!(
            output,
            "            ContentType::{} => &{},",
            enum_name(label),
            const_name(label),
        )?;
    }
    writeln!(output, "        }}")?;
    writeln!(output, "    }}")?;
    writeln!(output, "}}")?;
    Ok(variants.into_iter().map(|x| x.label).collect())
}

fn generate_model_config(content_types: &[String], model_config: ModelConfig) -> Result<()> {
    let ModelConfig {
        beg_size,
        mid_size,
        end_size,
        use_inputs_at_offsets,
        medium_confidence_threshold,
        min_file_size_for_dl,
        padding_token,
        block_size,
        target_labels_space,
        thresholds,
        overwrite_map,
        protection: _,
        aes_key_hex: _,
        version_major: _,
    } = model_config;
    let mut output = create_generated_file("../lib/src/model.rs")?;
    writeln!(output, "use std::borrow::Cow;\n")?;
    writeln!(output, "use crate::config::ModelConfig;")?;
    writeln!(output, "use crate::ContentType;\n")?;
    writeln!(output, "pub(crate) const CONFIG: ModelConfig = ModelConfig {{")?;
    writeln!(output, "    beg_size: {beg_size},")?;
    ensure!(mid_size == 0, "unsupported mid_size");
    writeln!(output, "    end_size: {end_size},")?;
    ensure!(!use_inputs_at_offsets, "unsupported use_inputs_at_offsets");
    writeln!(output, "    min_file_size_for_dl: {min_file_size_for_dl},")?;
    writeln!(output, "    padding_token: {padding_token},")?;
    writeln!(output, "    block_size: {block_size},")?;
    writeln!(output, "    thresholds: Cow::Borrowed(&THRESHOLDS),")?;
    writeln!(output, "    overwrite_map: Cow::Borrowed(&OVERWRITE_MAP),")?;
    writeln!(output, "}};\n")?;
    let mut thresholds_array = vec![medium_confidence_threshold; content_types.len()];
    for (label, threshold) in thresholds {
        let pos = content_types.iter().position(|x| *x == label).unwrap();
        thresholds_array[pos] = threshold;
    }
    writeln!(output, "#[rustfmt::skip]")?;
    writeln!(output, "const THRESHOLDS: [f32; ContentType::SIZE] = {thresholds_array:?};")?;
    writeln!(output, "const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [")?;
    let mut overwrite_array = content_types.to_vec();
    for (src, dst) in overwrite_map {
        let pos = content_types.iter().position(|x| *x == src).unwrap();
        overwrite_array[pos] = dst;
    }
    for label in overwrite_array {
        writeln!(output, "    ContentType::{},", enum_name(&label))?;
    }
    writeln!(output, "];\n")?;
    writeln!(output, "#[derive(Debug, Copy, Clone, PartialEq, Eq)]\n#[repr(u32)]")?;
    writeln!(output, "#[allow(dead_code)] // only constructed through transmute")?;
    writeln!(output, "pub(crate) enum Label {{")?;
    for label in &target_labels_space {
        writeln!(output, "    {},", enum_name(label))?;
    }
    writeln!(output, "}}\n")?;
    writeln!(output, "pub(crate) const NUM_LABELS: usize = {};", target_labels_space.len())?;
    writeln!(output, "impl Label {{")?;
    writeln!(output, "    pub(crate) fn content_type(self) -> ContentType {{")?;
    writeln!(output, "        match self {{")?;
    for label in &target_labels_space {
        let name = enum_name(label);
        writeln!(output, "            Label::{name} => ContentType::{name},")?;
    }
    writeln!(output, "        }}")?;
    writeln!(output, "    }}")?;
    writeln!(output, "}}")?;
    Ok(())
}

/// Creates (or truncates) the file at `path` and writes the header of generated files.
///
/// The header is the first paragraph of this source file (everything before its first empty line,
/// which is the license notice), followed by a notice that the file must not be edited.
fn create_generated_file(path: impl AsRef) -> Result {
    // Note that file!() is a compile-time path, which is read here relative to the current
    // directory.
    let header = std::fs::read_to_string(file!())?;
    let header = header.split("\n\n").next().context("main.rs does not contain an empty line")?;
    let mut output = File::create(path)?;
    writeln!(output, "{header}\n")?;
    writeln!(output, "// DO NOT EDIT, see link below for more information:")?;
    writeln!(output, "// https://github.com/google/magika/tree/main/rust/gen\n")?;
    Ok(output)
}

/// Entry of the content types knowledge base.
///
/// Deserialization fails on unknown fields, such that a change of the knowledge base format does
/// not go unnoticed.
#[derive(Clone, Deserialize)]
#[serde(deny_unknown_fields)]
struct ContentType {
    // The optional fields below get a default value when generating the content types.
    mime_type: Option,
    group: Option,
    description: Option,
    extensions: Vec,
    is_text: bool,
}

/// Model configuration, as deserialized from `model/config.min.json`.
///
/// Deserialization fails on unknown fields, such that a change of the configuration format does
/// not go unnoticed.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct ModelConfig {
    beg_size: usize,
    // Must be zero (other values are not supported by the generator).
    mid_size: usize,
    end_size: usize,
    // Must be false (not supported by the generator).
    use_inputs_at_offsets: bool,
    // Threshold of the content types without an entry in `thresholds`.
    medium_confidence_threshold: f32,
    min_file_size_for_dl: usize,
    padding_token: i32,
    block_size: usize,
    // Labels output by the model, in the order used to declare the generated `Label` variants.
    target_labels_space: Vec,
    thresholds: BTreeMap,
    overwrite_map: BTreeMap,
    // The next two fields are parsed but never read by the generator.
    #[allow(dead_code)]
    protection: String,
    #[allow(dead_code)]
    aes_key_hex: String,
    version_major: u32,
}

/// Converts a label to the name of its enum variant.
///
/// A leading ASCII letter is capitalized, otherwise the label is prefixed with an underscore (e.g.
/// `3gp` becomes `_3gp`). The rest of the label is unchanged.
///
/// Panics if the label is empty or not ASCII.
fn enum_name(xs: &str) -> String {
    assert!(xs.is_ascii());
    let first = char::from(xs.as_bytes()[0]);
    if first.is_ascii_alphabetic() {
        // The label is ASCII, so the first byte is always a full character.
        format!("{}{}", first.to_ascii_uppercase(), &xs[1..])
    } else {
        format!("_{xs}")
    }
}

/// Converts a label to the name of its static item.
///
/// The label is converted to uppercase, and prefixed with an underscore if it does not start with
/// a letter (e.g. `3gp` becomes `_3GP`).
///
/// Panics if the label is empty or not ASCII.
fn const_name(xs: &str) -> String {
    assert!(xs.is_ascii());
    let upper = xs.to_ascii_uppercase();
    let starts_with_letter = upper.as_bytes()[0].is_ascii_uppercase();
    if starts_with_letter {
        upper
    } else {
        format!("_{upper}")
    }
}


================================================
FILE: rust/gen/test.sh
================================================
#!/bin/sh
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
. ../color.sh

x cargo check
x cargo fmt -- --check
x cargo clippy -- --deny=warnings


================================================
FILE: rust/latest.sh
================================================
#!/bin/sh
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
. ./color.sh

# This script updates (or creates if it does not exist) the trampoline release for the website. The
# trampoline release is a "fake" release (whose commit doesn't matter) containing a copy of the
# bash and powershell scripts from the latest cli/vX.Y.Z release.

REPO=google/magika
RELEASES=$(gh release list --repo=$REPO --json=tagName |
             tr -d '[{}]' | tr , '\n' | sed 's/^.*"\([^"]*\)"$/\1/')
info "Found the following releases"
echo "$RELEASES"

TAG=$(echo "$RELEASES" | grep cli/ | head -n1)
LATEST=cli-latest
[ -n "$TAG" ] || error "No CLI release found"
info "The latest CLI release is $TAG"

x gh release download --repo=$REPO $TAG --pattern='magika-installer.*'

info "Delete the previous trampoline release, if any"
echo "$RELEASES" | grep -q $LATEST && x gh release delete --repo=$REPO --yes $LATEST

cat >> notes.txt <"]
license = "Apache-2.0"
edition = "2021"
description = "Determines file content types using AI"
repository = "https://github.com/google/magika"
homepage = "https://securityresearch.google/magika"
keywords = ["file", "magic"]
categories = ["command-line-utilities", "filesystem", "parser-implementations"]
include = ["/LICENSE", "/src"]

[package.metadata.docs.rs]
features = ["_doc"]

[features]
serde = ["dep:serde"]
# Internal features.
_doc = ["serde"]
_test = ["ort/download-binaries", "ort/tls-native"]

[dependencies]
ndarray = "0.17.1"
serde = { version = "1.0.204", features = ["derive"], optional = true }
thiserror = "1.0.63"
tokio = { version = "1.43.1", features = ["fs", "io-util"] }

[dependencies.ort]
version = "=2.0.0-rc.12"
default-features = false
features = ["ndarray", "std"]

[dev-dependencies]
data-encoding = "2.6.0"
flate2 = "1.0.30"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"

[lints.rust]
missing_docs = "warn"
unreachable_pub = "warn"
unused = { level = "warn", priority = -1 }


================================================
FILE: rust/lib/README.md
================================================
# Magika

This library crate provides file content type detection using AI. A command-line interface (CLI) for
this library is provided by the [magika-cli](https://crates.io/crates/magika-cli) binary crate.

## Disclaimer

This project is not an official Google project. It is not supported by Google and Google
specifically disclaims all warranties as to its quality, merchantability, or fitness for a
particular purpose.

This `magika` library and the `magika-cli` binary are still unstable (as indicated by the major
version of zero) and new versions might introduce breaking changes (all changes will follow [cargo
semver compatibility](https://doc.rust-lang.org/cargo/reference/semver.html)). In particular,
version 0.1.0-rc.0 ships a new model in comparison to the Python binary and we would love feedback
on [GitHub](https://github.com/google/magika/issues).


================================================
FILE: rust/lib/src/builder.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ort::session::builder::GraphOptimizationLevel;

use crate::{Result, Session};

/// Configures and creates a Magika session.
///
/// Every option is unset by default, in which case it is not applied to the underlying ONNX
/// Runtime session builder.
#[derive(Debug, Default)]
pub struct Builder {
    // Each field is set by the `with_*` method of the same name.
    inter_threads: Option,
    intra_threads: Option,
    optimization_level: Option,
    parallel_execution: Option,
}

impl Builder {
    /// Configures the number of threads to parallelize the execution of the graph.
    pub fn with_inter_threads(mut self, num_threads: usize) -> Self {
        self.inter_threads = Some(num_threads);
        self
    }

    /// Configures the number of threads to parallelize the execution within nodes.
    pub fn with_intra_threads(mut self, num_threads: usize) -> Self {
        self.intra_threads = Some(num_threads);
        self
    }

    /// Configures the session optimization level.
    pub fn with_optimization_level(mut self, opt_level: GraphOptimizationLevel) -> Self {
        self.optimization_level = Some(opt_level);
        self
    }

    /// Configures the session parallel execution.
    pub fn with_parallel_execution(mut self, parallel_execution: bool) -> Self {
        self.parallel_execution = Some(parallel_execution);
        self
    }

    /// Consumes the builder to create a Magika session.
    ///
    /// Returns an error if the underlying ONNX Runtime session cannot be configured or created.
    pub fn build(self) -> Result {
        Ok(self.build_()?)
    }

    /// Same as `build()` but with the error type of the `ort` crate.
    fn build_(self) -> ort::Result {
        let mut session = ort::session::Session::builder()?;
        // Destructuring makes sure that a new option cannot be forgotten below.
        let Builder { inter_threads, intra_threads, optimization_level, parallel_execution } = self;
        if let Some(num_threads) = inter_threads {
            session = session.with_inter_threads(num_threads)?;
        }
        if let Some(num_threads) = intra_threads {
            session = session.with_intra_threads(num_threads)?;
        }
        if let Some(opt_level) = optimization_level {
            session = session.with_optimization_level(opt_level)?;
        }
        if let Some(parallel_execution) = parallel_execution {
            session = session.with_parallel_execution(parallel_execution)?;
        }
        // The model is embedded in the library at compile time.
        let session = session.commit_from_memory(include_bytes!("model.onnx"))?;
        Ok(Session { session })
    }
}


================================================
FILE: rust/lib/src/config.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;

use crate::ContentType;

/// Configuration of the model used by the library.
#[derive(Debug)]
pub(crate) struct ModelConfig {
    /// Number of features in the beginning part.
    pub(crate) beg_size: usize,
    /// Number of features in the end part.
    pub(crate) end_size: usize,
    pub(crate) min_file_size_for_dl: usize,
    pub(crate) padding_token: i32,
    pub(crate) block_size: usize,
    /// One threshold per content type.
    pub(crate) thresholds: Cow<'static, [f32; ContentType::SIZE]>,
    /// One content type per content type.
    // NOTE(review): presumably the content type to output instead of the inferred one; confirm.
    pub(crate) overwrite_map: Cow<'static, [ContentType; ContentType::SIZE]>,
}

/// Mutable view of a features slice, split into its beginning and end parts.
pub(crate) struct SplitFeatures<'a> {
    /// The first `beg_size` features.
    pub(crate) beg: &'a mut [i32],
    /// The `end_size` features following the beginning part.
    pub(crate) end: &'a mut [i32],
}

impl ModelConfig {
    /// Returns the total number of features, i.e. the beginning and end parts combined.
    pub(crate) fn features_size(&self) -> usize {
        let Self { beg_size, end_size, .. } = self;
        beg_size + end_size
    }

    /// Splits `features` into its beginning part followed by its end part.
    ///
    /// Panics if `features` is shorter than `features_size()`. Debug builds additionally check
    /// that it is not longer.
    pub(crate) fn split_features<'a>(&self, features: &'a mut [i32]) -> SplitFeatures<'a> {
        let (beg, rest) = features.split_at_mut(self.beg_size);
        let (end, rest) = rest.split_at_mut(self.end_size);
        debug_assert!(rest.is_empty());
        SplitFeatures { beg, end }
    }
}


================================================
FILE: rust/lib/src/content.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// DO NOT EDIT, see link below for more information:
// https://github.com/google/magika/tree/main/rust/gen

use crate::file::TypeInfo;

/// Model name (only comparable with equality).
pub const MODEL_NAME: &str = "standard_v3_3";

/// Model major version.
///
/// Unlike [`MODEL_NAME`], this is a plain integer and can be compared with
/// ordering operators.
pub const MODEL_MAJOR_VERSION: u32 = 3;

/// Type metadata for the `3gp` content type (3GPP multimedia file).
pub(crate) static _3GP: TypeInfo = TypeInfo {
    label: "3gp",
    mime_type: "video/3gpp",
    group: "video",
    description: "3GPP multimedia file",
    extensions: &["3gp"],
    is_text: false,
};

/// Type metadata for the `ace` content type (ACE archive).
pub(crate) static ACE: TypeInfo = TypeInfo {
    label: "ace",
    mime_type: "application/x-ace-compressed",
    group: "archive",
    description: "ACE archive",
    extensions: &["ace"],
    is_text: false,
};

/// Type metadata for the `ai` content type (Adobe Illustrator Artwork).
pub(crate) static AI: TypeInfo = TypeInfo {
    label: "ai",
    mime_type: "application/pdf",
    group: "document",
    description: "Adobe Illustrator Artwork",
    extensions: &["ai"],
    is_text: false,
};

/// Type metadata for the `aidl` content type (Android Interface Definition Language).
pub(crate) static AIDL: TypeInfo = TypeInfo {
    label: "aidl",
    mime_type: "text/plain",
    group: "unknown",
    description: "Android Interface Definition Language",
    extensions: &["aidl"],
    is_text: true,
};

/// Type metadata for the `apk` content type (Android package).
pub(crate) static APK: TypeInfo = TypeInfo {
    label: "apk",
    mime_type: "application/vnd.android.package-archive",
    group: "executable",
    description: "Android package",
    extensions: &["apk"],
    is_text: false,
};

/// Type metadata for the `applebplist` content type (Apple binary property list).
///
/// The `plist` extension is also listed by `APPLEPLIST`, so extensions do not
/// uniquely identify an entry.
pub(crate) static APPLEBPLIST: TypeInfo = TypeInfo {
    label: "applebplist",
    mime_type: "application/x-bplist",
    group: "application",
    description: "Apple binary property list",
    extensions: &["bplist", "plist"],
    is_text: false,
};

/// Type metadata for the `appleplist` content type (Apple property list).
pub(crate) static APPLEPLIST: TypeInfo = TypeInfo {
    label: "appleplist",
    mime_type: "application/x-plist",
    group: "application",
    description: "Apple property list",
    extensions: &["plist"],
    is_text: true,
};

/// Type metadata for the `asm` content type (Assembly).
pub(crate) static ASM: TypeInfo = TypeInfo {
    label: "asm",
    mime_type: "text/x-asm",
    group: "code",
    description: "Assembly",
    extensions: &["s", "S", "asm"],
    is_text: true,
};

/// Type metadata for the `asp` content type (ASP source).
pub(crate) static ASP: TypeInfo = TypeInfo {
    label: "asp",
    mime_type: "text/html",
    group: "code",
    description: "ASP source",
    extensions: &["aspx", "asp"],
    is_text: true,
};

/// Type metadata for the `autohotkey` content type (AutoHotKey script).
pub(crate) static AUTOHOTKEY: TypeInfo = TypeInfo {
    label: "autohotkey",
    mime_type: "text/plain",
    group: "code",
    description: "AutoHotKey script",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `autoit` content type (AutoIt script).
pub(crate) static AUTOIT: TypeInfo = TypeInfo {
    label: "autoit",
    mime_type: "text/plain",
    group: "code",
    description: "AutoIt script",
    extensions: &["au3"],
    is_text: true,
};

/// Type metadata for the `awk` content type (Awk).
pub(crate) static AWK: TypeInfo = TypeInfo {
    label: "awk",
    mime_type: "text/plain",
    group: "code",
    description: "Awk",
    extensions: &["awk"],
    is_text: true,
};

/// Type metadata for the `batch` content type (DOS batch file).
pub(crate) static BATCH: TypeInfo = TypeInfo {
    label: "batch",
    mime_type: "text/x-msdos-batch",
    group: "code",
    description: "DOS batch file",
    extensions: &["bat"],
    is_text: true,
};

/// Type metadata for the `bazel` content type (Bazel build file).
pub(crate) static BAZEL: TypeInfo = TypeInfo {
    label: "bazel",
    mime_type: "text/plain",
    group: "code",
    description: "Bazel build file",
    extensions: &["bzl"],
    is_text: true,
};

/// Type metadata for the `bib` content type (BibTeX).
pub(crate) static BIB: TypeInfo = TypeInfo {
    label: "bib",
    mime_type: "text/x-bibtex",
    group: "text",
    description: "BibTeX",
    extensions: &["bib"],
    is_text: true,
};

/// Type metadata for the `bmp` content type (BMP image data).
pub(crate) static BMP: TypeInfo = TypeInfo {
    label: "bmp",
    mime_type: "image/bmp",
    group: "image",
    description: "BMP image data",
    extensions: &["bmp"],
    is_text: false,
};

/// Type metadata for the `bzip` content type (bzip2 compressed data).
pub(crate) static BZIP: TypeInfo = TypeInfo {
    label: "bzip",
    mime_type: "application/x-bzip2",
    group: "archive",
    description: "bzip2 compressed data",
    extensions: &["bz2", "tbz2", "tar.bz2"],
    is_text: false,
};

/// Type metadata for the `c` content type (C source).
pub(crate) static C: TypeInfo = TypeInfo {
    label: "c",
    mime_type: "text/x-c",
    group: "code",
    description: "C source",
    extensions: &["c"],
    is_text: true,
};

/// Type metadata for the `cab` content type (Microsoft Cabinet archive data).
pub(crate) static CAB: TypeInfo = TypeInfo {
    label: "cab",
    mime_type: "application/vnd.ms-cab-compressed",
    group: "archive",
    description: "Microsoft Cabinet archive data",
    extensions: &["cab"],
    is_text: false,
};

/// Type metadata for the `cat` content type (Windows Catalog file).
pub(crate) static CAT: TypeInfo = TypeInfo {
    label: "cat",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Windows Catalog file",
    extensions: &["cat"],
    is_text: false,
};

/// Type metadata for the `chm` content type (MS Windows HtmlHelp Data).
pub(crate) static CHM: TypeInfo = TypeInfo {
    label: "chm",
    mime_type: "application/chm",
    group: "application",
    description: "MS Windows HtmlHelp Data",
    extensions: &["chm"],
    is_text: false,
};

/// Type metadata for the `clojure` content type (Clojure).
pub(crate) static CLOJURE: TypeInfo = TypeInfo {
    label: "clojure",
    mime_type: "text/x-clojure",
    group: "code",
    description: "Clojure",
    extensions: &["clj", "cljs", "cljc", "cljr"],
    is_text: true,
};

/// Type metadata for the `cmake` content type (CMake build file).
pub(crate) static CMAKE: TypeInfo = TypeInfo {
    label: "cmake",
    mime_type: "text/x-cmake",
    group: "code",
    description: "CMake build file",
    extensions: &["cmake"],
    is_text: true,
};

/// Type metadata for the `cobol` content type (Cobol).
pub(crate) static COBOL: TypeInfo = TypeInfo {
    label: "cobol",
    mime_type: "text/x-cobol",
    group: "code",
    description: "Cobol",
    extensions: &["cbl", "cob", "cpy", "CBL", "COB", "CPY"],
    is_text: true,
};

/// Type metadata for the `coff` content type (Intel 80386 COFF).
pub(crate) static COFF: TypeInfo = TypeInfo {
    label: "coff",
    mime_type: "application/x-coff",
    group: "executable",
    description: "Intel 80386 COFF",
    extensions: &["obj", "o"],
    is_text: false,
};

/// Type metadata for the `coffeescript` content type (CoffeeScript).
pub(crate) static COFFEESCRIPT: TypeInfo = TypeInfo {
    label: "coffeescript",
    mime_type: "text/coffeescript",
    group: "code",
    description: "CoffeeScript",
    extensions: &["coffee"],
    is_text: true,
};

/// Type metadata for the `cpp` content type (C++ source).
pub(crate) static CPP: TypeInfo = TypeInfo {
    label: "cpp",
    mime_type: "text/x-c",
    group: "code",
    description: "C++ source",
    extensions: &["cc", "cpp", "cxx", "c++", "cppm", "ixx"],
    is_text: true,
};

/// Type metadata for the `crt` content type (Certificates, binary format).
// NOTE(review): `group` is "text" although `is_text` is false and the
// description says "binary format". This file is generated, so confirm and
// fix in the generator's input data rather than here.
pub(crate) static CRT: TypeInfo = TypeInfo {
    label: "crt",
    mime_type: "application/x-x509-ca-cert",
    group: "text",
    description: "Certificates (binary format)",
    extensions: &["der", "cer", "crt"],
    is_text: false,
};

/// Type metadata for the `crx` content type (Google Chrome extension).
pub(crate) static CRX: TypeInfo = TypeInfo {
    label: "crx",
    mime_type: "application/x-chrome-extension",
    group: "executable",
    description: "Google Chrome extension",
    extensions: &["crx"],
    is_text: false,
};

/// Type metadata for the `cs` content type (C# source).
pub(crate) static CS: TypeInfo = TypeInfo {
    label: "cs",
    mime_type: "text/plain",
    group: "code",
    description: "C# source",
    extensions: &["cs", "csx"],
    is_text: true,
};

/// Type metadata for the `csproj` content type (.NET project config).
pub(crate) static CSPROJ: TypeInfo = TypeInfo {
    label: "csproj",
    mime_type: "text/plain",
    group: "code",
    description: ".NET project config",
    extensions: &["csproj"],
    is_text: true,
};

/// Type metadata for the `css` content type (CSS source).
pub(crate) static CSS: TypeInfo = TypeInfo {
    label: "css",
    mime_type: "text/css",
    group: "code",
    description: "CSS source",
    extensions: &["css"],
    is_text: true,
};

/// Type metadata for the `csv` content type (CSV document).
pub(crate) static CSV: TypeInfo = TypeInfo {
    label: "csv",
    mime_type: "text/csv",
    group: "code",
    description: "CSV document",
    extensions: &["csv"],
    is_text: true,
};

/// Type metadata for the `dart` content type (Dart source).
pub(crate) static DART: TypeInfo = TypeInfo {
    label: "dart",
    mime_type: "text/plain",
    group: "code",
    description: "Dart source",
    extensions: &["dart"],
    is_text: true,
};

/// Type metadata for the `deb` content type (Debian binary package).
pub(crate) static DEB: TypeInfo = TypeInfo {
    label: "deb",
    mime_type: "application/vnd.debian.binary-package",
    group: "archive",
    description: "Debian binary package",
    extensions: &["deb"],
    is_text: false,
};

/// Type metadata for the `dex` content type (Dalvik dex file).
pub(crate) static DEX: TypeInfo = TypeInfo {
    label: "dex",
    mime_type: "application/x-android-dex",
    group: "executable",
    description: "Dalvik dex file",
    extensions: &["dex"],
    is_text: false,
};

/// Type metadata for the `dicom` content type (DICOM).
pub(crate) static DICOM: TypeInfo = TypeInfo {
    label: "dicom",
    mime_type: "application/dicom",
    group: "image",
    description: "DICOM",
    extensions: &["dcm"],
    is_text: false,
};

/// Type metadata for the `diff` content type (Diff file).
pub(crate) static DIFF: TypeInfo = TypeInfo {
    label: "diff",
    mime_type: "text/plain",
    group: "text",
    description: "Diff file",
    extensions: &["diff", "patch"],
    is_text: true,
};

/// Type metadata for the `directory` content type (a directory).
pub(crate) static DIRECTORY: TypeInfo = TypeInfo {
    label: "directory",
    mime_type: "inode/directory",
    group: "inode",
    description: "A directory",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `dm` content type (Dream Maker).
pub(crate) static DM: TypeInfo = TypeInfo {
    label: "dm",
    mime_type: "text/plain",
    group: "code",
    description: "Dream Maker",
    extensions: &["dm"],
    is_text: true,
};

/// Type metadata for the `dmg` content type (Apple disk image).
pub(crate) static DMG: TypeInfo = TypeInfo {
    label: "dmg",
    mime_type: "application/x-apple-diskimage",
    group: "archive",
    description: "Apple disk image",
    extensions: &["dmg"],
    is_text: false,
};

/// Type metadata for the `doc` content type (Microsoft Word CDF document).
pub(crate) static DOC: TypeInfo = TypeInfo {
    label: "doc",
    mime_type: "application/msword",
    group: "document",
    description: "Microsoft Word CDF document",
    extensions: &["doc"],
    is_text: false,
};

/// Type metadata for the `dockerfile` content type (Dockerfile).
pub(crate) static DOCKERFILE: TypeInfo = TypeInfo {
    label: "dockerfile",
    mime_type: "text/x-dockerfile",
    group: "code",
    description: "Dockerfile",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `docx` content type (Microsoft Word 2007+ document).
pub(crate) static DOCX: TypeInfo = TypeInfo {
    label: "docx",
    mime_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    group: "document",
    description: "Microsoft Word 2007+ document",
    extensions: &["docx", "docm"],
    is_text: false,
};

/// Type metadata for the `dsstore` content type (Application Desktop Services Store).
pub(crate) static DSSTORE: TypeInfo = TypeInfo {
    label: "dsstore",
    mime_type: "application/octet-stream",
    group: "unknown",
    description: "Application Desktop Services Store",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `dwg` content type (Autocad Drawing).
pub(crate) static DWG: TypeInfo = TypeInfo {
    label: "dwg",
    mime_type: "image/x-dwg",
    group: "image",
    description: "Autocad Drawing",
    extensions: &["dwg"],
    is_text: false,
};

/// Type metadata for the `dxf` content type (AutoCAD Drawing Exchange Format).
// NOTE(review): the description below spells "Audocad", which looks like a
// typo for "Autocad" (compare the `dwg` entry). This file is generated, so the
// fix belongs in the generator's input data rather than here.
pub(crate) static DXF: TypeInfo = TypeInfo {
    label: "dxf",
    mime_type: "image/vnd.dxf",
    group: "image",
    description: "Audocad Drawing Exchange Format",
    extensions: &["dxf"],
    is_text: true,
};

/// Type metadata for the `elf` content type (ELF executable).
pub(crate) static ELF: TypeInfo = TypeInfo {
    label: "elf",
    mime_type: "application/x-executable-elf",
    group: "executable",
    description: "ELF executable",
    extensions: &["elf"],
    is_text: false,
};

/// Type metadata for the `elixir` content type (Elixir script).
pub(crate) static ELIXIR: TypeInfo = TypeInfo {
    label: "elixir",
    mime_type: "text/plain",
    group: "code",
    description: "Elixir script",
    extensions: &["exs"],
    is_text: true,
};

/// Type metadata for the `emf` content type (Windows Enhanced Metafile image data).
pub(crate) static EMF: TypeInfo = TypeInfo {
    label: "emf",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Windows Enhanced Metafile image data",
    extensions: &["emf"],
    is_text: false,
};

/// Type metadata for the `eml` content type (RFC 822 mail).
pub(crate) static EML: TypeInfo = TypeInfo {
    label: "eml",
    mime_type: "message/rfc822",
    group: "text",
    description: "RFC 822 mail",
    extensions: &["eml"],
    is_text: true,
};

/// Type metadata for the `empty` content type (Empty file).
pub(crate) static EMPTY: TypeInfo = TypeInfo {
    label: "empty",
    mime_type: "inode/x-empty",
    group: "inode",
    description: "Empty file",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `epub` content type (EPUB document).
pub(crate) static EPUB: TypeInfo = TypeInfo {
    label: "epub",
    mime_type: "application/epub+zip",
    group: "document",
    description: "EPUB document",
    extensions: &["epub"],
    is_text: false,
};

/// Type metadata for the `erb` content type (Embedded Ruby source).
pub(crate) static ERB: TypeInfo = TypeInfo {
    label: "erb",
    mime_type: "text/x-ruby",
    group: "code",
    description: "Embedded Ruby source",
    extensions: &["erb"],
    is_text: true,
};

/// Type metadata for the `erlang` content type (Erlang source).
pub(crate) static ERLANG: TypeInfo = TypeInfo {
    label: "erlang",
    mime_type: "text/x-erlang",
    group: "code",
    description: "Erlang source",
    extensions: &["erl", "hrl"],
    is_text: true,
};

/// Type metadata for the `flac` content type (FLAC audio bitstream data).
pub(crate) static FLAC: TypeInfo = TypeInfo {
    label: "flac",
    mime_type: "audio/flac",
    group: "audio",
    description: "FLAC audio bitstream data",
    extensions: &["flac"],
    is_text: false,
};

/// Type metadata for the `flv` content type (Flash Video).
pub(crate) static FLV: TypeInfo = TypeInfo {
    label: "flv",
    mime_type: "video/x-flv",
    group: "video",
    description: "Flash Video",
    extensions: &["flv"],
    is_text: false,
};

/// Type metadata for the `fortran` content type (Fortran).
// NOTE(review): `group` is "document" here, while the other source-language
// entries in this file (e.g. `c`, `cobol`, `go`) use "code". This file is
// generated, so confirm and fix in the generator's input data rather than here.
pub(crate) static FORTRAN: TypeInfo = TypeInfo {
    label: "fortran",
    mime_type: "text/x-fortran",
    group: "document",
    description: "Fortran",
    extensions: &["f90", "f95", "f03", "F90"],
    is_text: true,
};

/// Type metadata for the `gemfile` content type (Gemfile file).
pub(crate) static GEMFILE: TypeInfo = TypeInfo {
    label: "gemfile",
    mime_type: "text/plain",
    group: "code",
    description: "Gemfile file",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `gemspec` content type (Gemspec file).
pub(crate) static GEMSPEC: TypeInfo = TypeInfo {
    label: "gemspec",
    mime_type: "text/plain",
    group: "code",
    description: "Gemspec file",
    extensions: &["gemspec"],
    is_text: true,
};

/// Type metadata for the `gif` content type (GIF image data).
pub(crate) static GIF: TypeInfo = TypeInfo {
    label: "gif",
    mime_type: "image/gif",
    group: "image",
    description: "GIF image data",
    extensions: &["gif"],
    is_text: false,
};

/// Type metadata for the `gitattributes` content type (Gitattributes file).
pub(crate) static GITATTRIBUTES: TypeInfo = TypeInfo {
    label: "gitattributes",
    mime_type: "text/plain",
    group: "code",
    description: "Gitattributes file",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `gitmodules` content type (Gitmodules file).
pub(crate) static GITMODULES: TypeInfo = TypeInfo {
    label: "gitmodules",
    mime_type: "text/plain",
    group: "code",
    description: "Gitmodules file",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `go` content type (Golang source).
pub(crate) static GO: TypeInfo = TypeInfo {
    label: "go",
    mime_type: "text/x-golang",
    group: "code",
    description: "Golang source",
    extensions: &["go"],
    is_text: true,
};

/// Type metadata for the `gradle` content type (Gradle source).
pub(crate) static GRADLE: TypeInfo = TypeInfo {
    label: "gradle",
    mime_type: "text/x-groovy",
    group: "code",
    description: "Gradle source",
    extensions: &["gradle"],
    is_text: true,
};

/// Type metadata for the `groovy` content type (Groovy source).
pub(crate) static GROOVY: TypeInfo = TypeInfo {
    label: "groovy",
    mime_type: "text/x-groovy",
    group: "code",
    description: "Groovy source",
    extensions: &["groovy"],
    is_text: true,
};

/// Type metadata for the `gzip` content type (gzip compressed data).
pub(crate) static GZIP: TypeInfo = TypeInfo {
    label: "gzip",
    mime_type: "application/gzip",
    group: "archive",
    description: "gzip compressed data",
    extensions: &["gz", "gzip", "tgz", "tar.gz"],
    is_text: false,
};

/// Type metadata for the `h5` content type (Hierarchical Data Format v5).
pub(crate) static H5: TypeInfo = TypeInfo {
    label: "h5",
    mime_type: "application/x-hdf5",
    group: "archive",
    description: "Hierarchical Data Format v5",
    extensions: &["h5", "hdf5"],
    is_text: false,
};

/// Type metadata for the `handlebars` content type (Handlebars source).
pub(crate) static HANDLEBARS: TypeInfo = TypeInfo {
    label: "handlebars",
    mime_type: "text/x-handlebars-template",
    group: "code",
    description: "Handlebars source",
    extensions: &["hbs", "handlebars"],
    is_text: true,
};

/// Type metadata for the `haskell` content type (Haskell source).
pub(crate) static HASKELL: TypeInfo = TypeInfo {
    label: "haskell",
    mime_type: "text/plain",
    group: "code",
    description: "Haskell source",
    extensions: &["hs", "lhs"],
    is_text: true,
};

/// Type metadata for the `hcl` content type (HashiCorp configuration language).
pub(crate) static HCL: TypeInfo = TypeInfo {
    label: "hcl",
    mime_type: "text/x-hcl",
    group: "code",
    description: "HashiCorp configuration language",
    extensions: &["hcl"],
    is_text: true,
};

/// Type metadata for the `hlp` content type (MS Windows help).
pub(crate) static HLP: TypeInfo = TypeInfo {
    label: "hlp",
    mime_type: "application/winhlp",
    group: "application",
    description: "MS Windows help",
    extensions: &["hlp"],
    is_text: false,
};

/// Type metadata for the `htaccess` content type (Apache access configuration).
pub(crate) static HTACCESS: TypeInfo = TypeInfo {
    label: "htaccess",
    mime_type: "text/x-apache-conf",
    group: "code",
    description: "Apache access configuration",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `html` content type (HTML document).
pub(crate) static HTML: TypeInfo = TypeInfo {
    label: "html",
    mime_type: "text/html",
    group: "code",
    description: "HTML document",
    extensions: &["html", "htm", "xhtml", "xht"],
    is_text: true,
};

/// Type metadata for the `icns` content type (Mac OS X icon).
pub(crate) static ICNS: TypeInfo = TypeInfo {
    label: "icns",
    mime_type: "image/x-icns",
    group: "image",
    description: "Mac OS X icon",
    extensions: &["icns"],
    is_text: false,
};

/// Type metadata for the `ico` content type (MS Windows icon resource).
pub(crate) static ICO: TypeInfo = TypeInfo {
    label: "ico",
    mime_type: "image/vnd.microsoft.icon",
    group: "image",
    description: "MS Windows icon resource",
    extensions: &["ico"],
    is_text: false,
};

/// Type metadata for the `ics` content type (Internet Calendaring and Scheduling).
pub(crate) static ICS: TypeInfo = TypeInfo {
    label: "ics",
    mime_type: "text/calendar",
    group: "application",
    description: "Internet Calendaring and Scheduling",
    extensions: &["ics"],
    is_text: true,
};

/// Type metadata for the `ignorefile` content type (Ignorefile).
pub(crate) static IGNOREFILE: TypeInfo = TypeInfo {
    label: "ignorefile",
    mime_type: "text/plain",
    group: "code",
    description: "Ignorefile",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `ini` content type (INI configuration file).
pub(crate) static INI: TypeInfo = TypeInfo {
    label: "ini",
    mime_type: "text/plain",
    group: "text",
    description: "INI configuration file",
    extensions: &["ini"],
    is_text: true,
};

/// Type metadata for the `internetshortcut` content type (MS Windows Internet shortcut).
pub(crate) static INTERNETSHORTCUT: TypeInfo = TypeInfo {
    label: "internetshortcut",
    mime_type: "application/x-mswinurl",
    group: "application",
    description: "MS Windows Internet shortcut",
    extensions: &["url"],
    is_text: true,
};

/// Type metadata for the `ipynb` content type (Jupyter notebook).
pub(crate) static IPYNB: TypeInfo = TypeInfo {
    label: "ipynb",
    mime_type: "application/json",
    group: "code",
    description: "Jupyter notebook",
    extensions: &["ipynb"],
    is_text: true,
};

/// Type metadata for the `iso` content type (ISO 9660 CD-ROM filesystem data).
pub(crate) static ISO: TypeInfo = TypeInfo {
    label: "iso",
    mime_type: "application/x-iso9660-image",
    group: "archive",
    description: "ISO 9660 CD-ROM filesystem data",
    extensions: &["iso"],
    is_text: false,
};

/// Type metadata for the `jar` content type (Java archive data).
pub(crate) static JAR: TypeInfo = TypeInfo {
    label: "jar",
    mime_type: "application/java-archive",
    group: "archive",
    description: "Java archive data (JAR)",
    extensions: &["jar", "klib"],
    is_text: false,
};

/// Type metadata for the `java` content type (Java source).
pub(crate) static JAVA: TypeInfo = TypeInfo {
    label: "java",
    mime_type: "text/x-java",
    group: "code",
    description: "Java source",
    extensions: &["java"],
    is_text: true,
};

/// Type metadata for the `javabytecode` content type (Java compiled bytecode).
pub(crate) static JAVABYTECODE: TypeInfo = TypeInfo {
    label: "javabytecode",
    mime_type: "application/x-java-applet",
    group: "executable",
    description: "Java compiled bytecode",
    extensions: &["class"],
    is_text: false,
};

/// Type metadata for the `javascript` content type (JavaScript source).
pub(crate) static JAVASCRIPT: TypeInfo = TypeInfo {
    label: "javascript",
    mime_type: "application/javascript",
    group: "code",
    description: "JavaScript source",
    extensions: &["js", "mjs", "cjs"],
    is_text: true,
};

/// Type metadata for the `jinja` content type (Jinja template).
pub(crate) static JINJA: TypeInfo = TypeInfo {
    label: "jinja",
    mime_type: "text/x-jinja2-template",
    group: "code",
    description: "Jinja template",
    extensions: &["jinja", "jinja2", "j2"],
    is_text: true,
};

/// Type metadata for the `jp2` content type (jpeg2000).
pub(crate) static JP2: TypeInfo = TypeInfo {
    label: "jp2",
    mime_type: "image/jpeg2000",
    group: "image",
    description: "jpeg2000",
    extensions: &["jp2"],
    is_text: false,
};

/// Type metadata for the `jpeg` content type (JPEG image data).
pub(crate) static JPEG: TypeInfo = TypeInfo {
    label: "jpeg",
    mime_type: "image/jpeg",
    group: "image",
    description: "JPEG image data",
    extensions: &["jpg", "jpeg"],
    is_text: false,
};

/// Type metadata for the `json` content type (JSON document).
pub(crate) static JSON: TypeInfo = TypeInfo {
    label: "json",
    mime_type: "application/json",
    group: "code",
    description: "JSON document",
    extensions: &["json"],
    is_text: true,
};

/// Type metadata for the `jsonl` content type (JSONL document).
// NOTE(review): the `jsonld` extension usually denotes JSON-LD rather than
// JSON Lines; confirm it is intended here. This file is generated, so any fix
// belongs in the generator's input data.
pub(crate) static JSONL: TypeInfo = TypeInfo {
    label: "jsonl",
    mime_type: "application/json",
    group: "code",
    description: "JSONL document",
    extensions: &["jsonl", "jsonld"],
    is_text: true,
};

/// Type metadata for the `julia` content type (Julia source).
pub(crate) static JULIA: TypeInfo = TypeInfo {
    label: "julia",
    mime_type: "text/x-julia",
    group: "code",
    description: "Julia source",
    extensions: &["jl"],
    is_text: true,
};

/// Type metadata for the `kotlin` content type (Kotlin source).
pub(crate) static KOTLIN: TypeInfo = TypeInfo {
    label: "kotlin",
    mime_type: "text/plain",
    group: "code",
    description: "Kotlin source",
    extensions: &["kt", "kts"],
    is_text: true,
};

/// Type metadata for the `latex` content type (LaTeX document).
pub(crate) static LATEX: TypeInfo = TypeInfo {
    label: "latex",
    mime_type: "text/x-tex",
    group: "text",
    description: "LaTeX document",
    extensions: &["tex", "sty"],
    is_text: true,
};

/// Type metadata for the `lha` content type (LHarc archive).
pub(crate) static LHA: TypeInfo = TypeInfo {
    label: "lha",
    mime_type: "application/x-lha",
    group: "archive",
    description: "LHarc archive",
    extensions: &["lha", "lzh"],
    is_text: false,
};

/// Type metadata for the `lisp` content type (Lisp source).
pub(crate) static LISP: TypeInfo = TypeInfo {
    label: "lisp",
    mime_type: "text/x-lisp",
    group: "code",
    description: "Lisp source",
    extensions: &["lisp", "lsp", "l", "cl"],
    is_text: true,
};

/// Type metadata for the `lnk` content type (MS Windows shortcut).
pub(crate) static LNK: TypeInfo = TypeInfo {
    label: "lnk",
    mime_type: "application/x-ms-shortcut",
    group: "application",
    description: "MS Windows shortcut",
    extensions: &["lnk"],
    is_text: false,
};

/// Type metadata for the `lua` content type (Lua).
pub(crate) static LUA: TypeInfo = TypeInfo {
    label: "lua",
    mime_type: "text/plain",
    group: "code",
    description: "Lua",
    extensions: &["lua"],
    is_text: true,
};

/// Type metadata for the `m3u` content type (M3U playlist).
pub(crate) static M3U: TypeInfo = TypeInfo {
    label: "m3u",
    mime_type: "text/plain",
    group: "application",
    description: "M3U playlist",
    extensions: &["m3u8", "m3u"],
    is_text: true,
};

/// Type metadata for the `m4` content type (GNU Macro).
pub(crate) static M4: TypeInfo = TypeInfo {
    label: "m4",
    mime_type: "text/plain",
    group: "code",
    description: "GNU Macro",
    extensions: &["m4"],
    is_text: true,
};

/// Type metadata for the `macho` content type (Mach-O executable).
pub(crate) static MACHO: TypeInfo = TypeInfo {
    label: "macho",
    mime_type: "application/x-mach-o",
    group: "executable",
    description: "Mach-O executable",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `makefile` content type (Makefile source).
pub(crate) static MAKEFILE: TypeInfo = TypeInfo {
    label: "makefile",
    mime_type: "text/x-makefile",
    group: "code",
    description: "Makefile source",
    extensions: &[],
    is_text: true,
};

/// Type metadata for the `markdown` content type (Markdown document).
pub(crate) static MARKDOWN: TypeInfo = TypeInfo {
    label: "markdown",
    mime_type: "text/markdown",
    group: "text",
    description: "Markdown document",
    extensions: &["md", "markdown"],
    is_text: true,
};

/// Type metadata for the `matlab` content type (Matlab Source).
///
/// The `m` extension is also listed by `OBJECTIVEC`, so extensions do not
/// uniquely identify an entry.
pub(crate) static MATLAB: TypeInfo = TypeInfo {
    label: "matlab",
    mime_type: "text/x-matlab",
    group: "code",
    description: "Matlab Source",
    extensions: &["m", "matlab"],
    is_text: true,
};

/// Type metadata for the `mht` content type (MHTML document).
pub(crate) static MHT: TypeInfo = TypeInfo {
    label: "mht",
    mime_type: "application/x-mimearchive",
    group: "code",
    description: "MHTML document",
    extensions: &["mht"],
    is_text: true,
};

/// Type metadata for the `midi` content type (Midi).
pub(crate) static MIDI: TypeInfo = TypeInfo {
    label: "midi",
    mime_type: "audio/midi",
    group: "audio",
    description: "Midi",
    extensions: &["mid"],
    is_text: false,
};

/// Type metadata for the `mkv` content type (Matroska).
pub(crate) static MKV: TypeInfo = TypeInfo {
    label: "mkv",
    mime_type: "video/x-matroska",
    group: "video",
    description: "Matroska",
    extensions: &["mkv"],
    is_text: false,
};

/// Type metadata for the `mp3` content type (MP3 media file).
pub(crate) static MP3: TypeInfo = TypeInfo {
    label: "mp3",
    mime_type: "audio/mpeg",
    group: "audio",
    description: "MP3 media file",
    extensions: &["mp3"],
    is_text: false,
};

/// Type metadata for the `mp4` content type (MP4 media file).
pub(crate) static MP4: TypeInfo = TypeInfo {
    label: "mp4",
    mime_type: "video/mp4",
    group: "video",
    description: "MP4 media file",
    extensions: &["mp4"],
    is_text: false,
};

/// Type metadata for the `mscompress` content type (MS Compress archive data).
pub(crate) static MSCOMPRESS: TypeInfo = TypeInfo {
    label: "mscompress",
    mime_type: "application/x-ms-compress-szdd",
    group: "archive",
    description: "MS Compress archive data",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `msi` content type (Microsoft Installer file).
pub(crate) static MSI: TypeInfo = TypeInfo {
    label: "msi",
    mime_type: "application/x-msi",
    group: "archive",
    description: "Microsoft Installer file",
    extensions: &["msi"],
    is_text: false,
};

/// Type metadata for the `mum` content type (Windows Update Package file).
pub(crate) static MUM: TypeInfo = TypeInfo {
    label: "mum",
    mime_type: "text/xml",
    group: "application",
    description: "Windows Update Package file",
    extensions: &["mum"],
    is_text: true,
};

/// Type metadata for the `npy` content type (Numpy Array).
pub(crate) static NPY: TypeInfo = TypeInfo {
    label: "npy",
    mime_type: "application/octet-stream",
    group: "archive",
    description: "Numpy Array",
    extensions: &["npy"],
    is_text: false,
};

/// Type metadata for the `npz` content type (Numpy Arrays Archive).
pub(crate) static NPZ: TypeInfo = TypeInfo {
    label: "npz",
    mime_type: "application/octet-stream",
    group: "archive",
    description: "Numpy Arrays Archive",
    extensions: &["npz"],
    is_text: false,
};

/// Type metadata for the `nupkg` content type (NuGet Package).
pub(crate) static NUPKG: TypeInfo = TypeInfo {
    label: "nupkg",
    mime_type: "application/octet-stream",
    group: "unknown",
    description: "NuGet Package",
    extensions: &["nupkg"],
    is_text: false,
};

/// Type metadata for the `objectivec` content type (ObjectiveC source).
///
/// The `m` extension is also listed by `MATLAB`, so extensions do not
/// uniquely identify an entry.
pub(crate) static OBJECTIVEC: TypeInfo = TypeInfo {
    label: "objectivec",
    mime_type: "text/x-objcsrc",
    group: "code",
    description: "ObjectiveC source",
    extensions: &["m", "mm"],
    is_text: true,
};

/// Type metadata for the `ocaml` content type (OCaml).
// NOTE(review): `mime_type` is "text-ocaml", which has no `/` separating type
// and subtype, unlike every other entry in this file; "text/x-ocaml" was
// probably intended. This file is generated, so confirm and fix in the
// generator's input data rather than here.
pub(crate) static OCAML: TypeInfo = TypeInfo {
    label: "ocaml",
    mime_type: "text-ocaml",
    group: "code",
    description: "OCaml",
    extensions: &["ml", "mli"],
    is_text: true,
};

/// Type metadata for the `odp` content type (OpenDocument Presentation).
pub(crate) static ODP: TypeInfo = TypeInfo {
    label: "odp",
    mime_type: "application/vnd.oasis.opendocument.presentation",
    group: "document",
    description: "OpenDocument Presentation",
    extensions: &["odp"],
    is_text: false,
};

/// Type metadata for the `ods` content type (OpenDocument Spreadsheet).
pub(crate) static ODS: TypeInfo = TypeInfo {
    label: "ods",
    mime_type: "application/vnd.oasis.opendocument.spreadsheet",
    group: "document",
    description: "OpenDocument Spreadsheet",
    extensions: &["ods"],
    is_text: false,
};

/// Type metadata for the `odt` content type (OpenDocument Text).
pub(crate) static ODT: TypeInfo = TypeInfo {
    label: "odt",
    mime_type: "application/vnd.oasis.opendocument.text",
    group: "document",
    description: "OpenDocument Text",
    extensions: &["odt"],
    is_text: false,
};

/// Type metadata for the `ogg` content type (Ogg data).
pub(crate) static OGG: TypeInfo = TypeInfo {
    label: "ogg",
    mime_type: "audio/ogg",
    group: "audio",
    description: "Ogg data",
    extensions: &["ogg"],
    is_text: false,
};

/// Type metadata for the `one` content type (One Note).
pub(crate) static ONE: TypeInfo = TypeInfo {
    label: "one",
    mime_type: "application/msonenote",
    group: "document",
    description: "One Note",
    extensions: &["one"],
    is_text: false,
};

/// Type metadata for the `onnx` content type (Open Neural Network Exchange).
pub(crate) static ONNX: TypeInfo = TypeInfo {
    label: "onnx",
    mime_type: "application/octet-stream",
    group: "archive",
    description: "Open Neural Network Exchange",
    extensions: &["onnx"],
    is_text: false,
};

/// Type metadata for the `otf` content type (OpenType font).
pub(crate) static OTF: TypeInfo = TypeInfo {
    label: "otf",
    mime_type: "font/otf",
    group: "font",
    description: "OpenType font",
    extensions: &["otf"],
    is_text: false,
};

/// Type metadata for the `outlook` content type (MS Outlook Message).
pub(crate) static OUTLOOK: TypeInfo = TypeInfo {
    label: "outlook",
    mime_type: "application/vnd.ms-outlook",
    group: "application",
    description: "MS Outlook Message",
    extensions: &[],
    is_text: false,
};

/// Type metadata for the `parquet` content type (Apache Parquet).
pub(crate) static PARQUET: TypeInfo = TypeInfo {
    label: "parquet",
    mime_type: "application/vnd.apache.parquet",
    group: "unknown",
    description: "Apache Parquet",
    extensions: &["pqt", "parquet"],
    is_text: false,
};

/// Type metadata for the `pascal` content type (Pascal source).
pub(crate) static PASCAL: TypeInfo = TypeInfo {
    label: "pascal",
    mime_type: "text/x-pascal",
    group: "code",
    description: "Pascal source",
    extensions: &["pas", "pp"],
    is_text: true,
};

/// Type metadata for the `pcap` content type (pcap capture file).
pub(crate) static PCAP: TypeInfo = TypeInfo {
    label: "pcap",
    mime_type: "application/vnd.tcpdump.pcap",
    group: "application",
    description: "pcap capture file",
    extensions: &["pcap", "pcapng"],
    is_text: false,
};

/// Type metadata for the `pdb` content type (Windows Program Database).
///
/// The `pdb` extension is also listed by `PROTEINDB`, so extensions do not
/// uniquely identify an entry.
pub(crate) static PDB: TypeInfo = TypeInfo {
    label: "pdb",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Windows Program Database",
    extensions: &["pdb"],
    is_text: false,
};

/// Type metadata for the `pdf` content type (PDF document).
pub(crate) static PDF: TypeInfo = TypeInfo {
    label: "pdf",
    mime_type: "application/pdf",
    group: "document",
    description: "PDF document",
    extensions: &["pdf"],
    is_text: false,
};

/// Type metadata for the `pebin` content type (PE Windows executable).
pub(crate) static PEBIN: TypeInfo = TypeInfo {
    label: "pebin",
    mime_type: "application/x-dosexec",
    group: "executable",
    description: "PE Windows executable",
    extensions: &["exe", "dll"],
    is_text: false,
};

/// Type metadata for the `pem` content type (PEM certificate).
pub(crate) static PEM: TypeInfo = TypeInfo {
    label: "pem",
    mime_type: "application/x-pem-file",
    group: "application",
    description: "PEM certificate",
    extensions: &["pem", "pub", "gpg"],
    is_text: true,
};

/// Type metadata for the `perl` content type (Perl source).
///
/// The `pl` extension is also listed by `PROLOG`, so extensions do not
/// uniquely identify an entry.
pub(crate) static PERL: TypeInfo = TypeInfo {
    label: "perl",
    mime_type: "text/x-perl",
    group: "code",
    description: "Perl source",
    extensions: &["pl"],
    is_text: true,
};

/// Type metadata for the `php` content type (PHP source).
pub(crate) static PHP: TypeInfo = TypeInfo {
    label: "php",
    mime_type: "text/x-php",
    group: "code",
    description: "PHP source",
    extensions: &["php"],
    is_text: true,
};

/// Type metadata for the `pickle` content type (Python pickle).
pub(crate) static PICKLE: TypeInfo = TypeInfo {
    label: "pickle",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Python pickle",
    extensions: &["pickle", "pkl"],
    is_text: false,
};

/// Type metadata for the `png` content type (PNG image).
pub(crate) static PNG: TypeInfo = TypeInfo {
    label: "png",
    mime_type: "image/png",
    group: "image",
    description: "PNG image",
    extensions: &["png"],
    is_text: false,
};

/// Type metadata for the `po` content type (Portable Object for i18n).
pub(crate) static PO: TypeInfo = TypeInfo {
    label: "po",
    mime_type: "text/gettext-translation",
    group: "application",
    description: "Portable Object (PO) for i18n",
    extensions: &["po"],
    is_text: true,
};

/// Type metadata for the `postscript` content type (PostScript document).
pub(crate) static POSTSCRIPT: TypeInfo = TypeInfo {
    label: "postscript",
    mime_type: "application/postscript",
    group: "document",
    description: "PostScript document",
    extensions: &["ps"],
    is_text: false,
};

/// Type metadata for the `powershell` content type (Powershell source).
pub(crate) static POWERSHELL: TypeInfo = TypeInfo {
    label: "powershell",
    mime_type: "application/x-powershell",
    group: "code",
    description: "Powershell source",
    extensions: &["ps1"],
    is_text: true,
};

/// Type metadata for the `ppt` content type (Microsoft PowerPoint CDF document).
pub(crate) static PPT: TypeInfo = TypeInfo {
    label: "ppt",
    mime_type: "application/vnd.ms-powerpoint",
    group: "document",
    description: "Microsoft PowerPoint CDF document",
    extensions: &["ppt"],
    is_text: false,
};

/// Type metadata for the `pptx` content type (Microsoft PowerPoint 2007+ document).
pub(crate) static PPTX: TypeInfo = TypeInfo {
    label: "pptx",
    mime_type: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    group: "document",
    description: "Microsoft PowerPoint 2007+ document",
    extensions: &["pptx", "pptm"],
    is_text: false,
};

/// Type metadata for the `prolog` content type (Prolog source).
///
/// The `pl` extension is also listed by `PERL`, so extensions do not
/// uniquely identify an entry.
pub(crate) static PROLOG: TypeInfo = TypeInfo {
    label: "prolog",
    mime_type: "text/x-prolog",
    group: "code",
    description: "Prolog source",
    extensions: &["pl", "pro", "P"],
    is_text: true,
};

/// Type metadata for the `proteindb` content type (Protein DB).
///
/// The `pdb` extension is also listed by `PDB`, so extensions do not
/// uniquely identify an entry.
pub(crate) static PROTEINDB: TypeInfo = TypeInfo {
    label: "proteindb",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Protein DB",
    extensions: &["pdb"],
    is_text: true,
};

/// Type metadata for the `proto` content type (Protocol buffer definition).
pub(crate) static PROTO: TypeInfo = TypeInfo {
    label: "proto",
    mime_type: "text/x-proto",
    group: "code",
    description: "Protocol buffer definition",
    extensions: &["proto"],
    is_text: true,
};

/// Type metadata for the `psd` content type (Adobe Photoshop).
pub(crate) static PSD: TypeInfo = TypeInfo {
    label: "psd",
    mime_type: "image/vnd.adobe.photoshop",
    group: "image",
    description: "Adobe Photoshop",
    extensions: &["psd"],
    is_text: false,
};

/// Type metadata for the `python` content type (Python source).
pub(crate) static PYTHON: TypeInfo = TypeInfo {
    label: "python",
    mime_type: "text/x-python",
    group: "code",
    description: "Python source",
    extensions: &["py", "pyi"],
    is_text: true,
};

/// Type metadata for the `pythonbytecode` content type (Python compiled bytecode).
pub(crate) static PYTHONBYTECODE: TypeInfo = TypeInfo {
    label: "pythonbytecode",
    mime_type: "application/x-bytecode.python",
    group: "executable",
    description: "Python compiled bytecode",
    extensions: &["pyc", "pyo"],
    is_text: false,
};

/// Type metadata for the `pytorch` content type (Pytorch storage file).
pub(crate) static PYTORCH: TypeInfo = TypeInfo {
    label: "pytorch",
    mime_type: "application/octet-stream",
    group: "application",
    description: "Pytorch storage file",
    extensions: &["pt", "pth"],
    is_text: false,
};

/// Type metadata for the `qt` content type (QuickTime).
pub(crate) static QT: TypeInfo = TypeInfo {
    label: "qt",
    mime_type: "video/quicktime",
    group: "video",
    description: "QuickTime",
    extensions: &["mov"],
    is_text: false,
};

/// Type information for the `r` content type (R language source).
///
/// The only listed extension is the upper-case `R`.
pub(crate) static R: TypeInfo = TypeInfo {
    label: "r",
    mime_type: "text/x-R",
    group: "code",
    description: "R (language)",
    extensions: &["R"],
    is_text: true,
};

/// Type information for the `randombytes` content type (Random bytes).
///
/// Belongs to the `unknown` group and has no associated extensions.
pub(crate) static RANDOMBYTES: TypeInfo = TypeInfo {
    label: "randombytes",
    mime_type: "application/octet-stream",
    group: "unknown",
    description: "Random bytes",
    extensions: &[],
    is_text: false,
};

/// Type information for the `randomtxt` content type (Random text).
///
/// Reported as `text/plain` and has no associated extensions.
pub(crate) static RANDOMTXT: TypeInfo = TypeInfo {
    label: "randomtxt",
    mime_type: "text/plain",
    group: "text",
    description: "Random text",
    extensions: &[],
    is_text: true,
};

/// Type information for the `rar` content type (RAR archive data).
pub(crate) static RAR: TypeInfo = TypeInfo {
    label: "rar",
    mime_type: "application/x-rar",
    group: "archive",
    description: "RAR archive data",
    extensions: &["rar"],
    is_text: false,
};

/// Type information for the `rdf` content type (Resource Description Framework document).
pub(crate) static RDF: TypeInfo = TypeInfo {
    label: "rdf",
    mime_type: "application/rdf+xml",
    group: "text",
    description: "Resource Description Framework document (RDF)",
    extensions: &["rdf"],
    is_text: true,
};

/// Type information for the `rpm` content type (RedHat Package Manager archive).
pub(crate) static RPM: TypeInfo = TypeInfo {
    label: "rpm",
    mime_type: "application/x-rpm",
    group: "archive",
    description: "RedHat Package Manager archive (RPM)",
    extensions: &["rpm"],
    is_text: false,
};

/// Type information for the `rst` content type (ReStructuredText document).
pub(crate) static RST: TypeInfo = TypeInfo {
    label: "rst",
    mime_type: "text/x-rst",
    group: "text",
    description: "ReStructuredText document",
    extensions: &["rst"],
    is_text: true,
};

/// Type information for the `rtf` content type (Rich Text Format document).
pub(crate) static RTF: TypeInfo = TypeInfo {
    label: "rtf",
    mime_type: "text/rtf",
    group: "text",
    description: "Rich Text Format document",
    extensions: &["rtf"],
    is_text: true,
};

/// Type information for the `ruby` content type (Ruby source).
pub(crate) static RUBY: TypeInfo = TypeInfo {
    label: "ruby",
    mime_type: "application/x-ruby",
    group: "code",
    description: "Ruby source",
    extensions: &["rb"],
    is_text: true,
};

/// Type information for the `rust` content type (Rust source).
pub(crate) static RUST: TypeInfo = TypeInfo {
    label: "rust",
    mime_type: "application/x-rust",
    group: "code",
    description: "Rust source",
    extensions: &["rs"],
    is_text: true,
};

/// Type information for the `scala` content type (Scala source).
pub(crate) static SCALA: TypeInfo = TypeInfo {
    label: "scala",
    mime_type: "application/x-scala",
    group: "code",
    description: "Scala source",
    extensions: &["scala"],
    is_text: true,
};

/// Type information for the `scss` content type (SCSS source).
pub(crate) static SCSS: TypeInfo = TypeInfo {
    label: "scss",
    mime_type: "text/x-scss",
    group: "code",
    description: "SCSS source",
    extensions: &["scss"],
    is_text: true,
};

/// Type information for the `sevenzip` content type (7-zip archive data).
///
/// The label is spelled out, while the file extension is `7z`.
pub(crate) static SEVENZIP: TypeInfo = TypeInfo {
    label: "sevenzip",
    mime_type: "application/x-7z-compressed",
    group: "archive",
    description: "7-zip archive data",
    extensions: &["7z"],
    is_text: false,
};

/// Type information for the `sgml` content type (SGML document).
pub(crate) static SGML: TypeInfo = TypeInfo {
    label: "sgml",
    mime_type: "application/sgml",
    group: "text",
    description: "sgml",
    extensions: &["sgml"],
    is_text: true,
};

/// Type information for the `shell` content type (Shell script).
pub(crate) static SHELL: TypeInfo = TypeInfo {
    label: "shell",
    mime_type: "text/x-shellscript",
    group: "code",
    description: "Shell script",
    extensions: &["sh"],
    is_text: true,
};

/// Type information for the `smali` content type (Smali source).
pub(crate) static SMALI: TypeInfo = TypeInfo {
    label: "smali",
    mime_type: "application/x-smali",
    group: "code",
    description: "Smali source",
    extensions: &["smali"],
    is_text: true,
};

/// Type information for the `snap` content type (Snap archive).
///
/// Uses the generic `application/octet-stream` MIME type.
pub(crate) static SNAP: TypeInfo = TypeInfo {
    label: "snap",
    mime_type: "application/octet-stream",
    group: "archive",
    description: "Snap archive",
    extensions: &["snap"],
    is_text: false,
};

/// Type information for the `solidity` content type (Solidity source).
///
/// Reported with the generic `text/plain` MIME type.
pub(crate) static SOLIDITY: TypeInfo = TypeInfo {
    label: "solidity",
    mime_type: "text/plain",
    group: "code",
    description: "Solidity source",
    extensions: &["sol"],
    is_text: true,
};

/// Type information for the `sql` content type (SQL source).
pub(crate) static SQL: TypeInfo = TypeInfo {
    label: "sql",
    mime_type: "application/x-sql",
    group: "code",
    description: "SQL source",
    extensions: &["sql"],
    is_text: true,
};

/// Type information for the `sqlite` content type (SQLite database).
///
/// Uses the generic `application/octet-stream` MIME type.
pub(crate) static SQLITE: TypeInfo = TypeInfo {
    label: "sqlite",
    mime_type: "application/octet-stream",
    group: "application",
    description: "SQLITE database",
    extensions: &["sqlite", "sqlite3"],
    is_text: false,
};

/// Type information for the `squashfs` content type (Squash filesystem).
///
/// Uses the generic `application/octet-stream` MIME type and has no associated extensions.
pub(crate) static SQUASHFS: TypeInfo = TypeInfo {
    label: "squashfs",
    mime_type: "application/octet-stream",
    group: "archive",
    description: "Squash filesystem",
    extensions: &[],
    is_text: false,
};

/// Type information for the `srt` content type (SubRip Text Format).
///
/// Textual content that is nevertheless filed under the `application` group.
pub(crate) static SRT: TypeInfo = TypeInfo {
    label: "srt",
    mime_type: "text/srt",
    group: "application",
    description: "SubRip Text Format",
    extensions: &["srt"],
    is_text: true,
};

/// Type information for the `stlbinary` content type (binary Stereolithography CAD).
///
/// Shares its MIME type and `stl` extension with the `stltext` entry; the two differ in
/// label, description and `is_text`.
pub(crate) static STLBINARY: TypeInfo = TypeInfo {
    label: "stlbinary",
    mime_type: "application/sla",
    group: "image",
    description: "Stereolithography CAD (binary)",
    extensions: &["stl"],
    is_text: false,
};

/// Type information for the `stltext` content type (textual Stereolithography CAD).
///
/// Shares its MIME type and `stl` extension with the `stlbinary` entry; the two differ in
/// label, description and `is_text`.
pub(crate) static STLTEXT: TypeInfo = TypeInfo {
    label: "stltext",
    mime_type: "application/sla",
    group: "image",
    description: "Stereolithography CAD (text)",
    extensions: &["stl"],
    is_text: true,
};

/// Type information for the `sum` content type (Checksum file).
///
/// Reported with the generic `text/plain` MIME type.
pub(crate) static SUM: TypeInfo = TypeInfo {
    label: "sum",
    mime_type: "text/plain",
    group: "application",
    description: "Checksum file",
    extensions: &["sum"],
    is_text: true,
};

/// Type information for the `svg` content type (SVG Scalable Vector Graphics image data).
///
/// An image type that is flagged as text.
pub(crate) static SVG: TypeInfo = TypeInfo {
    label: "svg",
    mime_type: "image/svg+xml",
    group: "image",
    description: "SVG Scalable Vector Graphics image data",
    extensions: &["svg"],
    is_text: true,
};

/// Type information for the `swf` content type (Shockwave Flash, described as "Small Web File").
pub(crate) static SWF: TypeInfo = TypeInfo {
    label: "swf",
    mime_type: "application/x-shockwave-flash",
    group: "executable",
    description: "Small Web File",
    extensions: &["swf"],
    is_text: false,
};

/// Type information for the `swift` content type (Swift source).
pub(crate) static SWIFT: TypeInfo = TypeInfo {
    label: "swift",
    mime_type: "text/x-swift",
    group: "code",
    description: "Swift",
    extensions: &["swift"],
    is_text: true,
};

/// Type information for the `symlink` type (Symbolic link).
///
/// Belongs to the `inode` group and has no associated extensions. Unlike most entries in this
/// table, it has no corresponding `ContentType` variant, since that enum only covers regular
/// files.
pub(crate) static SYMLINK: TypeInfo = TypeInfo {
    label: "symlink",
    mime_type: "inode/symlink",
    group: "inode",
    description: "Symbolic link",
    extensions: &[],
    is_text: false,
};

/// Type information for the `tar` content type (POSIX tar archive).
pub(crate) static TAR: TypeInfo = TypeInfo {
    label: "tar",
    mime_type: "application/x-tar",
    group: "archive",
    description: "POSIX tar archive",
    extensions: &["tar"],
    is_text: false,
};

/// Type information for the `tcl` content type (Tcl source, described as "Tickle").
pub(crate) static TCL: TypeInfo = TypeInfo {
    label: "tcl",
    mime_type: "application/x-tcl",
    group: "code",
    description: "Tickle",
    extensions: &["tcl"],
    is_text: true,
};

/// Type information for the `textproto` content type (Text protocol buffer).
///
/// Reported with the generic `text/plain` MIME type.
pub(crate) static TEXTPROTO: TypeInfo = TypeInfo {
    label: "textproto",
    mime_type: "text/plain",
    group: "code",
    description: "Text protocol buffer",
    extensions: &["textproto", "textpb", "pbtxt"],
    is_text: true,
};

/// Type information for the `tga` content type (Targa image data).
pub(crate) static TGA: TypeInfo = TypeInfo {
    label: "tga",
    mime_type: "image/x-tga",
    group: "image",
    description: "Targa image data",
    extensions: &["tga"],
    is_text: false,
};

/// Type information for the `thumbsdb` content type (Windows thumbnail cache).
///
/// Has an `image/*` MIME type but is filed under the `application` group; no extensions are
/// listed.
pub(crate) static THUMBSDB: TypeInfo = TypeInfo {
    label: "thumbsdb",
    mime_type: "image/vnd.ms-thumb",
    group: "application",
    description: "Windows thumbnail cache",
    extensions: &[],
    is_text: false,
};

/// Type information for the `tiff` content type (TIFF image data).
pub(crate) static TIFF: TypeInfo = TypeInfo {
    label: "tiff",
    mime_type: "image/tiff",
    group: "image",
    description: "TIFF image data",
    extensions: &["tiff", "tif"],
    is_text: false,
};

/// Type information for the `toml` content type (Tom's obvious, minimal language).
pub(crate) static TOML: TypeInfo = TypeInfo {
    label: "toml",
    mime_type: "application/toml",
    group: "text",
    description: "Tom's obvious, minimal language",
    extensions: &["toml"],
    is_text: true,
};

/// Type information for the `torrent` content type (BitTorrent file).
pub(crate) static TORRENT: TypeInfo = TypeInfo {
    label: "torrent",
    mime_type: "application/x-bittorrent",
    group: "application",
    description: "BitTorrent file",
    extensions: &["torrent"],
    is_text: false,
};

/// Type information for the `tsv` content type (TSV document).
///
/// Filed under the `code` group.
pub(crate) static TSV: TypeInfo = TypeInfo {
    label: "tsv",
    mime_type: "text/tsv",
    group: "code",
    description: "TSV document",
    extensions: &["tsv"],
    is_text: true,
};

/// Type information for the `ttf` content type (TrueType Font data).
///
/// Also lists the `ttc` extension.
pub(crate) static TTF: TypeInfo = TypeInfo {
    label: "ttf",
    mime_type: "font/sfnt",
    group: "font",
    description: "TrueType Font data",
    extensions: &["ttf", "ttc"],
    is_text: false,
};

/// Type information for the `twig` content type (Twig template).
pub(crate) static TWIG: TypeInfo = TypeInfo {
    label: "twig",
    mime_type: "text/x-twig",
    group: "code",
    description: "Twig template",
    extensions: &["twig"],
    is_text: true,
};

/// Type information for the `txt` content type (Generic text document).
pub(crate) static TXT: TypeInfo = TypeInfo {
    label: "txt",
    mime_type: "text/plain",
    group: "text",
    description: "Generic text document",
    extensions: &["txt"],
    is_text: true,
};

/// Type information for the `typescript` content type (TypeScript source).
///
/// Covers the `ts`, `mts` and `cts` extensions.
pub(crate) static TYPESCRIPT: TypeInfo = TypeInfo {
    label: "typescript",
    mime_type: "application/typescript",
    group: "code",
    description: "TypeScript source",
    extensions: &["ts", "mts", "cts"],
    is_text: true,
};

/// Type information for the `undefined` content type (Undefined).
///
/// Has its own `undefined` group and no associated extensions.
pub(crate) static UNDEFINED: TypeInfo = TypeInfo {
    label: "undefined",
    mime_type: "application/undefined",
    group: "undefined",
    description: "Undefined",
    extensions: &[],
    is_text: false,
};

/// Type information for the `unknown` content type (Unknown binary data).
///
/// Uses the generic `application/octet-stream` MIME type and has no associated extensions.
pub(crate) static UNKNOWN: TypeInfo = TypeInfo {
    label: "unknown",
    mime_type: "application/octet-stream",
    group: "unknown",
    description: "Unknown binary data",
    extensions: &[],
    is_text: false,
};

/// Type information for the `vba` content type (MS Visual Basic source).
///
/// Covers the `vbs`, `vba` and `vb` extensions.
pub(crate) static VBA: TypeInfo = TypeInfo {
    label: "vba",
    mime_type: "text/vbscript",
    group: "code",
    description: "MS Visual Basic source (VBA)",
    extensions: &["vbs", "vba", "vb"],
    is_text: true,
};

/// Type information for the `vcxproj` content type (Visual Studio MSBuild project).
///
/// Reported with the generic `application/xml` MIME type.
pub(crate) static VCXPROJ: TypeInfo = TypeInfo {
    label: "vcxproj",
    mime_type: "application/xml",
    group: "code",
    description: "Visual Studio MSBuild project",
    extensions: &["vcxproj"],
    is_text: true,
};

/// Type information for the `verilog` content type (Verilog source).
pub(crate) static VERILOG: TypeInfo = TypeInfo {
    label: "verilog",
    mime_type: "text/x-verilog",
    group: "code",
    description: "Verilog source",
    extensions: &["v", "verilog", "vlg", "vh"],
    is_text: true,
};

/// Type information for the `vhdl` content type (VHDL source).
pub(crate) static VHDL: TypeInfo = TypeInfo {
    label: "vhdl",
    mime_type: "text/x-vhdl",
    group: "code",
    description: "VHDL source",
    extensions: &["vhd"],
    is_text: true,
};

/// Type information for the `vtt` content type (Web Video Text Tracks).
pub(crate) static VTT: TypeInfo = TypeInfo {
    label: "vtt",
    mime_type: "text/vtt",
    group: "text",
    description: "Web Video Text Tracks",
    extensions: &["vtt", "webvtt"],
    is_text: true,
};

/// Type information for the `vue` content type (Vue source).
///
/// Reported with the `application/javascript` MIME type.
pub(crate) static VUE: TypeInfo = TypeInfo {
    label: "vue",
    mime_type: "application/javascript",
    group: "code",
    description: "Vue source",
    extensions: &["vue"],
    is_text: true,
};

/// Type information for the `wasm` content type (Web Assembly).
pub(crate) static WASM: TypeInfo = TypeInfo {
    label: "wasm",
    mime_type: "application/wasm",
    group: "executable",
    description: "Web Assembly",
    extensions: &["wasm"],
    is_text: false,
};

/// Type information for the `wav` content type (Waveform Audio file).
pub(crate) static WAV: TypeInfo = TypeInfo {
    label: "wav",
    mime_type: "audio/x-wav",
    group: "audio",
    description: "Waveform Audio file (WAV)",
    extensions: &["wav"],
    is_text: false,
};

/// Type information for the `webm` content type (WebM media file).
pub(crate) static WEBM: TypeInfo = TypeInfo {
    label: "webm",
    mime_type: "video/webm",
    group: "video",
    description: "WebM media file",
    extensions: &["webm"],
    is_text: false,
};

/// Type information for the `webp` content type (WebP media file).
pub(crate) static WEBP: TypeInfo = TypeInfo {
    label: "webp",
    mime_type: "image/webp",
    group: "image",
    description: "WebP media file",
    extensions: &["webp"],
    is_text: false,
};

/// Type information for the `winregistry` content type (Windows Registry text).
pub(crate) static WINREGISTRY: TypeInfo = TypeInfo {
    label: "winregistry",
    mime_type: "text/x-ms-regedit",
    group: "application",
    description: "Windows Registry text",
    extensions: &["reg"],
    is_text: true,
};

/// Type information for the `wmf` content type (Windows metafile).
pub(crate) static WMF: TypeInfo = TypeInfo {
    label: "wmf",
    mime_type: "image/wmf",
    group: "image",
    description: "Windows metafile",
    extensions: &["wmf"],
    is_text: false,
};

/// Type information for the `woff` content type (Web Open Font Format).
pub(crate) static WOFF: TypeInfo = TypeInfo {
    label: "woff",
    mime_type: "font/woff",
    group: "font",
    description: "Web Open Font Format",
    extensions: &["woff"],
    is_text: false,
};

/// Type information for the `woff2` content type (Web Open Font Format v2).
pub(crate) static WOFF2: TypeInfo = TypeInfo {
    label: "woff2",
    mime_type: "font/woff2",
    group: "font",
    description: "Web Open Font Format v2",
    extensions: &["woff2"],
    is_text: false,
};

/// Type information for the `xar` content type (XAR archive compressed data).
///
/// Lists `pkg` in addition to `xar` as an extension.
pub(crate) static XAR: TypeInfo = TypeInfo {
    label: "xar",
    mime_type: "application/x-xar",
    group: "archive",
    description: "XAR archive compressed data",
    extensions: &["pkg", "xar"],
    is_text: false,
};

/// Type information for the `xls` content type (Microsoft Excel CDF document).
pub(crate) static XLS: TypeInfo = TypeInfo {
    label: "xls",
    mime_type: "application/vnd.ms-excel",
    group: "document",
    description: "Microsoft Excel CDF document",
    extensions: &["xls"],
    is_text: false,
};

/// Type information for the `xlsb` content type (Microsoft Excel 2007+ document, binary format).
///
/// NOTE: reports the same MIME type as the `xlsx` entry, so the two are only distinguishable
/// through their label, description and extensions.
pub(crate) static XLSB: TypeInfo = TypeInfo {
    label: "xlsb",
    mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    group: "document",
    description: "Microsoft Excel 2007+ document (binary format)",
    extensions: &["xlsb"],
    is_text: false,
};

/// Type information for the `xlsx` content type (Microsoft Excel 2007+ document).
///
/// Also lists the macro-enabled `xlsm` extension.
pub(crate) static XLSX: TypeInfo = TypeInfo {
    label: "xlsx",
    mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    group: "document",
    description: "Microsoft Excel 2007+ document",
    extensions: &["xlsx", "xlsm"],
    is_text: false,
};

/// Type information for the `xml` content type (XML document).
///
/// Filed under the `code` group.
pub(crate) static XML: TypeInfo = TypeInfo {
    label: "xml",
    mime_type: "text/xml",
    group: "code",
    description: "XML document",
    extensions: &["xml"],
    is_text: true,
};

/// Type information for the `xpi` content type (Compressed installation archive).
///
/// NOTE: reports the same `application/zip` MIME type as the plain `zip` entry.
pub(crate) static XPI: TypeInfo = TypeInfo {
    label: "xpi",
    mime_type: "application/zip",
    group: "archive",
    description: "Compressed installation archive (XPI)",
    extensions: &["xpi"],
    is_text: false,
};

/// Type information for the `xz` content type (XZ compressed data).
pub(crate) static XZ: TypeInfo = TypeInfo {
    label: "xz",
    mime_type: "application/x-xz",
    group: "archive",
    description: "XZ compressed data",
    extensions: &["xz"],
    is_text: false,
};

/// Type information for the `yaml` content type (YAML source).
///
/// Covers both the `yml` and `yaml` extensions.
pub(crate) static YAML: TypeInfo = TypeInfo {
    label: "yaml",
    mime_type: "application/x-yaml",
    group: "code",
    description: "YAML source",
    extensions: &["yml", "yaml"],
    is_text: true,
};

/// Type information for the `yara` content type (YARA rule).
pub(crate) static YARA: TypeInfo = TypeInfo {
    label: "yara",
    mime_type: "text/x-yara",
    group: "code",
    description: "YARA rule",
    extensions: &["yar", "yara"],
    is_text: true,
};

/// Type information for the `zig` content type (Zig source).
pub(crate) static ZIG: TypeInfo = TypeInfo {
    label: "zig",
    mime_type: "text/zig",
    group: "code",
    description: "Zig source",
    extensions: &["zig"],
    is_text: true,
};

/// Type information for the `zip` content type (Zip archive data).
pub(crate) static ZIP: TypeInfo = TypeInfo {
    label: "zip",
    mime_type: "application/zip",
    group: "archive",
    description: "Zip archive data",
    extensions: &["zip"],
    is_text: false,
};

/// Type information for the `zlibstream` content type (zlib compressed data).
///
/// No extensions are listed for this type.
pub(crate) static ZLIBSTREAM: TypeInfo = TypeInfo {
    label: "zlibstream",
    mime_type: "application/zlib",
    group: "application",
    description: "zlib compressed data",
    extensions: &[],
    is_text: false,
};

/// Content types for regular files.
///
/// Each variant is documented with the short human-readable description of the type it stands
/// for; the full metadata (label, MIME type, group, extensions, text flag) is available through
/// [`ContentType::info`].
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a wildcard arm when
/// matching on it, which allows new content types to be added without a breaking change.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ContentType {
    /// 3GPP multimedia file
    _3gp,
    /// ACE archive
    Ace,
    /// Adobe Illustrator Artwork
    Ai,
    /// Android Interface Definition Language
    Aidl,
    /// Android package
    Apk,
    /// Apple binary property list
    Applebplist,
    /// Apple property list
    Appleplist,
    /// Assembly
    Asm,
    /// ASP source
    Asp,
    /// AutoHotKey script
    Autohotkey,
    /// AutoIt script
    Autoit,
    /// Awk
    Awk,
    /// DOS batch file
    Batch,
    /// Bazel build file
    Bazel,
    /// BibTeX
    Bib,
    /// BMP image data
    Bmp,
    /// bzip2 compressed data
    Bzip,
    /// C source
    C,
    /// Microsoft Cabinet archive data
    Cab,
    /// Windows Catalog file
    Cat,
    /// MS Windows HtmlHelp Data
    Chm,
    /// Clojure
    Clojure,
    /// CMake build file
    Cmake,
    /// Cobol
    Cobol,
    /// Intel 80386 COFF
    Coff,
    /// CoffeeScript
    Coffeescript,
    /// C++ source
    Cpp,
    /// Certificates (binary format)
    Crt,
    /// Google Chrome extension
    Crx,
    /// C# source
    Cs,
    /// .NET project config
    Csproj,
    /// CSS source
    Css,
    /// CSV document
    Csv,
    /// Dart source
    Dart,
    /// Debian binary package
    Deb,
    /// Dalvik dex file
    Dex,
    /// DICOM
    Dicom,
    /// Diff file
    Diff,
    /// Dream Maker
    Dm,
    /// Apple disk image
    Dmg,
    /// Microsoft Word CDF document
    Doc,
    /// Dockerfile
    Dockerfile,
    /// Microsoft Word 2007+ document
    Docx,
    /// Application Desktop Services Store
    Dsstore,
    /// Autocad Drawing
    Dwg,
    /// Autocad Drawing Exchange Format
    Dxf,
    /// ELF executable
    Elf,
    /// Elixir script
    Elixir,
    /// Windows Enhanced Metafile image data
    Emf,
    /// RFC 822 mail
    Eml,
    /// Empty file
    Empty,
    /// EPUB document
    Epub,
    /// Embedded Ruby source
    Erb,
    /// Erlang source
    Erlang,
    /// FLAC audio bitstream data
    Flac,
    /// Flash Video
    Flv,
    /// Fortran
    Fortran,
    /// Gemfile file
    Gemfile,
    /// Gemspec file
    Gemspec,
    /// GIF image data
    Gif,
    /// Gitattributes file
    Gitattributes,
    /// Gitmodules file
    Gitmodules,
    /// Golang source
    Go,
    /// Gradle source
    Gradle,
    /// Groovy source
    Groovy,
    /// gzip compressed data
    Gzip,
    /// Hierarchical Data Format v5
    H5,
    /// Handlebars source
    Handlebars,
    /// Haskell source
    Haskell,
    /// HashiCorp configuration language
    Hcl,
    /// MS Windows help
    Hlp,
    /// Apache access configuration
    Htaccess,
    /// HTML document
    Html,
    /// Mac OS X icon
    Icns,
    /// MS Windows icon resource
    Ico,
    /// Internet Calendaring and Scheduling
    Ics,
    /// Ignorefile
    Ignorefile,
    /// INI configuration file
    Ini,
    /// MS Windows Internet shortcut
    Internetshortcut,
    /// Jupyter notebook
    Ipynb,
    /// ISO 9660 CD-ROM filesystem data
    Iso,
    /// Java archive data (JAR)
    Jar,
    /// Java source
    Java,
    /// Java compiled bytecode
    Javabytecode,
    /// JavaScript source
    Javascript,
    /// Jinja template
    Jinja,
    /// jpeg2000
    Jp2,
    /// JPEG image data
    Jpeg,
    /// JSON document
    Json,
    /// JSONL document
    Jsonl,
    /// Julia source
    Julia,
    /// Kotlin source
    Kotlin,
    /// LaTeX document
    Latex,
    /// LHarc archive
    Lha,
    /// Lisp source
    Lisp,
    /// MS Windows shortcut
    Lnk,
    /// Lua
    Lua,
    /// M3U playlist
    M3u,
    /// GNU Macro
    M4,
    /// Mach-O executable
    Macho,
    /// Makefile source
    Makefile,
    /// Markdown document
    Markdown,
    /// Matlab Source
    Matlab,
    /// MHTML document
    Mht,
    /// Midi
    Midi,
    /// Matroska
    Mkv,
    /// MP3 media file
    Mp3,
    /// MP4 media file
    Mp4,
    /// MS Compress archive data
    Mscompress,
    /// Microsoft Installer file
    Msi,
    /// Windows Update Package file
    Mum,
    /// Numpy Array
    Npy,
    /// Numpy Arrays Archive
    Npz,
    /// NuGet Package
    Nupkg,
    /// ObjectiveC source
    Objectivec,
    /// OCaml
    Ocaml,
    /// OpenDocument Presentation
    Odp,
    /// OpenDocument Spreadsheet
    Ods,
    /// OpenDocument Text
    Odt,
    /// Ogg data
    Ogg,
    /// One Note
    One,
    /// Open Neural Network Exchange
    Onnx,
    /// OpenType font
    Otf,
    /// MS Outlook Message
    Outlook,
    /// Apache Parquet
    Parquet,
    /// Pascal source
    Pascal,
    /// pcap capture file
    Pcap,
    /// Windows Program Database
    Pdb,
    /// PDF document
    Pdf,
    /// PE Windows executable
    Pebin,
    /// PEM certificate
    Pem,
    /// Perl source
    Perl,
    /// PHP source
    Php,
    /// Python pickle
    Pickle,
    /// PNG image
    Png,
    /// Portable Object (PO) for i18n
    Po,
    /// PostScript document
    Postscript,
    /// Powershell source
    Powershell,
    /// Microsoft PowerPoint CDF document
    Ppt,
    /// Microsoft PowerPoint 2007+ document
    Pptx,
    /// Prolog source
    Prolog,
    /// Protein DB
    Proteindb,
    /// Protocol buffer definition
    Proto,
    /// Adobe Photoshop
    Psd,
    /// Python source
    Python,
    /// Python compiled bytecode
    Pythonbytecode,
    /// Pytorch storage file
    Pytorch,
    /// QuickTime
    Qt,
    /// R (language)
    R,
    /// Random bytes
    Randombytes,
    /// Random text
    Randomtxt,
    /// RAR archive data
    Rar,
    /// Resource Description Framework document (RDF)
    Rdf,
    /// RedHat Package Manager archive (RPM)
    Rpm,
    /// ReStructuredText document
    Rst,
    /// Rich Text Format document
    Rtf,
    /// Ruby source
    Ruby,
    /// Rust source
    Rust,
    /// Scala source
    Scala,
    /// SCSS source
    Scss,
    /// 7-zip archive data
    Sevenzip,
    /// sgml
    Sgml,
    /// Shell script
    Shell,
    /// Smali source
    Smali,
    /// Snap archive
    Snap,
    /// Solidity source
    Solidity,
    /// SQL source
    Sql,
    /// SQLITE database
    Sqlite,
    /// Squash filesystem
    Squashfs,
    /// SubRip Text Format
    Srt,
    /// Stereolithography CAD (binary)
    Stlbinary,
    /// Stereolithography CAD (text)
    Stltext,
    /// Checksum file
    Sum,
    /// SVG Scalable Vector Graphics image data
    Svg,
    /// Small Web File
    Swf,
    /// Swift
    Swift,
    /// POSIX tar archive
    Tar,
    /// Tickle
    Tcl,
    /// Text protocol buffer
    Textproto,
    /// Targa image data
    Tga,
    /// Windows thumbnail cache
    Thumbsdb,
    /// TIFF image data
    Tiff,
    /// Tom's obvious, minimal language
    Toml,
    /// BitTorrent file
    Torrent,
    /// TSV document
    Tsv,
    /// TrueType Font data
    Ttf,
    /// Twig template
    Twig,
    /// Generic text document
    Txt,
    /// TypeScript source
    Typescript,
    /// Undefined
    Undefined,
    /// Unknown binary data
    Unknown,
    /// MS Visual Basic source (VBA)
    Vba,
    /// Visual Studio MSBuild project
    Vcxproj,
    /// Verilog source
    Verilog,
    /// VHDL source
    Vhdl,
    /// Web Video Text Tracks
    Vtt,
    /// Vue source
    Vue,
    /// Web Assembly
    Wasm,
    /// Waveform Audio file (WAV)
    Wav,
    /// WebM media file
    Webm,
    /// WebP media file
    Webp,
    /// Windows Registry text
    Winregistry,
    /// Windows metafile
    Wmf,
    /// Web Open Font Format
    Woff,
    /// Web Open Font Format v2
    Woff2,
    /// XAR archive compressed data
    Xar,
    /// Microsoft Excel CDF document
    Xls,
    /// Microsoft Excel 2007+ document (binary format)
    Xlsb,
    /// Microsoft Excel 2007+ document
    Xlsx,
    /// XML document
    Xml,
    /// Compressed installation archive (XPI)
    Xpi,
    /// XZ compressed data
    Xz,
    /// YAML source
    Yaml,
    /// YARA rule
    Yara,
    /// Zig source
    Zig,
    /// Zip archive data
    Zip,
    /// zlib compressed data
    Zlibstream,
}

impl ContentType {
    pub(crate) const SIZE: usize = 217;

    /// Returns the content type information.
    pub fn info(self) -> &'static TypeInfo {
        match self {
            ContentType::_3gp => &_3GP,
            ContentType::Ace => &ACE,
            ContentType::Ai => &AI,
            ContentType::Aidl => &AIDL,
            ContentType::Apk => &APK,
            ContentType::Applebplist => &APPLEBPLIST,
            ContentType::Appleplist => &APPLEPLIST,
            ContentType::Asm => &ASM,
            ContentType::Asp => &ASP,
            ContentType::Autohotkey => &AUTOHOTKEY,
            ContentType::Autoit => &AUTOIT,
            ContentType::Awk => &AWK,
            ContentType::Batch => &BATCH,
            ContentType::Bazel => &BAZEL,
            ContentType::Bib => &BIB,
            ContentType::Bmp => &BMP,
            ContentType::Bzip => &BZIP,
            ContentType::C => &C,
            ContentType::Cab => &CAB,
            ContentType::Cat => &CAT,
            ContentType::Chm => &CHM,
            ContentType::Clojure => &CLOJURE,
            ContentType::Cmake => &CMAKE,
            ContentType::Cobol => &COBOL,
            ContentType::Coff => &COFF,
            ContentType::Coffeescript => &COFFEESCRIPT,
            ContentType::Cpp => &CPP,
            ContentType::Crt => &CRT,
            ContentType::Crx => &CRX,
            ContentType::Cs => &CS,
            ContentType::Csproj => &CSPROJ,
            ContentType::Css => &CSS,
            ContentType::Csv => &CSV,
            ContentType::Dart => &DART,
            ContentType::Deb => &DEB,
            ContentType::Dex => &DEX,
            ContentType::Dicom => &DICOM,
            ContentType::Diff => &DIFF,
            ContentType::Dm => &DM,
            ContentType::Dmg => &DMG,
            ContentType::Doc => &DOC,
            ContentType::Dockerfile => &DOCKERFILE,
            ContentType::Docx => &DOCX,
            ContentType::Dsstore => &DSSTORE,
            ContentType::Dwg => &DWG,
            ContentType::Dxf => &DXF,
            ContentType::Elf => &ELF,
            ContentType::Elixir => &ELIXIR,
            ContentType::Emf => &EMF,
            ContentType::Eml => &EML,
            ContentType::Empty => &EMPTY,
            ContentType::Epub => &EPUB,
            ContentType::Erb => &ERB,
            ContentType::Erlang => &ERLANG,
            ContentType::Flac => &FLAC,
            ContentType::Flv => &FLV,
            ContentType::Fortran => &FORTRAN,
            ContentType::Gemfile => &GEMFILE,
            ContentType::Gemspec => &GEMSPEC,
            ContentType::Gif => &GIF,
            ContentType::Gitattributes => &GITATTRIBUTES,
            ContentType::Gitmodules => &GITMODULES,
            ContentType::Go => &GO,
            ContentType::Gradle => &GRADLE,
            ContentType::Groovy => &GROOVY,
            ContentType::Gzip => &GZIP,
            ContentType::H5 => &H5,
            ContentType::Handlebars => &HANDLEBARS,
            ContentType::Haskell => &HASKELL,
            ContentType::Hcl => &HCL,
            ContentType::Hlp => &HLP,
            ContentType::Htaccess => &HTACCESS,
            ContentType::Html => &HTML,
            ContentType::Icns => &ICNS,
            ContentType::Ico => &ICO,
            ContentType::Ics => &ICS,
            ContentType::Ignorefile => &IGNOREFILE,
            ContentType::Ini => &INI,
            ContentType::Internetshortcut => &INTERNETSHORTCUT,
            ContentType::Ipynb => &IPYNB,
            ContentType::Iso => &ISO,
            ContentType::Jar => &JAR,
            ContentType::Java => &JAVA,
            ContentType::Javabytecode => &JAVABYTECODE,
            ContentType::Javascript => &JAVASCRIPT,
            ContentType::Jinja => &JINJA,
            ContentType::Jp2 => &JP2,
            ContentType::Jpeg => &JPEG,
            ContentType::Json => &JSON,
            ContentType::Jsonl => &JSONL,
            ContentType::Julia => &JULIA,
            ContentType::Kotlin => &KOTLIN,
            ContentType::Latex => &LATEX,
            ContentType::Lha => &LHA,
            ContentType::Lisp => &LISP,
            ContentType::Lnk => &LNK,
            ContentType::Lua => &LUA,
            ContentType::M3u => &M3U,
            ContentType::M4 => &M4,
            ContentType::Macho => &MACHO,
            ContentType::Makefile => &MAKEFILE,
            ContentType::Markdown => &MARKDOWN,
            ContentType::Matlab => &MATLAB,
            ContentType::Mht => &MHT,
            ContentType::Midi => &MIDI,
            ContentType::Mkv => &MKV,
            ContentType::Mp3 => &MP3,
            ContentType::Mp4 => &MP4,
            ContentType::Mscompress => &MSCOMPRESS,
            ContentType::Msi => &MSI,
            ContentType::Mum => &MUM,
            ContentType::Npy => &NPY,
            ContentType::Npz => &NPZ,
            ContentType::Nupkg => &NUPKG,
            ContentType::Objectivec => &OBJECTIVEC,
            ContentType::Ocaml => &OCAML,
            ContentType::Odp => &ODP,
            ContentType::Ods => &ODS,
            ContentType::Odt => &ODT,
            ContentType::Ogg => &OGG,
            ContentType::One => &ONE,
            ContentType::Onnx => &ONNX,
            ContentType::Otf => &OTF,
            ContentType::Outlook => &OUTLOOK,
            ContentType::Parquet => &PARQUET,
            ContentType::Pascal => &PASCAL,
            ContentType::Pcap => &PCAP,
            ContentType::Pdb => &PDB,
            ContentType::Pdf => &PDF,
            ContentType::Pebin => &PEBIN,
            ContentType::Pem => &PEM,
            ContentType::Perl => &PERL,
            ContentType::Php => &PHP,
            ContentType::Pickle => &PICKLE,
            ContentType::Png => &PNG,
            ContentType::Po => &PO,
            ContentType::Postscript => &POSTSCRIPT,
            ContentType::Powershell => &POWERSHELL,
            ContentType::Ppt => &PPT,
            ContentType::Pptx => &PPTX,
            ContentType::Prolog => &PROLOG,
            ContentType::Proteindb => &PROTEINDB,
            ContentType::Proto => &PROTO,
            ContentType::Psd => &PSD,
            ContentType::Python => &PYTHON,
            ContentType::Pythonbytecode => &PYTHONBYTECODE,
            ContentType::Pytorch => &PYTORCH,
            ContentType::Qt => &QT,
            ContentType::R => &R,
            ContentType::Randombytes => &RANDOMBYTES,
            ContentType::Randomtxt => &RANDOMTXT,
            ContentType::Rar => &RAR,
            ContentType::Rdf => &RDF,
            ContentType::Rpm => &RPM,
            ContentType::Rst => &RST,
            ContentType::Rtf => &RTF,
            ContentType::Ruby => &RUBY,
            ContentType::Rust => &RUST,
            ContentType::Scala => &SCALA,
            ContentType::Scss => &SCSS,
            ContentType::Sevenzip => &SEVENZIP,
            ContentType::Sgml => &SGML,
            ContentType::Shell => &SHELL,
            ContentType::Smali => &SMALI,
            ContentType::Snap => &SNAP,
            ContentType::Solidity => &SOLIDITY,
            ContentType::Sql => &SQL,
            ContentType::Sqlite => &SQLITE,
            ContentType::Squashfs => &SQUASHFS,
            ContentType::Srt => &SRT,
            ContentType::Stlbinary => &STLBINARY,
            ContentType::Stltext => &STLTEXT,
            ContentType::Sum => &SUM,
            ContentType::Svg => &SVG,
            ContentType::Swf => &SWF,
            ContentType::Swift => &SWIFT,
            ContentType::Tar => &TAR,
            ContentType::Tcl => &TCL,
            ContentType::Textproto => &TEXTPROTO,
            ContentType::Tga => &TGA,
            ContentType::Thumbsdb => &THUMBSDB,
            ContentType::Tiff => &TIFF,
            ContentType::Toml => &TOML,
            ContentType::Torrent => &TORRENT,
            ContentType::Tsv => &TSV,
            ContentType::Ttf => &TTF,
            ContentType::Twig => &TWIG,
            ContentType::Txt => &TXT,
            ContentType::Typescript => &TYPESCRIPT,
            ContentType::Undefined => &UNDEFINED,
            ContentType::Unknown => &UNKNOWN,
            ContentType::Vba => &VBA,
            ContentType::Vcxproj => &VCXPROJ,
            ContentType::Verilog => &VERILOG,
            ContentType::Vhdl => &VHDL,
            ContentType::Vtt => &VTT,
            ContentType::Vue => &VUE,
            ContentType::Wasm => &WASM,
            ContentType::Wav => &WAV,
            ContentType::Webm => &WEBM,
            ContentType::Webp => &WEBP,
            ContentType::Winregistry => &WINREGISTRY,
            ContentType::Wmf => &WMF,
            ContentType::Woff => &WOFF,
            ContentType::Woff2 => &WOFF2,
            ContentType::Xar => &XAR,
            ContentType::Xls => &XLS,
            ContentType::Xlsb => &XLSB,
            ContentType::Xlsx => &XLSX,
            ContentType::Xml => &XML,
            ContentType::Xpi => &XPI,
            ContentType::Xz => &XZ,
            ContentType::Yaml => &YAML,
            ContentType::Yara => &YARA,
            ContentType::Zig => &ZIG,
            ContentType::Zip => &ZIP,
            ContentType::Zlibstream => &ZLIBSTREAM,
        }
    }
}


================================================
FILE: rust/lib/src/error.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/// Result type of Magika functions.
pub type Result = core::result::Result;

/// Errors returned by Magika functions.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Input/output errors reported by the standard library.
    #[error("{0}")]
    IOError(#[from] std::io::Error),

    /// Errors reported by the ONNX Runtime.
    #[error("{0}")]
    OrtError(#[from] ort::Error),

    /// Shape errors reported by the ndarray library.
    #[error("{0}")]
    ShapeError(#[from] ndarray::ShapeError),
}


================================================
FILE: rust/lib/src/file.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ndarray::ArrayViewD;

use crate::model::Label;
use crate::ContentType;

/// File types.
///
/// The word file is used in the Linux sense where everything is a file. This could be equivalently
/// understood as a path.
///
/// Only the [`FileType::Inferred`] variant carries a model score. All other variants report a
/// score of 1 through [`FileType::score()`].
#[derive(Debug, Clone)]
pub enum FileType {
    /// The file is a directory.
    Directory,

    /// The file is a symbolic link.
    Symlink,

    /// The file is a regular file and was identified using AI.
    ///
    /// The payload keeps both the raw inference and the possible overwrite.
    Inferred(InferredType),

    /// The file is a regular file and was identified using rules.
    Ruled(ContentType),
}

/// Content type identified using AI.
#[derive(Debug, Clone)]
pub struct InferredType {
    /// The overwriting content type and the reason of the overwrite, if any.
    ///
    /// The inferred content type may be overwritten for a variety of reasons. This field is `None`
    /// when the inferred content type was kept as is. Use [`Self::content_type()`] to access the
    /// final content type (after possible overwrite).
    pub content_type: Option<(ContentType, OverwriteReason)>,

    /// The inferred content type (before possible overwrite).
    pub inferred_type: ContentType,

    /// The inference score between 0 and 1.
    pub score: f32,
}

/// Reason to overwrite an inferred content type.
#[derive(Debug, Clone)]
pub enum OverwriteReason {
    /// The inference score is too low for the inferred content type.
    ///
    /// The score is below the threshold configured for the inferred content type. The content
    /// type is replaced by a generic one: text or unknown.
    LowConfidence,

    /// The inferred content type is not canonical.
    ///
    /// The overwrite map of the model configuration maps the inferred content type to another one.
    OverwriteMap,
}

impl FileType {
    /// Returns the content type for regular files.
    pub fn content_type(&self) -> Option {
        match self {
            FileType::Directory => None,
            FileType::Symlink => None,
            FileType::Inferred(x) => Some(x.content_type()),
            FileType::Ruled(x) => Some(*x),
        }
    }

    /// Returns the file type information.
    pub fn info(&self) -> &'static TypeInfo {
        match self {
            FileType::Directory => &crate::content::DIRECTORY,
            FileType::Symlink => &crate::content::SYMLINK,
            FileType::Inferred(x) => x.content_type().info(),
            FileType::Ruled(x) => x.info(),
        }
    }

    /// Returns the score of the identification, between 0 and 1.
    ///
    /// If the model was run, this is the model score. Otherwise this is 1.
    pub fn score(&self) -> f32 {
        match self {
            FileType::Directory => 1.0,
            FileType::Symlink => 1.0,
            FileType::Inferred(x) => x.score,
            FileType::Ruled(_) => 1.0,
        }
    }
}

impl InferredType {
    /// Returns the final content type.
    ///
    /// This is the overwriting content type if there is one, and the inferred one otherwise.
    pub fn content_type(&self) -> ContentType {
        self.content_type.as_ref().map_or(self.inferred_type, |(overwrite, _)| *overwrite)
    }
}

/// File type information.
///
/// All fields are static data. Values of this type are obtained by reference, for example with
/// [`FileType::info()`].
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TypeInfo {
    /// The unique label identifying this file type.
    pub label: &'static str,

    /// The MIME type of the file type.
    pub mime_type: &'static str,

    /// The group of the file type.
    pub group: &'static str,

    /// The description of the file type.
    pub description: &'static str,

    /// Possible extensions for the file type.
    pub extensions: &'static [&'static str],

    /// Whether the file type is text.
    ///
    /// This decides between the text and unknown content types when the inference score is too
    /// low.
    pub is_text: bool,
}

impl FileType {
    pub(crate) fn convert(tensor: ArrayViewD) -> Vec {
        let mut results = Vec::new();
        for view in tensor.view().axis_iter(ndarray::Axis(0)) {
            let scores = view.to_slice().unwrap();
            let mut best = 0;
            for (i, &x) in scores.iter().enumerate() {
                if scores[best].max(x) == x {
                    best = i;
                }
            }
            assert!(best < crate::model::NUM_LABELS);
            let score = scores[best];
            // SAFETY: Labels are u32 smaller than NUM_LABELS.
            let label = unsafe { std::mem::transmute::(best as u32) };
            let inferred_type = label.content_type();
            let config = &crate::model::CONFIG;
            let mut content_type = if score < config.thresholds[inferred_type as usize] {
                let is_text = inferred_type.info().is_text;
                Some((
                    if is_text { ContentType::Txt } else { ContentType::Unknown },
                    OverwriteReason::LowConfidence,
                ))
            } else {
                let overwrite = config.overwrite_map[inferred_type as usize];
                (overwrite != inferred_type).then_some((overwrite, OverwriteReason::OverwriteMap))
            };
            if content_type.as_ref().is_some_and(|(x, _)| *x == inferred_type) {
                content_type = None;
            }
            results.push(FileType::Inferred(InferredType { content_type, inferred_type, score }));
        }
        results
    }
}


================================================
FILE: rust/lib/src/future.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fs::Metadata;
use std::future::Future;
use std::path::Path;
use std::pin::Pin;
use std::sync::OnceLock;
use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};

use ndarray::Array2;
use ort::session::{NoSelectedOutputs, RunOptions};
use ort::value::Tensor;

use crate::{AsyncInput, Result};

pub(crate) fn exec(mut future: impl Future) -> T {
    let future = unsafe { Pin::new_unchecked(&mut future) };
    let waker = panic_waker();
    let mut context = Context::from_waker(&waker);
    match future.poll(&mut context) {
        Poll::Ready(x) => x,
        Poll::Pending => unreachable!(),
    }
}

/// Operations whose implementation differs between synchronous and asynchronous execution.
///
/// All functions are associated functions (no `self`): implementors are only used as types.
///
/// NOTE(review): the generic arguments of `Result` and `Array2` (and the unbalanced `>` in the
/// return type of `ort_session_run`) look truncated in this copy of the file. Confirm against
/// the upstream sources before building.
pub(crate) trait Env {
    /// The file handle returned by `open`.
    type File: AsyncInput;
    /// Queries the metadata of a path.
    async fn symlink_metadata(path: &Path) -> Result;
    /// Opens the file at a path.
    async fn open(path: &Path) -> Result;
    /// Runs the model session on the given input.
    async fn ort_session_run(
        session: &mut ort::session::Session, input: Array2,
    ) -> Result>;
}

/// Environment relying on blocking operations of the standard library.
///
/// This enum has no variants and thus no values: it is only used as a type.
///
/// NOTE(review): generic arguments look truncated in this copy of the file (see the signatures
/// below). Confirm against the upstream sources.
pub(crate) enum SyncEnv {}
impl Env for SyncEnv {
    type File = std::fs::File;

    // None of the functions below awaits anything: their futures complete on the first poll.

    async fn symlink_metadata(path: &Path) -> Result {
        Ok(std::fs::symlink_metadata(path)?)
    }

    async fn open(path: &Path) -> Result {
        Ok(std::fs::File::open(path)?)
    }

    async fn ort_session_run(
        session: &mut ort::session::Session, input: Array2,
    ) -> Result> {
        // The model input is named "bytes".
        Ok(session.run(ort::inputs!("bytes" => Tensor::from_array(input)?))?)
    }
}

/// Environment relying on asynchronous operations (tokio for files, `run_async` for the model).
///
/// This enum has no variants and thus no values: it is only used as a type.
///
/// NOTE(review): generic arguments look truncated in this copy of the file (see the signatures
/// and the `OPTIONS` static below). Confirm against the upstream sources.
pub(crate) enum AsyncEnv {}
impl Env for AsyncEnv {
    type File = tokio::fs::File;

    async fn symlink_metadata(path: &Path) -> Result {
        Ok(tokio::fs::symlink_metadata(path).await?)
    }

    async fn open(path: &Path) -> Result {
        Ok(tokio::fs::File::open(path).await?)
    }

    async fn ort_session_run(
        session: &mut ort::session::Session, input: Array2,
    ) -> Result> {
        // The run options are created on first use and shared by all subsequent calls.
        static OPTIONS: OnceLock> = OnceLock::new();
        // TODO(https://github.com/rust-lang/rust/issues/109737): Use get_or_try_init.
        let options = match OPTIONS.get() {
            Some(x) => x,
            None => {
                // A creation failure is returned to the caller and nothing is stored. The result
                // of `set` is ignored: if the cell was set in between, that value is used.
                let _ = OPTIONS.set(RunOptions::new()?);
                OPTIONS.get().unwrap()
            }
        };
        Ok(session.run_async(ort::inputs!("bytes" => Tensor::from_array(input)?), options)?.await?)
    }
}

/// Builds a waker that must never be woken.
///
/// The waker carries no data (null pointer). Cloning and dropping it are harmless, but waking it
/// (by value or by reference) hits `unreachable!()`.
fn panic_waker() -> Waker {
    fn clone_raw(data: *const ()) -> RawWaker {
        RawWaker::new(data, &VTABLE)
    }
    fn wake_forbidden(_data: *const ()) {
        unreachable!()
    }
    fn drop_noop(_data: *const ()) {}
    const VTABLE: RawWakerVTable =
        RawWakerVTable::new(clone_raw, wake_forbidden, wake_forbidden, drop_noop);

    let raw_waker = clone_raw(std::ptr::null());
    // SAFETY: The vtable functions never dereference the data pointer and are thread-safe.
    unsafe { Waker::from_raw(raw_waker) }
}


================================================
FILE: rust/lib/src/input.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::future::Future;
use std::io::{Read, Seek, SeekFrom};

use tokio::io::{AsyncReadExt as _, AsyncSeekExt as _};

use crate::config::ModelConfig;
use crate::future::exec;
use crate::{ContentType, Result};

/// Features to identify a file using AI.
///
/// The features are the model input for one file: byte values extracted from the file, padded
/// with the padding token of the model configuration.
pub struct Features(pub(crate) Vec<i32>);

/// Synchronous abstraction over file content.
pub trait SyncInput {
    /// Returns the size of the input (in bytes).
    fn length(&self) -> Result<u64>;

    /// Reads from the input at the given offset to fill the buffer.
    ///
    /// The whole buffer must be filled: it is an error if the input does not contain
    /// `buffer.len()` bytes starting at `offset`.
    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()>;
}

/// Asynchronous abstraction over file content.
pub trait AsyncInput {
    /// Returns the size of the input.
    fn length(&self) -> impl Future>;

    /// Reads from the input at the given offset to fill the buffer.
    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> impl Future>;
}

// Compile-time check: the build fails on targets where usize is wider than u64.
const _: () = const {
    // We assume in the rest of the file, that u64 holds any usize.
    assert!(std::mem::size_of::<usize>() <= std::mem::size_of::<u64>());
};

/// In-memory content.
impl SyncInput for &[u8] {
    fn length(&self) -> Result<u64> {
        Ok(self.len() as u64)
    }

    /// Copies `buffer.len()` bytes starting at `offset` into `buffer`.
    ///
    /// Returns an unexpected end-of-file error (like reading past the end of a file) if the slice
    /// does not contain that many bytes at that offset.
    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
        let source = usize::try_from(offset)
            .ok()
            .and_then(|start| self.get(start..))
            .and_then(|tail| tail.get(..buffer.len()));
        match source {
            Some(source) => {
                buffer.copy_from_slice(source);
                Ok(())
            }
            None => Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into()),
        }
    }
}

/// Content of a file accessed with blocking operations.
impl SyncInput for std::fs::File {
    /// Returns the length reported by the file metadata.
    fn length(&self) -> Result<u64> {
        Ok(self.metadata()?.len())
    }

    /// Seeks to `offset` from the start of the file, then reads exactly `buffer.len()` bytes.
    ///
    /// This moves the cursor of the file.
    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
        self.seek(SeekFrom::Start(offset))?;
        Ok(self.read_exact(buffer)?)
    }
}

impl SyncInput for &mut T {
    fn length(&self) -> Result {
        ::length(self)
    }

    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
        ::read_at(self, buffer, offset)
    }
}

impl AsyncInput for T {
    fn length(&self) -> impl Future> {
        std::future::ready(self.length())
    }

    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> impl Future> {
        std::future::ready(self.read_at(buffer, offset))
    }
}

/// Content of a file accessed with asynchronous operations.
impl AsyncInput for tokio::fs::File {
    /// Returns the length reported by the file metadata.
    async fn length(&self) -> Result<u64> {
        Ok(self.metadata().await?.len())
    }

    /// Seeks to `offset` from the start of the file, then reads exactly `buffer.len()` bytes.
    ///
    /// This moves the cursor of the file.
    async fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
        self.seek(SeekFrom::Start(offset)).await?;
        self.read_exact(buffer).await?;
        Ok(())
    }
}

/// Result of features extraction.
///
/// Extraction either produces features to feed the model, or directly a content type when the
/// file is not suitable for the model (for example, when it is empty).
pub enum FeaturesOrRuled {
    /// Features extracted for identification using AI.
    Features(Features),

    /// Content identified with rules.
    Ruled(ContentType),
}

impl FeaturesOrRuled {
    /// Extracts the features from a file (synchronously).
    ///
    /// Returns the content type directly if the file cannot be identified using AI.
    pub fn extract_sync(file: impl SyncInput) -> Result<FeaturesOrRuled> {
        exec(Self::extract(file))
    }

    /// Extracts the features from a file (asynchronously).
    ///
    /// Returns the content type directly if the file cannot be identified using AI.
    pub async fn extract_async(file: impl AsyncInput) -> Result<FeaturesOrRuled> {
        Self::extract(file).await
    }

    /// Extracts the features from a file, or applies the rules for files unsuitable for AI.
    ///
    /// The rules are:
    /// - An empty file is [`ContentType::Empty`].
    /// - A file whose feature at index `min_file_size_for_dl - 1` is still the padding token is
    ///   [`ContentType::Txt`] if its first block is valid UTF-8 and [`ContentType::Unknown`]
    ///   otherwise.
    pub(crate) async fn extract(file: impl AsyncInput) -> Result<FeaturesOrRuled> {
        let config = &crate::model::CONFIG;
        let file_len = file.length().await?;
        if file_len == 0 {
            return Ok(FeaturesOrRuled::Ruled(ContentType::Empty));
        }
        let (first_block, features) = extract_features_async(config, file, file_len).await?;
        // Enough content was extracted for the model to be meaningful.
        if features[config.min_file_size_for_dl - 1] != config.padding_token {
            return Ok(FeaturesOrRuled::Features(Features(features)));
        }
        debug_assert!(first_block.len() <= config.block_size);
        let content_type = match std::str::from_utf8(&first_block) {
            Ok(_) => ContentType::Txt,
            Err(_) => ContentType::Unknown,
        };
        Ok(FeaturesOrRuled::Ruled(content_type))
    }
}

/// Extracts the features of a non-empty file.
///
/// At most `config.block_size` bytes are read from the beginning and from the end of the file.
/// Leading whitespace is stripped from the first block and trailing whitespace from the last one.
/// The features start filled with the padding token. The stripped first block is then copied at
/// the start of the beginning features and the stripped last block at the end of the end features
/// (content that does not fit is truncated).
///
/// Returns the first block (before stripping) together with the features.
async fn extract_features_async(
    config: &ModelConfig, mut file: impl AsyncInput, file_len: u64,
) -> Result<(Vec<u8>, Vec<i32>)> {
    debug_assert!(config.beg_size < config.block_size);
    debug_assert!(config.end_size < config.block_size);
    // Both blocks have the same size, so they overlap when the file is smaller than 2 blocks.
    let buffer_size = std::cmp::min(config.block_size as u64, file_len) as usize;
    let mut content_beg = vec![0; buffer_size];
    file.read_at(&mut content_beg, 0).await?;
    let beg = strip_prefix(&content_beg);
    let mut end = vec![0; buffer_size];
    file.read_at(&mut end, file_len - buffer_size as u64).await?;
    let end = strip_suffix(&end);
    let mut features = vec![config.padding_token; config.features_size()];
    let split_features = config.split_features(&mut features);
    copy_features(split_features.beg, beg, 0);
    copy_features(split_features.end, end, 1);
    Ok((content_beg, features))
}

/// Copies as many bytes as fit from `src` to `dst`, leaving the rest of `dst` untouched.
///
/// With `align` equal to 0, the first bytes of `src` are copied to the start of `dst`. With
/// `align` equal to 1, the last bytes of `src` are copied to the end of `dst`.
fn copy_features(dst: &mut [i32], src: &[u8], align: usize) {
    let count = dst.len().min(src.len());
    let dst_start = (dst.len() - count) * align;
    let window = &mut dst[dst_start..][..count];
    let src_start = (src.len() - count) * align;
    let bytes = &src[src_start..][..count];
    for (slot, &byte) in window.iter_mut().zip(bytes) {
        *slot = i32::from(byte);
    }
}

/// Returns the input without its leading whitespace bytes.
fn strip_prefix(xs: &[u8]) -> &[u8] {
    strip(xs, <[u8]>::split_first)
}

/// Returns the input without its trailing whitespace bytes.
fn strip_suffix(xs: &[u8]) -> &[u8] {
    strip(xs, <[u8]>::split_last)
}

/// Removes whitespace bytes from one side of the input.
///
/// The `split` function selects the side: it returns the next byte to consider and the remaining
/// bytes, or `None` when there are no bytes left.
fn strip(mut xs: &[u8], mut split: impl FnMut(&[u8]) -> Option<(&u8, &[u8])>) -> &[u8] {
    loop {
        match split(xs) {
            Some((&byte, rest)) if is_whitespace(byte) => xs = rest,
            _ => return xs,
        }
    }
}

/// Returns whether a byte is whitespace.
///
/// Those are the ASCII whitespace bytes (space, tab, line feed, form feed, and carriage return)
/// and the vertical tab.
fn is_whitespace(x: u8) -> bool {
    matches!(x, b' ' | b'\t' | b'\n' | 0x0b | 0x0c | b'\r')
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Read;

    use data_encoding::BASE64;
    use flate2::read::GzDecoder;
    use serde::Deserialize;

    use super::*;

    /// Checks features extraction against the reference examples shared by all implementations.
    #[test]
    fn features_extraction_reference() {
        // We deny unknown fields to be sure we don't pass the tests by accident when the JSON
        // format is modified. Fields that are not used are simply marked as dead-code.
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Args {
            beg_size: usize,
            mid_size: usize,
            end_size: usize,
            block_size: usize,
            padding_token: i32,
            use_inputs_at_offsets: bool,
        }
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Metadata {
            #[allow(dead_code)] // debugging only
            core_content_size: usize,
            #[allow(dead_code)] // debugging only
            left_ws_num: usize,
            #[allow(dead_code)] // debugging only
            right_ws_num: usize,
        }
        // Only `beg` and `end` are compared. The other features must be empty, so their element
        // type is irrelevant as long as it deserializes.
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Features {
            beg: Vec<usize>,
            mid: Vec<usize>,
            end: Vec<usize>,
            offset_0x8000_0x8007: Vec<usize>,
            offset_0x8800_0x8807: Vec<usize>,
            offset_0x9000_0x9007: Vec<usize>,
            offset_0x9800_0x9807: Vec<usize>,
        }
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Test {
            args: Args,
            #[allow(dead_code)] // debugging only
            metadata: Metadata,
            content_base64: String,
            features: Features,
        }
        const PATH: &str = "../../tests_data/reference/features_extraction_examples.json.gz";
        let mut tests = String::new();
        GzDecoder::new(File::open(PATH).unwrap()).read_to_string(&mut tests).unwrap();
        let tests: Vec<Test> = serde_json::from_str(&tests).unwrap();
        for test in tests {
            // Only the beginning and end features are implemented.
            assert_eq!(test.args.mid_size, 0, "unsupported mid_size");
            assert!(!test.args.use_inputs_at_offsets, "unsupported use_inputs_at_offsets");
            assert!(test.features.mid.is_empty(), "unsupported mid");
            assert!(test.features.offset_0x8000_0x8007.is_empty(), "unsupported offset");
            assert!(test.features.offset_0x8800_0x8807.is_empty(), "unsupported offset");
            assert!(test.features.offset_0x9000_0x9007.is_empty(), "unsupported offset");
            assert!(test.features.offset_0x9800_0x9807.is_empty(), "unsupported offset");
            // The model configuration is overridden with the arguments of the example.
            let config = ModelConfig {
                beg_size: test.args.beg_size,
                end_size: test.args.end_size,
                padding_token: test.args.padding_token,
                block_size: test.args.block_size,
                ..crate::model::CONFIG
            };
            let mut expected = Vec::new();
            expected.extend_from_slice(&test.features.beg);
            expected.extend_from_slice(&test.features.end);
            let content = BASE64.decode(test.content_base64.as_bytes()).unwrap();
            let actual = extract_features_async(&config, content.as_slice(), content.len() as u64);
            let actual = exec(actual).unwrap().1;
            let actual: Vec<_> = actual.into_iter().map(|x| x as usize).collect();
            assert_eq!(actual, expected, "{test:?}");
        }
    }
}


================================================
FILE: rust/lib/src/lib.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Determines file content types using AI.
//!
//! This library indirectly depends on the ONNX Runtime through the `ort` [crate][ort-crate]. The
//! final user is responsible for making sure the ONNX Runtime is available. The `ort` crate
//! provides [many options][ort-linking] in this regard. The simplest option is to enable the
//! default [cargo features][ort-features] of the `ort` crate by adding the following dependency to
//! the `Cargo.toml` file of the final binary:
//!
//! ```toml
//! [dependencies]
//! ort = "=2.0.0-rc.12"
//! ```
//!
//! # Examples
//!
//! ```rust
//! # fn main() -> magika::Result<()> {
//! // A Magika session can be used multiple times across multiple threads.
//! let mut magika = magika::Session::new()?;
//!
//! // Files can be identified from their path.
//! assert_eq!(magika.identify_file_sync("src/lib.rs")?.info().label, "rust");
//!
//! // Contents can also be identified directly from memory.
//! let result = magika.identify_content_sync(&b"#!/bin/sh\necho hello"[..])?;
//! assert_eq!(result.info().label, "shell");
//! # Ok(())
//! # }
//! ```
//!
//! [ort-crate]: https://crates.io/crates/ort
//! [ort-linking]: https://ort.pyke.io/setup/linking
//! [ort-features]: https://ort.pyke.io/setup/cargo-features

#![cfg_attr(feature = "_doc", feature(doc_cfg))]

pub use crate::builder::Builder;
pub use crate::content::{ContentType, MODEL_MAJOR_VERSION, MODEL_NAME};
pub use crate::error::{Error, Result};
pub use crate::file::{FileType, InferredType, OverwriteReason, TypeInfo};
pub use crate::input::{AsyncInput, Features, FeaturesOrRuled, SyncInput};
pub use crate::session::Session;

mod builder;
mod config;
mod content;
mod error;
mod file;
mod future;
mod input;
mod model;
mod session;

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Read;

    use data_encoding::BASE64;
    use flate2::read::GzDecoder;
    use serde::Deserialize;

    use super::*;

    /// Expected prediction of a reference example.
    #[derive(Debug, Deserialize)]
    #[serde(deny_unknown_fields)]
    struct Prediction {
        /// Label of the content type inferred by the model ("undefined" if the model did not run).
        dl: String,
        /// Label of the final content type.
        output: String,
        score: f32,
        /// One of "none", "low-confidence", or "overwrite-map".
        overwrite_reason: String,
    }

    /// Asserts that two floats are equal once truncated to 4 decimals.
    fn assert_float(actual: f32, expected: f32, debug: &str) {
        const PRECISION: f32 = 10000.;
        let actual = (actual * PRECISION).trunc() / PRECISION;
        let expected = (expected * PRECISION).trunc() / PRECISION;
        assert_eq!(actual, expected, "{debug}");
    }

    /// Asserts that an identification matches the expected prediction.
    ///
    /// The `debug` string identifies the example in assertion messages.
    fn assert_prediction(actual: FileType, expected: Prediction, debug: &str) {
        let actual = match actual {
            FileType::Inferred(x) => x,
            FileType::Ruled(content_type) => {
                assert_eq!(content_type.info().label, expected.output, "{debug}");
                assert_eq!(1.0, expected.score, "{debug}");
                assert_eq!("none", expected.overwrite_reason, "{debug}");
                assert_eq!("undefined", expected.dl, "{debug}");
                return;
            }
            _ => unreachable!(),
        };
        assert_eq!(actual.content_type().info().label, expected.output, "{debug}");
        assert_float(actual.score, expected.score, debug);
        let overwrite_reason = match actual.content_type {
            None => "none",
            Some((_, OverwriteReason::LowConfidence)) => "low-confidence",
            Some((_, OverwriteReason::OverwriteMap)) => "overwrite-map",
        };
        assert_eq!(overwrite_reason, expected.overwrite_reason, "{debug}");
        assert_eq!(actual.inferred_type.info().label, expected.dl, "{debug}");
    }

    /// Checks identification by path against the reference examples of the current model.
    #[test]
    fn identify_by_path_reference() {
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Test {
            prediction_mode: String,
            path: String,
            status: String,
            prediction: Option<Prediction>,
        }
        let path =
            format!("../../tests_data/reference/{MODEL_NAME}-inference_examples_by_path.json.gz");
        let mut tests = String::new();
        GzDecoder::new(File::open(path).unwrap()).read_to_string(&mut tests).unwrap();
        let tests: Vec<Test> = serde_json::from_str(&tests).unwrap();
        let mut session = Session::new().unwrap();
        for test in tests {
            if test.prediction_mode != "high-confidence" {
                continue; // we only support high-confidence
            }
            assert_eq!(test.status, "ok"); // only scenario tested so far
            let expected = test.prediction.unwrap();
            // Paths of the examples are relative to the root of the repository.
            let actual = session.identify_file_sync(format!("../../{}", test.path)).unwrap();
            assert_prediction(actual, expected, &test.path);
        }
    }

    /// Checks identification by content against the reference examples of the current model.
    #[test]
    fn identify_by_content_reference() {
        #[derive(Debug, Deserialize)]
        #[serde(deny_unknown_fields)]
        struct Test {
            prediction_mode: String,
            content_base64: String,
            status: String,
            prediction: Option<Prediction>,
        }
        let path = format!(
            "../../tests_data/reference/{MODEL_NAME}-inference_examples_by_content.json.gz"
        );
        let mut tests = String::new();
        GzDecoder::new(File::open(path).unwrap()).read_to_string(&mut tests).unwrap();
        let tests: Vec<Test> = serde_json::from_str(&tests).unwrap();
        let mut session = Session::new().unwrap();
        for test in tests {
            if test.prediction_mode != "high-confidence" {
                continue; // we only support high-confidence
            }
            assert_eq!(test.status, "ok"); // only scenario tested so far
            let expected = test.prediction.unwrap();
            let content = BASE64.decode(test.content_base64.as_bytes()).unwrap();
            let actual = session.identify_content_sync(content.as_slice()).unwrap();
            assert_prediction(actual, expected, &test.content_base64);
        }
    }
}


================================================
FILE: rust/lib/src/model.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// DO NOT EDIT, see link below for more information:
// https://github.com/google/magika/tree/main/rust/gen

use std::borrow::Cow;

use crate::config::ModelConfig;
use crate::ContentType;

// Configuration of the model, used for features extraction and to post-process the model output.
pub(crate) const CONFIG: ModelConfig = ModelConfig {
    // Presumably the number of features taken from the beginning and the end of the file (see
    // `ModelConfig::split_features`). TODO confirm.
    beg_size: 1024,
    end_size: 1024,
    // The feature at index `min_file_size_for_dl - 1` must not be padding for the model to be used.
    min_file_size_for_dl: 8,
    // Value of the features that are not set from the file content.
    padding_token: 256,
    // Maximum number of bytes read from each end of the file.
    block_size: 4096,
    // Both tables are indexed by content type.
    thresholds: Cow::Borrowed(&THRESHOLDS),
    overwrite_map: Cow::Borrowed(&OVERWRITE_MAP),
};

// Minimum score per content type (indexed by content type). An inference with a lower score is
// overwritten with a generic content type.
#[rustfmt::skip]
const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5];
// Canonical content type per content type (indexed by content type). An entry equal to its own
// content type means that the inferred content type is kept.
const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [
    ContentType::_3gp,
    ContentType::Ace,
    ContentType::Ai,
    ContentType::Aidl,
    ContentType::Apk,
    ContentType::Applebplist,
    ContentType::Appleplist,
    ContentType::Asm,
    ContentType::Asp,
    ContentType::Autohotkey,
    ContentType::Autoit,
    ContentType::Awk,
    ContentType::Batch,
    ContentType::Bazel,
    ContentType::Bib,
    ContentType::Bmp,
    ContentType::Bzip,
    ContentType::C,
    ContentType::Cab,
    ContentType::Cat,
    ContentType::Chm,
    ContentType::Clojure,
    ContentType::Cmake,
    ContentType::Cobol,
    ContentType::Coff,
    ContentType::Coffeescript,
    ContentType::Cpp,
    ContentType::Crt,
    ContentType::Crx,
    ContentType::Cs,
    ContentType::Csproj,
    ContentType::Css,
    ContentType::Csv,
    ContentType::Dart,
    ContentType::Deb,
    ContentType::Dex,
    ContentType::Dicom,
    ContentType::Diff,
    ContentType::Dm,
    ContentType::Dmg,
    ContentType::Doc,
    ContentType::Dockerfile,
    ContentType::Docx,
    ContentType::Dsstore,
    ContentType::Dwg,
    ContentType::Dxf,
    ContentType::Elf,
    ContentType::Elixir,
    ContentType::Emf,
    ContentType::Eml,
    ContentType::Empty,
    ContentType::Epub,
    ContentType::Erb,
    ContentType::Erlang,
    ContentType::Flac,
    ContentType::Flv,
    ContentType::Fortran,
    ContentType::Gemfile,
    ContentType::Gemspec,
    ContentType::Gif,
    ContentType::Gitattributes,
    ContentType::Gitmodules,
    ContentType::Go,
    ContentType::Gradle,
    ContentType::Groovy,
    ContentType::Gzip,
    ContentType::H5,
    ContentType::Handlebars,
    ContentType::Haskell,
    ContentType::Hcl,
    ContentType::Hlp,
    ContentType::Htaccess,
    ContentType::Html,
    ContentType::Icns,
    ContentType::Ico,
    ContentType::Ics,
    ContentType::Ignorefile,
    ContentType::Ini,
    ContentType::Internetshortcut,
    ContentType::Ipynb,
    ContentType::Iso,
    ContentType::Jar,
    ContentType::Java,
    ContentType::Javabytecode,
    ContentType::Javascript,
    ContentType::Jinja,
    ContentType::Jp2,
    ContentType::Jpeg,
    ContentType::Json,
    ContentType::Jsonl,
    ContentType::Julia,
    ContentType::Kotlin,
    ContentType::Latex,
    ContentType::Lha,
    ContentType::Lisp,
    ContentType::Lnk,
    ContentType::Lua,
    ContentType::M3u,
    ContentType::M4,
    ContentType::Macho,
    ContentType::Makefile,
    ContentType::Markdown,
    ContentType::Matlab,
    ContentType::Mht,
    ContentType::Midi,
    ContentType::Mkv,
    ContentType::Mp3,
    ContentType::Mp4,
    ContentType::Mscompress,
    ContentType::Msi,
    ContentType::Mum,
    ContentType::Npy,
    ContentType::Npz,
    ContentType::Nupkg,
    ContentType::Objectivec,
    ContentType::Ocaml,
    ContentType::Odp,
    ContentType::Ods,
    ContentType::Odt,
    ContentType::Ogg,
    ContentType::One,
    ContentType::Onnx,
    ContentType::Otf,
    ContentType::Outlook,
    ContentType::Parquet,
    ContentType::Pascal,
    ContentType::Pcap,
    ContentType::Pdb,
    ContentType::Pdf,
    ContentType::Pebin,
    ContentType::Pem,
    ContentType::Perl,
    ContentType::Php,
    ContentType::Pickle,
    ContentType::Png,
    ContentType::Po,
    ContentType::Postscript,
    ContentType::Powershell,
    ContentType::Ppt,
    ContentType::Pptx,
    ContentType::Prolog,
    ContentType::Proteindb,
    ContentType::Proto,
    ContentType::Psd,
    ContentType::Python,
    ContentType::Pythonbytecode,
    ContentType::Pytorch,
    ContentType::Qt,
    ContentType::R,
    // NOTE(review): the next two entries appear to be the only actual overwrites. They are
    // presumably the slots of the `Randombytes` and `Randomtxt` content types. TODO confirm.
    ContentType::Unknown,
    ContentType::Txt,
    ContentType::Rar,
    ContentType::Rdf,
    ContentType::Rpm,
    ContentType::Rst,
    ContentType::Rtf,
    ContentType::Ruby,
    ContentType::Rust,
    ContentType::Scala,
    ContentType::Scss,
    ContentType::Sevenzip,
    ContentType::Sgml,
    ContentType::Shell,
    ContentType::Smali,
    ContentType::Snap,
    ContentType::Solidity,
    ContentType::Sql,
    ContentType::Sqlite,
    ContentType::Squashfs,
    ContentType::Srt,
    ContentType::Stlbinary,
    ContentType::Stltext,
    ContentType::Sum,
    ContentType::Svg,
    ContentType::Swf,
    ContentType::Swift,
    ContentType::Tar,
    ContentType::Tcl,
    ContentType::Textproto,
    ContentType::Tga,
    ContentType::Thumbsdb,
    ContentType::Tiff,
    ContentType::Toml,
    ContentType::Torrent,
    ContentType::Tsv,
    ContentType::Ttf,
    ContentType::Twig,
    ContentType::Txt,
    ContentType::Typescript,
    ContentType::Undefined,
    ContentType::Unknown,
    ContentType::Vba,
    ContentType::Vcxproj,
    ContentType::Verilog,
    ContentType::Vhdl,
    ContentType::Vtt,
    ContentType::Vue,
    ContentType::Wasm,
    ContentType::Wav,
    ContentType::Webm,
    ContentType::Webp,
    ContentType::Winregistry,
    ContentType::Wmf,
    ContentType::Woff,
    ContentType::Woff2,
    ContentType::Xar,
    ContentType::Xls,
    ContentType::Xlsb,
    ContentType::Xlsx,
    ContentType::Xml,
    ContentType::Xpi,
    ContentType::Xz,
    ContentType::Yaml,
    ContentType::Yara,
    ContentType::Zig,
    ContentType::Zip,
    ContentType::Zlibstream,
];

/// Raw label identifiers; each variant converts to the `ContentType` of the same name through
/// `Label::content_type`.
///
/// The enum is `#[repr(u32)]` and, per the `dead_code` note below, values are produced by
/// transmuting an integer rather than by naming a variant. The declaration order therefore
/// presumably has to match whatever produces those integers (TODO confirm against the producer
/// before reordering, inserting or removing variants), and `NUM_LABELS` must be kept equal to the
/// number of variants.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u32)]
#[allow(dead_code)] // only constructed through transmute
pub(crate) enum Label {
    _3gp,
    Ace,
    Ai,
    Aidl,
    Apk,
    Applebplist,
    Appleplist,
    Asm,
    Asp,
    Autohotkey,
    Autoit,
    Awk,
    Batch,
    Bazel,
    Bib,
    Bmp,
    Bzip,
    C,
    Cab,
    Cat,
    Chm,
    Clojure,
    Cmake,
    Cobol,
    Coff,
    Coffeescript,
    Cpp,
    Crt,
    Crx,
    Cs,
    Csproj,
    Css,
    Csv,
    Dart,
    Deb,
    Dex,
    Dicom,
    Diff,
    Dm,
    Dmg,
    Doc,
    Dockerfile,
    Docx,
    Dsstore,
    Dwg,
    Dxf,
    Elf,
    Elixir,
    Emf,
    Eml,
    Epub,
    Erb,
    Erlang,
    Flac,
    Flv,
    Fortran,
    Gemfile,
    Gemspec,
    Gif,
    Gitattributes,
    Gitmodules,
    Go,
    Gradle,
    Groovy,
    Gzip,
    H5,
    Handlebars,
    Haskell,
    Hcl,
    Hlp,
    Htaccess,
    Html,
    Icns,
    Ico,
    Ics,
    Ignorefile,
    Ini,
    Internetshortcut,
    Ipynb,
    Iso,
    Jar,
    Java,
    Javabytecode,
    Javascript,
    Jinja,
    Jp2,
    Jpeg,
    Json,
    Jsonl,
    Julia,
    Kotlin,
    Latex,
    Lha,
    Lisp,
    Lnk,
    Lua,
    M3u,
    M4,
    Macho,
    Makefile,
    Markdown,
    Matlab,
    Mht,
    Midi,
    Mkv,
    Mp3,
    Mp4,
    Mscompress,
    Msi,
    Mum,
    Npy,
    Npz,
    Nupkg,
    Objectivec,
    Ocaml,
    Odp,
    Ods,
    Odt,
    Ogg,
    One,
    Onnx,
    Otf,
    Outlook,
    Parquet,
    Pascal,
    Pcap,
    Pdb,
    Pdf,
    Pebin,
    Pem,
    Perl,
    Php,
    Pickle,
    Png,
    Po,
    Postscript,
    Powershell,
    Ppt,
    Pptx,
    Prolog,
    Proteindb,
    Proto,
    Psd,
    Python,
    Pythonbytecode,
    Pytorch,
    Qt,
    R,
    Randombytes,
    Randomtxt,
    Rar,
    Rdf,
    Rpm,
    Rst,
    Rtf,
    Ruby,
    Rust,
    Scala,
    Scss,
    Sevenzip,
    Sgml,
    Shell,
    Smali,
    Snap,
    Solidity,
    Sql,
    Sqlite,
    Squashfs,
    Srt,
    Stlbinary,
    Stltext,
    Sum,
    Svg,
    Swf,
    Swift,
    Tar,
    Tcl,
    Textproto,
    Tga,
    Thumbsdb,
    Tiff,
    Toml,
    Torrent,
    Tsv,
    Ttf,
    Twig,
    Txt,
    Typescript,
    Vba,
    Vcxproj,
    Verilog,
    Vhdl,
    Vtt,
    Vue,
    Wasm,
    Wav,
    Webm,
    Webp,
    Winregistry,
    Wmf,
    Woff,
    Woff2,
    Xar,
    Xls,
    Xlsb,
    Xlsx,
    Xml,
    Xpi,
    Xz,
    Yaml,
    Yara,
    Zig,
    Zip,
    Zlibstream,
}

/// Number of variants declared in `Label`; keep in sync with the enum.
pub(crate) const NUM_LABELS: usize = 214;
impl Label {
    /// Returns the `ContentType` variant that carries the same name as this label.
    ///
    /// The mapping is a plain one-to-one rename: every arm pairs `Label::X` with
    /// `ContentType::X`. The match is exhaustive, so adding a `Label` variant without a
    /// corresponding arm here is a compile error.
    pub(crate) fn content_type(self) -> ContentType {
        match self {
            Label::_3gp => ContentType::_3gp,
            Label::Ace => ContentType::Ace,
            Label::Ai => ContentType::Ai,
            Label::Aidl => ContentType::Aidl,
            Label::Apk => ContentType::Apk,
            Label::Applebplist => ContentType::Applebplist,
            Label::Appleplist => ContentType::Appleplist,
            Label::Asm => ContentType::Asm,
            Label::Asp => ContentType::Asp,
            Label::Autohotkey => ContentType::Autohotkey,
            Label::Autoit => ContentType::Autoit,
            Label::Awk => ContentType::Awk,
            Label::Batch => ContentType::Batch,
            Label::Bazel => ContentType::Bazel,
            Label::Bib => ContentType::Bib,
            Label::Bmp => ContentType::Bmp,
            Label::Bzip => ContentType::Bzip,
            Label::C => ContentType::C,
            Label::Cab => ContentType::Cab,
            Label::Cat => ContentType::Cat,
            Label::Chm => ContentType::Chm,
            Label::Clojure => ContentType::Clojure,
            Label::Cmake => ContentType::Cmake,
            Label::Cobol => ContentType::Cobol,
            Label::Coff => ContentType::Coff,
            Label::Coffeescript => ContentType::Coffeescript,
            Label::Cpp => ContentType::Cpp,
            Label::Crt => ContentType::Crt,
            Label::Crx => ContentType::Crx,
            Label::Cs => ContentType::Cs,
            Label::Csproj => ContentType::Csproj,
            Label::Css => ContentType::Css,
            Label::Csv => ContentType::Csv,
            Label::Dart => ContentType::Dart,
            Label::Deb => ContentType::Deb,
            Label::Dex => ContentType::Dex,
            Label::Dicom => ContentType::Dicom,
            Label::Diff => ContentType::Diff,
            Label::Dm => ContentType::Dm,
            Label::Dmg => ContentType::Dmg,
            Label::Doc => ContentType::Doc,
            Label::Dockerfile => ContentType::Dockerfile,
            Label::Docx => ContentType::Docx,
            Label::Dsstore => ContentType::Dsstore,
            Label::Dwg => ContentType::Dwg,
            Label::Dxf => ContentType::Dxf,
            Label::Elf => ContentType::Elf,
            Label::Elixir => ContentType::Elixir,
            Label::Emf => ContentType::Emf,
            Label::Eml => ContentType::Eml,
            Label::Epub => ContentType::Epub,
            Label::Erb => ContentType::Erb,
            Label::Erlang => ContentType::Erlang,
            Label::Flac => ContentType::Flac,
            Label::Flv => ContentType::Flv,
            Label::Fortran => ContentType::Fortran,
            Label::Gemfile => ContentType::Gemfile,
            Label::Gemspec => ContentType::Gemspec,
            Label::Gif => ContentType::Gif,
            Label::Gitattributes => ContentType::Gitattributes,
            Label::Gitmodules => ContentType::Gitmodules,
            Label::Go => ContentType::Go,
            Label::Gradle => ContentType::Gradle,
            Label::Groovy => ContentType::Groovy,
            Label::Gzip => ContentType::Gzip,
            Label::H5 => ContentType::H5,
            Label::Handlebars => ContentType::Handlebars,
            Label::Haskell => ContentType::Haskell,
            Label::Hcl => ContentType::Hcl,
            Label::Hlp => ContentType::Hlp,
            Label::Htaccess => ContentType::Htaccess,
            Label::Html => ContentType::Html,
            Label::Icns => ContentType::Icns,
            Label::Ico => ContentType::Ico,
            Label::Ics => ContentType::Ics,
            Label::Ignorefile => ContentType::Ignorefile,
            Label::Ini => ContentType::Ini,
            Label::Internetshortcut => ContentType::Internetshortcut,
            Label::Ipynb => ContentType::Ipynb,
            Label::Iso => ContentType::Iso,
            Label::Jar => ContentType::Jar,
            Label::Java => ContentType::Java,
            Label::Javabytecode => ContentType::Javabytecode,
            Label::Javascript => ContentType::Javascript,
            Label::Jinja => ContentType::Jinja,
            Label::Jp2 => ContentType::Jp2,
            Label::Jpeg => ContentType::Jpeg,
            Label::Json => ContentType::Json,
            Label::Jsonl => ContentType::Jsonl,
            Label::Julia => ContentType::Julia,
            Label::Kotlin => ContentType::Kotlin,
            Label::Latex => ContentType::Latex,
            Label::Lha => ContentType::Lha,
            Label::Lisp => ContentType::Lisp,
            Label::Lnk => ContentType::Lnk,
            Label::Lua => ContentType::Lua,
            Label::M3u => ContentType::M3u,
            Label::M4 => ContentType::M4,
            Label::Macho => ContentType::Macho,
            Label::Makefile => ContentType::Makefile,
            Label::Markdown => ContentType::Markdown,
            Label::Matlab => ContentType::Matlab,
            Label::Mht => ContentType::Mht,
            Label::Midi => ContentType::Midi,
            Label::Mkv => ContentType::Mkv,
            Label::Mp3 => ContentType::Mp3,
            Label::Mp4 => ContentType::Mp4,
            Label::Mscompress => ContentType::Mscompress,
            Label::Msi => ContentType::Msi,
            Label::Mum => ContentType::Mum,
            Label::Npy => ContentType::Npy,
            Label::Npz => ContentType::Npz,
            Label::Nupkg => ContentType::Nupkg,
            Label::Objectivec => ContentType::Objectivec,
            Label::Ocaml => ContentType::Ocaml,
            Label::Odp => ContentType::Odp,
            Label::Ods => ContentType::Ods,
            Label::Odt => ContentType::Odt,
            Label::Ogg => ContentType::Ogg,
            Label::One => ContentType::One,
            Label::Onnx => ContentType::Onnx,
            Label::Otf => ContentType::Otf,
            Label::Outlook => ContentType::Outlook,
            Label::Parquet => ContentType::Parquet,
            Label::Pascal => ContentType::Pascal,
            Label::Pcap => ContentType::Pcap,
            Label::Pdb => ContentType::Pdb,
            Label::Pdf => ContentType::Pdf,
            Label::Pebin => ContentType::Pebin,
            Label::Pem => ContentType::Pem,
            Label::Perl => ContentType::Perl,
            Label::Php => ContentType::Php,
            Label::Pickle => ContentType::Pickle,
            Label::Png => ContentType::Png,
            Label::Po => ContentType::Po,
            Label::Postscript => ContentType::Postscript,
            Label::Powershell => ContentType::Powershell,
            Label::Ppt => ContentType::Ppt,
            Label::Pptx => ContentType::Pptx,
            Label::Prolog => ContentType::Prolog,
            Label::Proteindb => ContentType::Proteindb,
            Label::Proto => ContentType::Proto,
            Label::Psd => ContentType::Psd,
            Label::Python => ContentType::Python,
            Label::Pythonbytecode => ContentType::Pythonbytecode,
            Label::Pytorch => ContentType::Pytorch,
            Label::Qt => ContentType::Qt,
            Label::R => ContentType::R,
            Label::Randombytes => ContentType::Randombytes,
            Label::Randomtxt => ContentType::Randomtxt,
            Label::Rar => ContentType::Rar,
            Label::Rdf => ContentType::Rdf,
            Label::Rpm => ContentType::Rpm,
            Label::Rst => ContentType::Rst,
            Label::Rtf => ContentType::Rtf,
            Label::Ruby => ContentType::Ruby,
            Label::Rust => ContentType::Rust,
            Label::Scala => ContentType::Scala,
            Label::Scss => ContentType::Scss,
            Label::Sevenzip => ContentType::Sevenzip,
            Label::Sgml => ContentType::Sgml,
            Label::Shell => ContentType::Shell,
            Label::Smali => ContentType::Smali,
            Label::Snap => ContentType::Snap,
            Label::Solidity => ContentType::Solidity,
            Label::Sql => ContentType::Sql,
            Label::Sqlite => ContentType::Sqlite,
            Label::Squashfs => ContentType::Squashfs,
            Label::Srt => ContentType::Srt,
            Label::Stlbinary => ContentType::Stlbinary,
            Label::Stltext => ContentType::Stltext,
            Label::Sum => ContentType::Sum,
            Label::Svg => ContentType::Svg,
            Label::Swf => ContentType::Swf,
            Label::Swift => ContentType::Swift,
            Label::Tar => ContentType::Tar,
            Label::Tcl => ContentType::Tcl,
            Label::Textproto => ContentType::Textproto,
            Label::Tga => ContentType::Tga,
            Label::Thumbsdb => ContentType::Thumbsdb,
            Label::Tiff => ContentType::Tiff,
            Label::Toml => ContentType::Toml,
            Label::Torrent => ContentType::Torrent,
            Label::Tsv => ContentType::Tsv,
            Label::Ttf => ContentType::Ttf,
            Label::Twig => ContentType::Twig,
            Label::Txt => ContentType::Txt,
            Label::Typescript => ContentType::Typescript,
            Label::Vba => ContentType::Vba,
            Label::Vcxproj => ContentType::Vcxproj,
            Label::Verilog => ContentType::Verilog,
            Label::Vhdl => ContentType::Vhdl,
            Label::Vtt => ContentType::Vtt,
            Label::Vue => ContentType::Vue,
            Label::Wasm => ContentType::Wasm,
            Label::Wav => ContentType::Wav,
            Label::Webm => ContentType::Webm,
            Label::Webp => ContentType::Webp,
            Label::Winregistry => ContentType::Winregistry,
            Label::Wmf => ContentType::Wmf,
            Label::Woff => ContentType::Woff,
            Label::Woff2 => ContentType::Woff2,
            Label::Xar => ContentType::Xar,
            Label::Xls => ContentType::Xls,
            Label::Xlsb => ContentType::Xlsb,
            Label::Xlsx => ContentType::Xlsx,
            Label::Xml => ContentType::Xml,
            Label::Xpi => ContentType::Xpi,
            Label::Xz => ContentType::Xz,
            Label::Yaml => ContentType::Yaml,
            Label::Yara => ContentType::Yara,
            Label::Zig => ContentType::Zig,
            Label::Zip => ContentType::Zip,
            Label::Zlibstream => ContentType::Zlibstream,
        }
    }
}


================================================
FILE: rust/lib/src/session.rs
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::path::Path;

use ndarray::Array2;

use crate::future::{exec, AsyncEnv, Env, SyncEnv};
use crate::{AsyncInput, Builder, Features, FeaturesOrRuled, FileType, Result, SyncInput};

/// A Magika session to identify files.
///
/// Every identification method takes `&mut self`, so a single session handles one request (or
/// one batch) at a time.
#[derive(Debug)]
pub struct Session {
    /// Underlying `ort` session that the identification methods run inference on.
    pub(crate) session: ort::session::Session,
}

impl Session {
    /// Creates a default session.
    pub fn new() -> Result {
        Session::builder().build()
    }

    /// Initializes a new Magika session builder with default values.
    pub fn builder() -> Builder {
        Builder::default()
    }

    /// Identifies a single file (synchronously).
    pub fn identify_file_sync(&mut self, file: impl AsRef) -> Result {
        exec(self.identify_file::(file.as_ref()))
    }

    /// Identifies a single file (asynchronously).
    pub async fn identify_file_async(&mut self, file: impl AsRef) -> Result {
        self.identify_file::(file.as_ref()).await
    }

    async fn identify_file(&mut self, file: &Path) -> Result {
        let metadata = E::symlink_metadata(file).await?;
        if metadata.is_dir() {
            Ok(FileType::Directory)
        } else if metadata.is_symlink() {
            Ok(FileType::Symlink)
        } else {
            debug_assert!(metadata.is_file());
            self.identify_content::(E::open(file).await?).await
        }
    }

    /// Identifies a single file from its content (synchronously).
    pub fn identify_content_sync(&mut self, file: impl SyncInput) -> Result {
        exec(self.identify_content::(file))
    }

    /// Identifies a single file from its content (asynchronously).
    pub async fn identify_content_async(&mut self, file: impl AsyncInput) -> Result {
        self.identify_content::(file).await
    }

    async fn identify_content(&mut self, file: impl AsyncInput) -> Result {
        match FeaturesOrRuled::extract(file).await? {
            FeaturesOrRuled::Ruled(content_type) => Ok(FileType::Ruled(content_type)),
            FeaturesOrRuled::Features(features) => self.identify_features::(&features).await,
        }
    }

    /// Identifies a single file from its features (synchronously).
    pub fn identify_features_sync(&mut self, features: &Features) -> Result {
        exec(self.identify_features::(features))
    }

    /// Identifies a single file from its features (asynchronously).
    pub async fn identify_features_async(&mut self, features: &Features) -> Result {
        self.identify_features::(features).await
    }

    async fn identify_features(&mut self, features: &Features) -> Result {
        let results = self.identify_features_batch::(std::slice::from_ref(features)).await?;
        let [result] = results.try_into().ok().unwrap();
        Ok(result)
    }

    /// Identifies multiple files in parallel from their features (synchronously).
    pub fn identify_features_batch_sync(&mut self, features: &[Features]) -> Result> {
        exec(self.identify_features_batch::(features))
    }

    /// Identifies multiple files in parallel from their features (asynchronously).
    pub async fn identify_features_batch_async(
        &mut self, features: &[Features],
    ) -> Result> {
        self.identify_features_batch::(features).await
    }

    async fn identify_features_batch(
        &mut self, features: &[Features],
    ) -> Result> {
        if features.is_empty() {
            return Ok(Vec::new());
        }
        let features_size = crate::model::CONFIG.features_size();
        let input = Array2::from_shape_vec(
            [features.len(), features_size],
            features.iter().flat_map(|x| &x.0).cloned().collect(),
        )?;
        let mut output = E::ort_session_run(&mut self.session, input).await?;
        let output = output.remove("target_label").unwrap();
        let output = output.try_extract_array()?;
        Ok(FileType::convert(output))
    }
}


================================================
FILE: rust/lib/test.sh
================================================
#!/bin/sh
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command.
set -e
# Provides the `x` helper used below (presumably it echoes and runs its arguments; confirm in
# color.sh).
. ../color.sh

# Type-check with the default features and with the optional serde support.
x cargo check
x cargo check --features=serde
# Run the test suite with the `_test` feature enabled.
x cargo test --features=_test
# Formatting and lints are enforced: any diff or warning fails the script.
x cargo fmt -- --check
x cargo clippy -- --deny=warnings
# The documentation is only built (with rustdoc warnings denied) on a nightly toolchain.
if cargo --version | grep -q nightly; then
  x env RUSTDOCFLAGS=--deny=warnings cargo doc --features=_doc
fi


================================================
FILE: rust/onnx/build.sh
================================================
#!/bin/bash
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command.
set -e
# Provides the `info` and `x` helpers used below (presumably logging / run-with-echo; confirm in
# color.sh).
. ../color.sh

# This script builds ONNX Runtime as a static library to be linked in the Magika CLI.
#
# This is needed when building for manylinux since the prebuilt binaries provided by the ort crate
# have too recent dependency requirements.

# The `runtime` directory doubles as the cache marker: if it exists, the whole build is skipped.
if [ -e runtime ]; then
  info "Using cached static libraries."
else
  info "Make sure we have Python 3.x and cmake-3.27 or higher."
  # A pinned cmake is installed in a throw-away virtualenv instead of relying on the system one.
  python3 -m venv venv
  source venv/bin/activate
  python3 -m pip install cmake==3.31.6

  info "Clone ONNX Runtime repository (recursively)."
  git clone --recursive https://github.com/Microsoft/onnxruntime.git runtime
  cd runtime

  info "Checkout v1.24.2 because that's what ort v2.0.0-rc.12 supports."
  git checkout v1.24.2

  # The build fails with GCC 14 due to warnings as errors.
  # Work around it by appending three warning-suppression options right after the
  # onnxruntime_set_compile_flags function header in cmake/CMakeLists.txt.
  sed -i '/function(onnxruntime_set_compile_flags/a\
    target_compile_options(${target_name} PRIVATE "$<$:-Wno-array-bounds>")\
    target_compile_options(${target_name} PRIVATE "$<$:-Wno-maybe-uninitialized>")\
    target_compile_options(${target_name} PRIVATE "$<$:-Wno-uninitialized>")' \
    cmake/CMakeLists.txt

  info "Build the static libraries."
  # $ONNX_RUNTIME_BUILD_FLAGS is left unquoted so that it can expand to several extra flags.
  x ./build.sh --config=Release --parallel $ONNX_RUNTIME_BUILD_FLAGS

  info "Only keep the static libraries to save cache space."
  # Deletes every non-`.a` file and every empty directory under build/Linux.
  find build/Linux -not -name '*.a' \( -not -type d -or -empty \) -delete
  cd ..
fi

info "Point the ort crate to the locally built static library."
# Move up two directories; the cargo configuration is edited from there.
cd ../..
cat >> .cargo/config.toml < cli/output 2>&1

info "Updating CLI output in README.md"
# Refresh every example in cli/README.md: a line starting with "% " holds a command, and the
# lines that follow it, up to the closing ``` fence, hold that command's recorded output.
( cd cli
  # One iteration per "% " line. Line numbers are recomputed on every iteration because
  # rewriting an earlier example shifts the position of the later ones.
  for i in $(seq 1 $(grep '^% ' README.md | wc -l)); do
    grep -n '^% ' README.md | cut -f1 -d: | head -n$i | tail -n1 | while read line; do
      # Within [command line, closing fence]: keep the command line and the fence, drop the rest.
      sed -i $line',/```/{'$line'p;/```/!d}' README.md
      # Recover the command by stripping the "% " prompt.
      cmd="$(head -n$line README.md | tail -n1 | sed 's/^% //')"
      # Run it from two directories up, capturing stdout only (stderr is discarded).
      ( cd ../..; eval "$cmd"; ) 2>/dev/null > tmp
      # Insert the fresh output right after the command line.
      sed -i $line'r tmp' README.md
    done
  done
  rm tmp
  # Strip trailing spaces from the whole file.
  sed -i 's/ \+$//' README.md
)

info "Updating ort version in library documentation"
( cd lib
  # Read the `version = ...` value from the [dependencies.ort] section of Cargo.toml...
  VERSION=$(sed -n '/^\[dependencies.ort\]$/,/^$/{s/^version = //p}' Cargo.toml)
  # ...and write it into the `//! ort = ...` doc line of src/lib.rs.
  sed -Ei 's#^(//! ort =) .*$#\1 '"$VERSION"'#' src/lib.rs
)

# With --check, any difference reported by `git diff` means the generated files were stale.
if [ "$1" = --check ]; then
  if ! git diff --exit-code; then
    # On CI, additionally print how to fix the problem.
    [ -n "$CI" ] && todo 'Execute ./sync.sh from the rust directory'
    error 'Generated files are not in sync'
  fi
fi
success "Generated files are synced"


================================================
FILE: rust/taplo.toml
================================================
# Default formatting options: 100-column lines, with arrays and keys reordered.
[formatting]
column_width = 100
reorder_arrays = true
reorder_keys = true

# Exception: keep the hand-written key order in dependency tables and in [package].
[[rule]]
formatting = { reorder_keys = false }
keys = ["build-dependencies.*", "dependencies.*", "dev-dependencies.*", "package"]

# Exception: keep the hand-written array order under package.metadata.deb.
[[rule]]
formatting = { reorder_arrays = false }
keys = ["package.metadata.deb"]


================================================
FILE: rust/test.sh
================================================
#!/bin/sh
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Stop at the first failure.
set -e
. ./color.sh

# Locally both toolchains are exercised; on CI only the toolchain that is already active is used
# (its name is the active-toolchain string cut at the first dash).
if [ -n "$CI" ]; then
  TOOLCHAINS=$(rustup show active-toolchain | sed 's/-.*//')
else
  TOOLCHAINS='stable nightly'
fi

# Run each crate's own test.sh from inside its directory, once per toolchain.
for toolchain in $TOOLCHAINS; do
  for dir in gen lib cli; do
    info "Running tests from $dir with $toolchain"
    ( cd $dir && rustup run $toolchain ./test.sh; )
  done
done

# Finally, verify that the generated files are up to date.
./sync.sh --check


================================================
FILE: tests_data/README.md
================================================
# Tests Data

We use these files for regression testing.

These files were not used for training, and they should never be.

They are organized by directory:
- `basic/`: a number of simple files of various content types.
- `mitra/`: a selection of the files available at [https://github.com/corkami/mitra](https://github.com/corkami/mitra/tree/master/input).


================================================
FILE: tests_data/basic/asm/code.asm
================================================
.section .text
	xor    %eax,%eax
	push   %eax
	push   $0x68732f2f
	push   $0x6e69622f
	mov    %esp,%ebx
	push   %eax
	push   %ebx
	mov    %esp,%ecx
	mov    $0xb,%al
	int    $0x80


================================================
FILE: tests_data/basic/batch/simple.bat
================================================
@echo off
echo %1
echo %2
echo %3

================================================
FILE: tests_data/basic/c/code.c
================================================
#include 

int main() {
    char c;
    printf("Enter a character: ");
    scanf("%c", &c);

    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
        printf("%c is a letter");
    else
        printf("%c is not a letter", c);

    return 0;
}

================================================
FILE: tests_data/basic/css/code.css
================================================
/* table.mycv-entry { */
  /* border: 3px; */
  /* background-color: red; */
/* } */

table.cv-entry {
  border: 0px;
}

.cv-entry tbody tr td,
tbody tr th {
  background-color: #ffffff;
  border: 0px;
}

div.cv-entry {
  margin: 0px 10px 10px 10px;
  text-align: left
}

span.doctitle {
  font-weight: bold;
  display: block;
  /*color:#dcb975;*/
  color:#000000;
  text-align: left
}

span.docauthors {
  display: block;
}

span.docproc {
  font-style: italic;
}

span.doclink {
  display:block;
}

img.social-icon {
  display:inline;
  margin: 0px;
  vertical-align:middle;
}

table.contact-info {
  border: 0px;
}

.contact-info tbody tr td,
tbody tr th {
  background-color: #ffffff;
  border: 0px;
}


================================================
FILE: tests_data/basic/csv/magika_test.csv
================================================
Name,Value1,Value2,Value3
Test1,1,10,100
Test2,2,20,200
Test3,3,30,300
Test4,4,40,400
Test5,5,50,500

================================================
FILE: tests_data/basic/dockerfile/Dockerfile
================================================
# syntax=docker/dockerfile:1

ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim as base

WORKDIR /magika

# This requires buildx
# RUN --mount=type=cache,target=/root/.cache/pip \
#     pip install magika

RUN pip install magika

ENTRYPOINT ["magika"]


================================================
FILE: tests_data/basic/eml/sample.eml
================================================
Delivered-To: samplelefay@gmail.com
Received: by 2002:a05:6e04:60c8:b0:358:f759:ca6 with SMTP id do8csp3407339imd;
        Sat, 24 May 2025 13:12:33 -0700 (PDT)
X-Google-Smtp-Source: AGHT+IE2JLkhE/TNDdtqP0aHL2ofqoWkUhPk4z8fSmHFWHbViXC85kImFXv+Ve6tpwRtChavAQEY
X-Received: by 2002:a05:6402:28a5:b0:602:b6fd:150a with SMTP id 4fb4d7f45d1cf-602da9d8a34mr2169618a12.33.1748117553220;
        Sat, 24 May 2025 13:12:33 -0700 (PDT)
ARC-Seal: i=1; a=rsa-sha256; t=1748117553; cv=none;
        d=google.com; s=arc-20240605;
        b=SddPvSqiWCZ3hcLsPoKY5mgF8G90kpPEboy8YkxV4AqqgrNfPla4NETejNpnuJvLJI
         byt+SaYq7Ve2RoJoDkZSBaT4J2RnU0MA4QW3F7YtWHrCgyo+6O/RvB9/ZTdrrdG58Z54
         p0jFd0pWF+pTEhh71+Y2rDrbDwq9D2omwUp22IaFUZsL7KiLOlKQAMdaIF1GlmAPwEKM
         epNVkNFNIzE/T+051fMcrUm6dIVe9xBIIMv5HJ4px+beQM/cHy7Jgj9Ryxh7l7CYpwmJ
         dJwvYqQRrZ3XAv65p/A55YDtv5OFk82723B9MeW/m5G1XB8T4rFsBbcpcJQxrC5OfXL5
         P6lw==
ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;
        h=content-transfer-encoding:mime-version:list-unsubscribe
         :list-unsubscribe-post:message-id:subject:from:to:date
         :dkim-signature;
        bh=GBTxS/zUYzBpSA9sl7FQde1QfK2F+C1PfO1VwyLNlxU=;
        fh=GMaPNuYHJRKUGsaZzBcjeSmH+fuDvdR3OD4VRmo8LKk=;
        b=Rrv9c6Y6Z0aAUmlV7kkSgHmjexkqZaryb+ewpcGXZAJLjwG1xT36eEr+xNSza5JUl9
         C+bPB/Roa4aLghXgOUie709Unq91eI6EoMKxltvYryLMShqQf2lUPZLJBH324ARNB0rZ
         G+IjcJwQb8m1ygBkN9f72s6QW9a2L1uWoNZ9FxIEEsP2RZMiLcORzEhC4mNFdydQm7gA
         90EKqelXscw7mii3yfDfmoCb5fnbodG4SDhvQxpRLmv1RosBpDDh92jbKcH8MkYkbzfX
         Bnylixa3LtK0On61xFeg/WyEIMBRgaBP2pBzy9nJhwsx8+1nfGR/ZhmPqUrlz2WJ1FOo
         46ag==;
        dara=google.com
ARC-Authentication-Results: i=1; mx.google.com;
       dkim=pass header.i=@anonymousemail.eu header.s=mail header.b=qPEZ9skF;
       spf=pass (google.com: domain of noreply@anonymousemail.eu designates 5.45.184.142 as permitted sender) smtp.mailfrom=noreply@anonymousemail.eu;
       dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=anonymousemail.eu
Return-Path: 
Received: from s9.inboxpress.eu (s9.inboxpress.eu. [5.45.184.142])
        by mx.google.com with ESMTPS id 4fb4d7f45d1cf-6045eb06b43si298940a12.544.2025.05.24.13.12.31
        for 
        (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
        Sat, 24 May 2025 13:12:33 -0700 (PDT)
Received-SPF: pass (google.com: domain of noreply@anonymousemail.eu designates 5.45.184.142 as permitted sender) client-ip=5.45.184.142;
Authentication-Results: mx.google.com;
       dkim=pass header.i=@anonymousemail.eu header.s=mail header.b=qPEZ9skF;
       spf=pass (google.com: domain of noreply@anonymousemail.eu designates 5.45.184.142 as permitted sender) smtp.mailfrom=noreply@anonymousemail.eu;
       dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=anonymousemail.eu
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=anonymousemail.eu;
	s=mail; t=1748117551;
	bh=GBTxS/zUYzBpSA9sl7FQde1QfK2F+C1PfO1VwyLNlxU=;
	h=Date:To:From:Subject:List-Unsubscribe:From;
	b=qPEZ9skFQk06ORoxmFFYnqX188C7IfQLRyph7CHfLArHqw0r8RSzpQfpUjpnmRKED
	 wrm2jqxdj6L+2sNUEZGvs7HznrOrhYY9/RSzVOvGBQ0PMl7Fk0cvU32rm6Tu8+eGYp
	 7KNpMO4TCJh6oOZ4x33cU+3HHeEZSjPCYO8zBujJherHw5fTEx4VyfuBQpAioVbBkf
	 B5CGscCax1bnoQJBbpFENs6vHZgXvjJcgSxD01g2Hmv5a9mV/7c4VD6WsrvePcEsW+
	 tKTHJBXw1b+xyh4Jv3JjP4KGO2SWTlD+F5bIQxDRGiUtg8KXSHGBaKaGTEhfEduoXX
	 PTxWm9Kg9SC6w==
Received: from authenticated-user (s9.inboxpress.eu [5.45.184.142])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256)
	(No client certificate requested)
	by s9.inboxpress.eu (Postfix) with ESMTPSA id A66156601C
	for ; Sat, 24 May 2025 20:12:31 +0000 (UTC)
Date: Sat, 24 May 2025 20:12:31 +0000
To: samplelefay@gmail.com
From: Anonymousemail 
Subject: =?UTF-8?Q?Welcome_to_your_sample_email._=F0=9F=99=83?=
Message-ID: <1182da168e8347704d33d53e86784b69@anonymousemail.eu>
List-Unsubscribe-Post: List-Unsubscribe=One-Click
List-Unsubscribe: 
MIME-Version: 1.0
Content-Type: multipart/alternative;
 boundary="b1=_jkjvSBcyLEOb6jiwQT8Nx1r1IwoULznyMtSrrJRQO0A"
Content-Transfer-Encoding: 8bit

--b1=_jkjvSBcyLEOb6jiwQT8Nx1r1IwoULznyMtSrrJRQO0A
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Powered by Anonymousemail

You've been sent a sample email! 🎉

--b1=_jkjvSBcyLEOb6jiwQT8Nx1r1IwoULznyMtSrrJRQO0A
Content-Type: text/html; charset=UTF-8
Content-Transfer-Encoding: 8bit

Powered by Anonymousemail

You've been sent a sample email! 🎉

--b1=_jkjvSBcyLEOb6jiwQT8Nx1r1IwoULznyMtSrrJRQO0A-- ================================================ FILE: tests_data/basic/empty/empty_file ================================================ ================================================ FILE: tests_data/basic/handlebars/example.handlebars ================================================ {{#if isUserLoggedIn}} Welcome, {{username}}! {{#each notifications}}

{{this}}

{{/each}} {{else}} Please log in. {{/if}} ================================================ FILE: tests_data/basic/html/doc.html ================================================ 301 Moved Permanently

301 Moved Permanently


nginx/1.18.0 (Ubuntu)
================================================ FILE: tests_data/basic/ignorefile/example.ignorefile ================================================ .DS_Store .classpath .project .target/ .settings/ ================================================ FILE: tests_data/basic/ignorefile/other.ignorefile ================================================ # Standard build outputs /dist /build *.log *.tmp .DS_Store Thumbs.db node_modules/ !package.json **/*.min.js cache/?lockfile temp-*/ /log/* secret ================================================ FILE: tests_data/basic/ini/doc.ini ================================================ [pytest] log_cli = 1 log_level = WARNING filterwarnings = ignore::DeprecationWarning markers = smoketest slow ================================================ FILE: tests_data/basic/javascript/code.js ================================================ (function(){/* Copyright The Closure Library Authors. SPDX-License-Identifier: Apache-2.0 */ var n=this||self,p=function(a,b){a=a.split(".");var c=n;a[0]in c||"undefined"==typeof c.execScript||c.execScript("var "+a[0]);for(var d;a.length&&(d=a.shift());)a.length||void 0===b?c=c[d]&&c[d]!==Object.prototype[d]?c[d]:c[d]={}:c[d]=b};function q(){for(var a=r,b={},c=0;c>4);64!=h&&(c+=String.fromCharCode(f<<4&240|h>>2),64!=g&&(c+=String.fromCharCode(h<<6&192|g)))}};var w={},y=function(a){w.TAGGING=w.TAGGING||[];w.TAGGING[a]=!0};var ba=Array.isArray,ca=function(a,b){if(a&&ba(a))for(var c=0;cc?a.href:a.href.substr(0,c));a=c;break;case "protocol":a=d;break;case "host":a=a.hostname.replace(N,"").toLowerCase();c&&(c=/^www\d*\./.exec(a))&&c[0]&&(a=a.substr(c[0].length));break;case "port":a=String(Number(a.port)||("http"===d?80:"https"===d?443:""));break;case "path":a.pathname||a.hostname||y(1);a="/"===a.pathname.substr(0,1)?a.pathname:"/"+a.pathname;a=a.split("/");0<=[].indexOf(a[a.length-1])&&(a[a.length-1]="");a=a.join("/");break;case "query":a=a.search.replace("?","");break;case "extension":a= 
a.pathname.split(".");a=1>2;l=(l&3)<<4|t>>4;t=(t&15)<<2|x>>6;x&=63;m||(x=64,k||(t=64));h.push(r[ra],r[l],r[t],r[x])}f.call(e,h.join(""))}}a=b.join("*");return["1",la(a),a].join("*")}; function la(a,b){a=[J.userAgent,(new Date).getTimezoneOffset(),J.userLanguage||J.language,Math.floor((new Date(Date.now())).getTime()/60/1E3)-(void 0===b?0:b),a].join("*");if(!(b=S)){b=Array(256);for(var c=0;256>c;c++){for(var d=c,e=0;8>e;e++)d=d&1?d>>>1^3988292384:d>>>1;b[c]=d}}S=b;b=4294967295;for(c=0;c>>8^S[(b^a.charCodeAt(c))&255];return((b^-1)>>>0).toString(36)} function ma(a){return function(b){var c=R(G.location.href),d=c.search.replace("?","");a:{var e=d.split("&");for(var f=0;fb;++b){var c=ja.exec(a);if(c){var d=c;break b}a=decodeURIComponent(a)}d=void 0}if(d&&"1"===d[1]){var e=d[2],f=d[3];b:{for(d=0;3>d;++d)if(e===la(f,d)){var h=!0;break b}h=!1}if(h){var g=f;break a}y(7)}}g=void 0}e=g;if(void 0!==e){g={};var k=e?e.split("*"):[];for(e=0;e+1>21:b}return b};var $c=function(a){this.C=a||[]};$c.prototype.set=function(a){this.C[a]=!0};$c.prototype.get=function(a){return this.C[a]};$c.prototype.encode=function(){for(var a=[],b=0;b\x3c/script>'))):(c=M.createElement("script"),c.type="text/javascript",c.async=!0,c.src=ff.createScriptURL(a),d&&(c.onload=d),e&&(c.onerror=e),b&&(c.id=b),g&&c.setAttribute("nonce", g),a=M.getElementsByTagName("script")[0],a.parentNode.insertBefore(c,a))}},be=function(a,b){return E(M.location[b?"href":"search"],a)},E=function(a,b){return(a=a.match("(?:&|#|\\?)"+K(b).replace(/([.*+?^=!:${}()|\[\]\/\\])/g,"\\$1")+"=([^&#]*)"))&&2==a.length?a[1]:""},xa=function(){var a=""+M.location.hostname;return 0==a.indexOf("www.")?a.substring(4):a},de=function(a,b){var c=a.indexOf(b);if(5==c||6==c)if(a=a.charAt(c+b.length),"/"==a||"?"==a||""==a||":"==a)return!0;return!1},of=function(a,b){var c= M.referrer;if(/^(https?|android-app):\/\//i.test(c)){if(a)return c;a="//"+M.location.hostname;if(!de(c,a))return b&&(b=a.replace(/\./g,"-")+".cdn.ampproject.org",de(c,b))?void 
0:c}},za=function(a,b){if(1==b.length&&null!=b[0]&&"object"===typeof b[0])return b[0];for(var c={},d=Math.min(a.length+1,b.length),e=0;eg.length||!c&&3!==g.length)a&&(a.na=!0);else if(Number(g[1])){d[b[e].ja]?a&&(a.pa=!0):d[b[e].ja]=[];var ca={version:g[0],timestamp:1E3*Number(g[1]),qa:g[2]};c&&3=b.length)wc(a,b,c);else if(8192>=b.length)x(a,b,c)||wd(a,b,c)||wc(a,b,c);else throw ge("len",b.length),new Da(b.length);},pe=function(a,b, c,d){d=d||ua;wd(a+"?"+b,"",d,c)},wc=function(a,b,c){var d=ta(a+"?"+b);d.onload=d.onerror=function(){d.onload=null;d.onerror=null;c()}},wd=function(a,b,c,d){var e=O.XMLHttpRequest;if(!e)return!1;var g=new e;if(!("withCredentials"in g))return!1;a=a.replace(/^http:/,"https:");g.open("POST",a,!0);g.withCredentials=!0;g.setRequestHeader("Content-Type","text/plain");g.onreadystatechange=function(){if(4==g.readyState){if(d&&"text/plain"===g.getResponseHeader("Content-Type"))try{Ea(d,g.responseText,c)}catch(ca){ge("xhr", "rsp"),c()}else c();g=null}};g.send(b);return!0},Ea=function(a,b,c){if(1>b.length)ge("xhr","ver","0"),c();else if(3=100*R(a,Ka))throw"abort";}function Ma(a){if(G(P(a,Na)))throw"abort";}function Oa(){var a=M.location.protocol;if("http:"!=a&&"https:"!=a)throw"abort";} function pf(a){var b=!1,c=!1;if(vd.get(89)){c=!0;var d=a.get(kb),e=M.location;if(e){var g=e.pathname||"";"/"!=g.charAt(0)&&(g="/"+g);e=e.protocol+"//"+e.hostname+g+e.search;d&&0===d.indexOf(e)||(b=!0)}}!c&&vd.get(90)&&(c=!0,d=a.get(lb),e=of(!!a.get(ec),!!a.get(Kd)),d!==e&&(b=!0));!c&&vd.get(91)&&(c=!0,a.get(qf)!==M.title&&(b=!0));return c&&!b} function Pa(a){try{O.navigator.sendBeacon?J(42):O.XMLHttpRequest&&"withCredentials"in new O.XMLHttpRequest&&J(40)}catch(c){}a.set(ld,Td(a),!0);a.set(Ac,R(a,Ac)+1);var b=[];ue.map(function(c,d){d.F&&(c=a.get(c),void 0!=c&&c!=d.defaultValue&&("boolean"==typeof c&&(c*=1),b.push(d.F+"="+K(""+c))))});!1===a.get(xe)&&b.push("npa=1");b.push("z="+Bd());pf(a)&&J(109);a.set(Ra,b.join("&"),!0)} function Sa(a){var 
b=P(a,fa);!b&&a.get(Vd)&&(b="beacon");var c=P(a,gd),d=P(a,oe),e=c||(d||bd(!1)+"")+"/collect",g=a.Z(Ia),ca=P(a,Ra),l=P(a,Na);switch(P(a,ad)){case "d":e=c||(d||bd(!1)+"")+"/j/collect";b=a.get(qe)||void 0;pe(e,ca,b,g);break;default:b?(g=g||ua,"image"==b?wc(e,ca,g):"xhr"==b&&wd(e,ca,g)||"beacon"==b&&x(e,ca,g)||ba(e,ca,g)):ba(e,ca,g)}ca=h(l);g=ca.hitcount;ca.hitcount=g?g+1:1;ca.first_hit||(ca.first_hit=(new Date).getTime());delete h(l).pending_experiments;a.set(Ia,ua,!0);if(rf(a))if(ca= P(a,Na),l=sf[ca])for(ca=0;cag.length&&g.push&&g.push(uf(a));else vf[ca]=vf[ca]||[],30>vf[ca].length&&vf[ca].push(uf(a))}function Hc(a){qc().expId&&a.set(Nc,qc().expId);qc().expVar&&a.set(Oc,qc().expVar);var b=P(a,Na);if(b=h(b).pending_experiments){var c=[];for(d in b)b.hasOwnProperty(d)&&b[d]&&c.push(encodeURIComponent(d)+"."+encodeURIComponent(b[d]));var d=c.join("!")}else d=void 0;d&&((b=a.get(m))&&(d=b+"!"+d),a.set(m,d,!0))} function cd(){if(O.navigator&&"preview"==O.navigator.loadPurpose)throw"abort";}function yd(a){var b=O.gaDevIds||[];if(ka(b)){var c=a.get("&did");qa(c)&&0b-9E5};var hd=function(){return Math.round(2147483647*Math.random())},Bd=function(){try{var a=new Uint32Array(1);O.crypto.getRandomValues(a);return a[0]&2147483647}catch(b){return hd()}};function Ta(a){var b=R(a,Ua);500<=b&&J(15);var c=P(a,Va);if("transaction"!=c&&"item"!=c){c=R(a,Wa);var d=(new Date).getTime(),e=R(a,Xa);0==e&&a.set(Xa,d);e=Math.round(2*(d-e)/1E3);0=c)throw"abort";a.set(Wa,--c)}a.set(Ua,++b)};var Ya=function(){this.data=new ee};Ya.prototype.get=function(a){var b=$a(a),c=this.data.get(a);b&&void 0==c&&(c=ea(b.defaultValue)?b.defaultValue():b.defaultValue);return b&&b.Z?b.Z(this,a,c):c};var P=function(a,b){a=a.get(b);return void 0==a?"":""+a},R=function(a,b){a=a.get(b);return void 0==a||""===a?0:Number(a)};Ya.prototype.Z=function(a){return(a=this.get(a))&&ea(a)?a:ua}; Ya.prototype.set=function(a,b,c){if(a)if("object"===typeof a)for(var d in a)a.hasOwnProperty(d)&&ab(this,d,a[d],c);else 
ab(this,a,b,c)}; var ab=function(a,b,c,d){if(void 0!=c)switch(b){case Na:wb.test(c)}var e=$a(b);e&&e.o?e.o(a,b,c,d):a.data.set(b,c,d)},gf={hitPayload:88,location:89,referrer:90,title:91,buildHitTask:93,sendHitTask:94,displayFeaturesTask:95,customTask:97,cookieName:98,cookieDomain:99,cookiePath:100,cookieExpires:101,cookieUpdate:102,cookieFlags:103,storage:104,_x_19:105,transportUrl:106,allowAdFeatures:107,sampleRate:108}; function hf(a,b){var c=gf[a];c&&J(c);"displayFeaturesTask"===a&&void 0==b&&J(96);/.*Task$/.test(a)&&J(92)}function mf(a,b){if(a)if("object"===typeof a)for(var c in a)a.hasOwnProperty(c)&&hf(c,b);else hf(a,b)};var ue=new ee,ve=[],bb=function(a,b,c,d,e){this.name=a;this.F=b;this.Z=d;this.o=e;this.defaultValue=c};function $a(a){var b=ue.get(a);if(!b)for(var c=0;c=b?!1:!0},gc=function(a){var b={};if(Ec(b)||Fc(b)){var c=b[Eb];void 0==c||Infinity==c||isNaN(c)||(0c)a[b]=void 0},Fd=function(a){return function(b){if("pageview"==b.get(Va)&&!a.I){a.I=!0;var c=aa(b),d=0a.length)J(12);else{for(var d=[],e=0;e=a&&d.push({hash:ca[0],R:e[g],O:ca})}if(0!=d.length)return 1==d.length?d[0]:Zc(b,d)||Zc(c,d)||Zc(null,d)||d[0]}function Zc(a,b){if(null==a)var c=a=1;else c=La(a),a=La(D(a,".")?a.substring(1):"."+a);for(var d=0;de?"?":"&")+a+b.substring(e);b=b.replace(/&+_ga=/,"&_ga=");return b=b.replace(RegExp("&+_gac="),"&_gac=")},rd=function(a,b){if(b&&b.action)if("get"==b.method.toLowerCase()){a=a.target.get("linkerParam").split("&"); for(var c=0;carguments.length)){if("string"===typeof arguments[0]){var b=arguments[0];var c=[].slice.call(arguments,1)}else b=arguments[0]&&arguments[0][Va],c=arguments;b&&(c=za(me[b]||[],c),c[Va]=b,mf(c),this.model.set(c,void 0,!0),this.filters.D(this.model),this.model.data.m={})}};pc.prototype.ma=function(a,b){var c=this;u(a,c,b)||(v(a,function(){u(a,c,b)}),y(String(c.get(V)),a,void 0,b,!0))}; function td(a,b){var c=P(a,U);a.data.set(la,"_ga"==c?"_gid":c+"_gid");if("cookie"==P(a,ac)){hc=!1;c=Ca(P(a,U));c=Xd(a,c);if(!c){c=P(a,W);var 
d=P(a,$b)||xa();c=Xc("__utma",d,c);void 0!=c?(J(10),c=c.O[1]+"."+c.O[2]):c=void 0}c&&(hc=!0);if(d=c&&!a.get(Hd))if(d=c.split("."),2!=d.length)d=!1;else if(d=Number(d[1])){var e=R(a,Zb);d=d+e<(new Date).getTime()/1E3}else d=!1;d&&(c=void 0);c&&(a.data.set(xd,c),a.data.set(Q,c),(c=uc(a))&&a.data.set(I,c));a.get(je)&&(c=a.get(ce),d=a.get(ie),!c||d&&"aw.ds"!=d)&& (c={},d=(M?df(c):{})[P(a,Na)],le(c),d&&0!=d.length&&(c=d[0],a.data.set(fe,c.timestamp/1E3),a.data.set(ce,c.qa)));a.get(je)&&(c=a.get(Se),d={},e=(M?df(d,"_gac_gb",!0):{})[P(a,Na)],ef(d),e&&0!=e.length&&(d=e[0],e=d.qa,c&&c!==e||(d.labels&&d.labels.length&&(e+="."+d.labels.join(".")),a.data.set(Te,d.timestamp/1E3),a.data.set(Se,e))))}if(a.get(Hd)){c=be("_ga",!!a.get(cc));var g=be("_gl",!!a.get(cc));d=De.get(a.get(cc));e=d._ga;g&&0=ca[0]||0>= ca[1]?"":ca.join("x");a.set(rb,c);c=a.set;var l;if((e=(e=O.navigator)?e.plugins:null)&&e.length)for(g=0;ga.split("/")[0].indexOf(":")&&(a=g+d[2].substring(0,d[2].lastIndexOf("/"))+"/"+a);b.href=a;c=kf(b);return{protocol:(b.protocol||"").toLowerCase(),host:c[0],port:c[1],path:c[2],query:b.search||"",url:a||""}},cf=function(a,b){return a&& 0<=a.indexOf("/")?a:(b||bd(!1))+"/plugins/ua/"+a};var Z={ga:function(){Z.fa=[]}};Z.ga();Z.D=function(a){var b=Z.J.apply(Z,arguments);b=Z.fa.concat(b);for(Z.fa=[];0c;c++){var d=b[c].src;if(d&&0==d.indexOf(bd(!0)+"/analytics")){b=!0;break a}}b=!1}b&&(Ba=!0)}(O.gaplugins=O.gaplugins||{}).Linker=Dc;b=Dc.prototype;C("linker",Dc);X("decorate",b,b.ca,20);X("autoLink",b,b.S,25);X("passthrough",b,b.$,25);C("displayfeatures",fd);C("adfeatures",fd);Z.D.apply(N,a)}};var xf=N.N,yf=O[gb];yf&&yf.r?xf():z(xf);z(function(){Z.D(["provide","render",ua])});})(window); ================================================ FILE: tests_data/basic/jinja/example.j2 ================================================ {% extends "base.html" %} {% block content %}

Welcome, {{ user.username }}

{% if user.is_admin %}

Admin privileges granted. You can go to the admin dashboard.

{% else %}

Regular user access. You can view your profile.

{% endif %}
    {% for item in items %}
  • {{ item.name }} - {{ item.price | round(2) }} {{ currency_symbol }}
  • {% else %}
  • No items available.
  • {% endfor %}
{% set now = current_time() %}

Page generated at: {{ now.strftime('%Y-%m-%d %H:%M:%S') }}

{% endblock %} ================================================ FILE: tests_data/basic/json/doc.json ================================================ { "3gp": { "name": "3gp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ace": { "name": "ace", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "aff": { "name": "aff", "extensions": [ "aff" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ai": { "name": "ai", "extensions": [ "ai" ], "mime_type": "application/pdf", "group": "document", "magic": "PDF document", "description": "Adobe Illustrator Artwork", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "ai", "target_label": "ai", "correct_labels": [ "ai", "pdf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "algol68": { "name": "algol68", "extensions": [ "a68" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "apk": { "name": "apk", "extensions": [ "apk" ], "mime_type": "application/vnd.android.package-archive", "group": "executable", 
"magic": "Java archive data", "description": "Android package", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "apk", "correct_labels": [ "apk" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "appleplist": { "name": "appleplist", "extensions": [ "plist" ], "mime_type": "application/x-plist", "group": "application", "magic": "Apple binary property list", "description": "Android property list", "vt_type": "appleplist", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "text" ], "model_target_label": "appleplist", "target_label": "appleplist", "correct_labels": [ "appleplist" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "arj": { "name": "arj", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "asm": { "name": "asm", "extensions": [ "S", "asm" ], "mime_type": "text/x-asm", "group": "code", "magic": "assembler source", "description": "Assembly", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "asm", "target_label": "asm", "correct_labels": [ "asm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "asp": { "name": "asp", "extensions": [ "aspx", "asp" ], "mime_type": "text/html", "group": "code", "magic": "HTML document", "description": "ASP source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "asp", "target_label": "asp", "correct_labels": [ "asp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "avi": { "name": "avi", "extensions": [ "avi" ], "mime_type": null, 
"group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ax": { "name": "ax", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "batch": { "name": "batch", "extensions": [ "bat" ], "mime_type": "text/x-msdos-batch", "group": "code", "magic": "DOS batch file", "description": "DOS batch file", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "batch", "target_label": "batch", "correct_labels": [ "batch" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "bcad": { "name": "bcad", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bib": { "name": "bib", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bmp": { "name": "bmp", "extensions": [ "bmp" ], "mime_type": "image/bmp", "group": "image", "magic": "PC bitmap", "description": "BMP image data", "vt_type": "bmp", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "bmp", "target_label": "bmp", "correct_labels": [ "bmp" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "bpl": { "name": "bpl", "extensions": [ "bpl" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "brainfuck": { "name": "brainfuck", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "bzip": { "name": "bzip", "extensions": [ "bz2", "tbz2", "tar.bz2" ], "mime_type": "application/x-bzip2", "group": "archive", "magic": "bzip2 compressed data", "description": "bzip2 compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "bzip", "target_label": "bzip", "correct_labels": [ "bzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "c": { "name": "c", "extensions": [ "c", "cpp", "h", "hpp", "cc" ], "mime_type": "text/x-c", "group": "code", "magic": "C source", "description": "C source", "vt_type": "c,cpp", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "c", "target_label": "c", "correct_labels": [ "c", "cpp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cab": { "name": "cab", "extensions": [ "cab" ], "mime_type": "application/vnd.ms-cab-compressed", "group": "archive", "magic": "Microsoft Cabinet archive data", "description": "Microsoft Cabinet archive data", "vt_type": "cab", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "cab", "target_label": "cab", "correct_labels": [ "cab" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cad": { "name": "cad", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cat": { "name": "cat", "extensions": [ "cat" ], "mime_type": "application/octet-stream", "group": "application", "magic": "data", "description": "Windows Catalog file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "cat", "target_label": "cat", "correct_labels": [ "cat", "ctl" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cdf": { "name": "cdf", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "chm": { "name": "chm", "extensions": [ "chm" ], "mime_type": "application/chm", "group": "application", "magic": "MS Windows HtmlHelp Data", "description": "MS Windows HtmlHelp Data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "chm", "target_label": "chm", "correct_labels": [ "chm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "clojure": { "name": "clojure", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cmake": { "name": "cmake", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": 
null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cobol": { "name": "cobol", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "coff": { "name": "coff", "extensions": [], "mime_type": "application/x-coff", "group": "executable", "magic": "Intel 80386 COFF", "description": "Intel 80386 COFF", "vt_type": "coff", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "coff", "target_label": "coff", "correct_labels": [ "coff", "exp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "coffee": { "name": "coffee", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "com": { "name": "com", "extensions": [], "mime_type": "application/x-dosexec", "group": null, "magic": null, "description": null, "vt_type": "com", "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "cpl": { "name": "cpl", "extensions": [ "cpl" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE32 executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "cpl" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cpp": { "name": "cpp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "crx": { "name": "crx", "extensions": [ "crx" ], "mime_type": "application/x-chrome-extension", "group": "executable", "magic": "Google Chrome extension", "description": "Google Chrome extension", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "crx", "target_label": "crx", "correct_labels": [ "crx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "cs": { "name": "cs", "extensions": [ "cs" ], "mime_type": "text/plain", "group": "code", "magic": "ASCII text", "description": "C# source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "cs", "target_label": "cs", "correct_labels": [ "cs" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "css": { "name": "css", "extensions": [ "css" ], "mime_type": "text/css", "group": "code", "magic": "ASCII text", "description": "CSS source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "css", "target_label": "css", "correct_labels": [ "css" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "csv": { "name": "csv", "extensions": [ "csv" ], "mime_type": "text/csv", "group": "code", "magic": "CSV text", "description": "CSV document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "csv", "target_label": "csv", "correct_labels": [ "csv" ], "in_scope_for_output_content_type": true, 
"in_scope_for_training": true }, "ctl": { "name": "ctl", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dart": { "name": "dart", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "deb": { "name": "deb", "extensions": [ "deb" ], "mime_type": "application/vnd.debian.binary-package", "group": "archive", "magic": "Debian binary package", "description": "Debian binary package", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "deb", "target_label": "deb", "correct_labels": [ "deb" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dex": { "name": "dex", "extensions": [ "dex" ], "mime_type": "application/x-android-dex", "group": "executable", "magic": "Dalvik dex file", "description": "Dalvik dex file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "dex", "target_label": "dex", "correct_labels": [ "dex" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dey": { "name": "dey", "extensions": [], "mime_type": "application/x-android-dey", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "diff": { "name": "diff", "extensions": [ "diff" ], "mime_type": null, "group": null, "magic": null, "description": 
null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "directory": { "name": "directory", "extensions": [], "mime_type": "inode/directory", "group": "inode", "magic": "directory", "description": "A directory", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "directory", "correct_labels": [ "directory" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "dll": { "name": "dll", "extensions": [ "dll" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": "pedll", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "dll" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dm": { "name": "dm", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dmg": { "name": "dmg", "extensions": [ "dmg" ], "mime_type": "application/x-apple-diskimage", "group": "archive", "magic": "Apple disk image", "description": "Apple disk image", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "dmg", "target_label": "dmg", "correct_labels": [ "dmg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "doc": { "name": "doc", "extensions": [ "doc" ], "mime_type": "application/msword", "group": "document", "magic": "Composite Document File", "description": "Microsoft Word CDF document", "vt_type": null, "datasets": 
[ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "doc", "correct_labels": [ "doc" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dockerfile": { "name": "dockerfile", "extensions": [ "=Dockerfile" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "docx": { "name": "docx", "extensions": [ "docx", "docm" ], "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "group": "document", "magic": "Microsoft Word 2007+", "description": "Microsoft Word 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "docx", "correct_labels": [ "docx", "tmdx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "dosmbr": { "name": "dosmbr", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "dylib": { "name": "dylib", "extensions": [ "dylib" ], "mime_type": "application/x-mach-o", "group": "executable", "magic": "Mach-O executable", "description": "Mach-O executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "macho" ], "model_target_label": "macho", "target_label": "macho", "correct_labels": [ "macho", "dylib" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "elf": { "name": "elf", "extensions": [ "elf", "so" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": 
"ELF executable", "description": "ELF executable", "vt_type": "elf", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "so" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "elixir": { "name": "elixir", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "emf": { "name": "emf", "extensions": [ "emf" ], "mime_type": "application/octet-stream", "group": "application", "magic": "Windows Enhanced Metafile", "description": "Windows Enhanced Metafile image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "emf", "target_label": "emf", "correct_labels": [ "emf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "eml": { "name": "eml", "extensions": [ "eml" ], "mime_type": "message/rfc822", "group": "text", "magic": "RFC 822 mail", "description": "RFC 822 mail", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "eml", "target_label": "eml", "correct_labels": [ "eml" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "empty": { "name": "empty", "extensions": [], "mime_type": "inode/x-empty", "group": "inode", "magic": "empty", "description": "Empty file", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "empty", "correct_labels": [ "empty" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "epub": { "name": "epub", "extensions": [ "epub" ], "mime_type": "application/epub+zip", "group": "document", "magic": "EPUB document", "description": "EPUB document", 
"vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "epub", "correct_labels": [ "epub" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "erlang": { "name": "erlang", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ese": { "name": "ese", "extensions": [], "mime_type": "application/x-ms-ese", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "exe": { "name": "exe", "extensions": [ "exe" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": "peexe", "datasets": [ "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "exe" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "exp": { "name": "exp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "flac": { "name": "flac", "extensions": [ "flac" ], "mime_type": "audio/flac", "group": "audio", "magic": "FLAC audio bitstream data", "description": "FLAC audio bitstream data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "flac", 
"target_label": "flac", "correct_labels": [ "flac" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "fortran": { "name": "fortran", "extensions": [ "f90", "f95", "f03" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "fpx": { "name": "fpx", "extensions": [ "fpx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": "fpx", "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "gif": { "name": "gif", "extensions": [ "gif" ], "mime_type": "image/gif", "group": "image", "magic": "GIF image data", "description": "GIF image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "gif", "target_label": "gif", "correct_labels": [ "gif" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "go": { "name": "go", "extensions": [ "go" ], "mime_type": "text/x-golang", "group": "code", "magic": "ASCII text", "description": "Golang source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "go", "target_label": "go", "correct_labels": [ "go" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "gpx": { "name": "gpx", "extensions": [ "gpx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "groovy": { "name": "groovy", "extensions": [], "mime_type": null, 
"group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "gzip": { "name": "gzip", "extensions": [ "gz", "gzip", "tgz", "tar.gz" ], "mime_type": "application/gzip", "group": "archive", "magic": "gzip compressed data", "description": "gzip compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "gzip", "target_label": "gzip", "correct_labels": [ "gzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "haskell": { "name": "haskell", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "hfs": { "name": "hfs", "extensions": [ "hfs" ], "mime_type": "application/x-hfs", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "hlp": { "name": "hlp", "extensions": [ "hlp" ], "mime_type": "application/winhlp", "group": "application", "magic": "MS Windows help", "description": "MS Windows help", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "hlp", "target_label": "hlp", "correct_labels": [ "hlp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "hta": { "name": "hta", "extensions": [ "hta" ], "mime_type": "application/hta", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], 
"model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "html": { "name": "html", "extensions": [ "html", "htm", "xhtml", "xht" ], "mime_type": "text/html", "group": "code", "magic": "HTML document", "description": "HTML document", "vt_type": "html", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "html", "target_label": "html", "correct_labels": [ "html" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "hve": { "name": "hve", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ico": { "name": "ico", "extensions": [ "ico" ], "mime_type": "image/vnd.microsoft.icon", "group": "image", "magic": "MS Windows icon resource", "description": "MS Windows icon resource", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "ico", "target_label": "ico", "correct_labels": [ "ico" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "img": { "name": "img", "extensions": [ "img" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ini": { "name": "ini", "extensions": [ "ini" ], "mime_type": "text/plain", "group": "text", "magic": "Generic INItialization configuration", "description": "INI configuration file", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "ini", "target_label": "ini", "correct_labels": 
[ "ini" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "internetshortcut": { "name": "internetshortcut", "extensions": [ "url" ], "mime_type": "application/x-mswinurl", "group": "application", "magic": "MS Windows 95 Internet shortcut", "description": "MS Windows Internet shortcut", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "internetshortcut", "target_label": "internetshortcut", "correct_labels": [ "internetshortcut" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "iosapp": { "name": "iosapp", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "iso": { "name": "iso", "extensions": [ "iso" ], "mime_type": "application/x-iso9660-image", "group": "archive", "magic": "ISO 9660 CD-ROM filesystem data", "description": "ISO 9660 CD-ROM filesystem data", "vt_type": "isoimage", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "iso", "target_label": "iso", "correct_labels": [ "iso", "udf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "jar": { "name": "jar", "extensions": [ "jar" ], "mime_type": "application/java-archive", "group": "archive", "magic": "Java archive data (JAR)", "description": "Java archive data (JAR)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "jar", "target_label": "jar", "correct_labels": [ "jar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "java": { "name": "java", "extensions": [ "java" ], "mime_type": "text/x-java", "group": "code", "magic": "Java source", "description": "Java source", "vt_type": "java", 
"datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "java", "target_label": "java", "correct_labels": [ "java" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "javabytecode": { "name": "javabytecode", "extensions": [ "class" ], "mime_type": "application/x-java-applet", "group": "executable", "magic": "compiled Java class data", "description": "Java compiled bytecode", "vt_type": "class", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "javabytecode", "target_label": "javabytecode", "correct_labels": [ "javabytecode" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "javascript": { "name": "javascript", "extensions": [ "js" ], "mime_type": "application/javascript", "group": "code", "magic": "Node.js script text executable", "description": "Javascript source", "vt_type": "javascript", "datasets": [ "github", "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "javascript", "target_label": "javascript", "correct_labels": [ "javascript", "typescript" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "jpeg": { "name": "jpeg", "extensions": [ "jpg", "jpeg" ], "mime_type": "image/jpeg", "group": "image", "magic": "JPEG image data", "description": "JPEG image data", "vt_type": "jpeg", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "jpeg", "target_label": "jpeg", "correct_labels": [ "jpeg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "json": { "name": "json", "extensions": [ "json" ], "mime_type": "application/json", "group": "code", "magic": "JSON data", "description": "JSON document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "json", "target_label": "json", "correct_labels": [ "json" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "julia": { "name": "julia", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ko": { "name": "ko", "extensions": [ "ko" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "ko" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "kotlin": { "name": "kotlin", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "latex": { "name": "latex", "extensions": [ "tex" ], "mime_type": "text/x-tex", "group": "text", "magic": "LaTeX document", "description": "LaTeX document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "latex", "target_label": "latex", "correct_labels": [ "latex" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lisp": { "name": "lisp", "extensions": [ "lisp" ], "mime_type": "text/x-lisp", "group": "code", "magic": "Lisp/Scheme program", "description": "Lisp source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "lisp", "target_label": "lisp", "correct_labels": [ "lisp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lnk": { "name": "lnk", 
"extensions": [ "lnk" ], "mime_type": "application/x-ms-shortcut", "group": "application", "magic": "MS Windows shortcut", "description": "MS Windows shortcut", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "lnk", "target_label": "lnk", "correct_labels": [ "lnk" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "lua": { "name": "lua", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "m3u": { "name": "m3u", "extensions": [ "m3u8", "m3u" ], "mime_type": "text/plain", "group": "application", "magic": "M3U playlist", "description": "M3U playlist", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "m3u", "target_label": "m3u", "correct_labels": [ "m3u" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "macho": { "name": "macho", "extensions": [], "mime_type": "application/x-mach-o", "group": "executable", "magic": "Mach-O executable", "description": "Mach-O executable", "vt_type": "macho", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "macho" ], "model_target_label": "macho", "target_label": "macho", "correct_labels": [ "macho", "dylib" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "maff": { "name": "maff", "extensions": [ "maff" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "makefile": { "name": "makefile", "extensions": [ "=Makefile" ], "mime_type": "text/x-makefile", "group": 
"code", "magic": "makefile script", "description": "Makefile source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "makefile", "target_label": "makefile", "correct_labels": [ "makefile" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "markdown": { "name": "markdown", "extensions": [ "md" ], "mime_type": "text/markdown", "group": "text", "magic": "ASCII text", "description": "Markdown document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "markdown", "target_label": "markdown", "correct_labels": [ "markdown" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "matlab": { "name": "matlab", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mht": { "name": "mht", "extensions": [ "mht" ], "mime_type": "application/x-mimearchive", "group": "code", "magic": "HTML document", "description": "MHTML document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "mht", "target_label": "mht", "correct_labels": [ "mht" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mkv": { "name": "mkv", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mov": { "name": "mov", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], 
"model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mp3": { "name": "mp3", "extensions": [ "mp3" ], "mime_type": "audio/mpeg", "group": "audio", "magic": "Audio file with ID3", "description": "MP3 media file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "mp3", "target_label": "mp3", "correct_labels": [ "mp3" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mp4": { "name": "mp4", "extensions": [ "mp4" ], "mime_type": "video/mp4", "group": "video", "magic": "ISO Media", "description": "MP4 media file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "mp4", "target_label": "mp4", "correct_labels": [ "mp4" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mscompress": { "name": "mscompress", "extensions": [], "mime_type": "application/x-ms-compress-szdd", "group": "archive", "magic": "MS Compress archive data", "description": "MS Compress archive data", "vt_type": "mscompress", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "mscompress", "target_label": "mscompress", "correct_labels": [ "mscompress" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "msi": { "name": "msi", "extensions": [ "msi" ], "mime_type": "application/x-msi", "group": "archive", "magic": "Composite Document File", "description": "Microsoft Installer file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "msi", "target_label": "msi", "correct_labels": [ "msi" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mst": { "name": "mst", "extensions": [ "mst" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], 
"parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "msvisio": { "name": "msvisio", "extensions": [], "mime_type": "application/vnd.ms-visio.drawing.main+xml", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "mui": { "name": "mui", "extensions": [ "mui" ], "mime_type": "application/x-dosexec", "group": "application", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "mui" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mum": { "name": "mum", "extensions": [ "mum" ], "mime_type": "text/xml", "group": "application", "magic": "XML document", "description": "Windows Update Package file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "mum", "target_label": "mum", "correct_labels": [ "mum" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "mun": { "name": "mun", "extensions": [ "mun" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "nim": { "name": "nim", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], 
"in_scope_for_output_content_type": false, "in_scope_for_training": false }, "null": { "name": "null", "extensions": [ "null" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "object": { "name": "object", "extensions": [ "o" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "objectivec": { "name": "objectivec", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ocaml": { "name": "ocaml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ocx": { "name": "ocx", "extensions": [ "ocx" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "ax", "ocx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odex": { "name": "odex", "extensions": [ "odex" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", 
"description": "ODEX ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "odex", "target_label": "odex", "correct_labels": [ "odex", "elf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odp": { "name": "odp", "extensions": [ "odp" ], "mime_type": "application/vnd.oasis.opendocument.presentation", "group": "document", "magic": "OpenDocument Presentation", "description": "OpenDocument Presentation", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "odp", "target_label": "odp", "correct_labels": [ "odp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ods": { "name": "ods", "extensions": [ "ods" ], "mime_type": "application/vnd.oasis.opendocument.spreadsheet", "group": "document", "magic": "OpenDocument Spreadsheet", "description": "OpenDocument Spreadsheet", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "ods", "target_label": "ods", "correct_labels": [ "ods" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "odt": { "name": "odt", "extensions": [ "odt" ], "mime_type": "application/vnd.oasis.opendocument.text", "group": "document", "magic": "OpenDocument Text", "description": "OpenDocument Text", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive" ], "model_target_label": "odt", "target_label": "odt", "correct_labels": [ "odt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ogg": { "name": "ogg", "extensions": [ "ogg" ], "mime_type": "audio/ogg", "group": "audio", "magic": "Ogg data", "description": "Ogg data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "ogg", "target_label": "ogg", "correct_labels": [ "ogg" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ole": { "name": "ole", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ooxml": { "name": "ooxml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "outlook": { "name": "outlook", "extensions": [], "mime_type": "application/vnd.ms-outlook", "group": "application", "magic": "CDFV2 Microsoft Outlook Message", "description": "MS Outlook Message", "vt_type": "outlook", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "outlook", "correct_labels": [ "outlook" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "palmos": { "name": "palmos", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": "palmos", "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pascal": { "name": "pascal", "extensions": [ "pascal" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pbm": { "name": "pbm", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], 
"parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pcap": { "name": "pcap", "extensions": [ "pcap", "pcapng" ], "mime_type": "application/vnd.tcpdump.pcap", "group": "application", "magic": "pcap capture file", "description": "pcap capture file", "vt_type": "pcap", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pcap", "target_label": "pcap", "correct_labels": [ "pcap" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pdf": { "name": "pdf", "extensions": [ "pdf" ], "mime_type": "application/pdf", "group": "document", "magic": "PDF document", "description": "PDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pdf", "target_label": "pdf", "correct_labels": [ "pdf", "ai" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pebin": { "name": "pebin", "extensions": [ "exe", "dll", "sys" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE executable", "description": "PE executable", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "pem": { "name": "pem", "extensions": [ "pem" ], "mime_type": "application/x-pem-file", "group": "application", "magic": "PEM certificate", "description": "PEM certificate", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "pem", "target_label": "pem", "correct_labels": [ "pem", "pgpkey" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "perl": { "name": "perl", "extensions": [ "pl" ], "mime_type": "text/x-perl", "group": "code", "magic": "Perl script text executable", "description": "Perl source", 
"vt_type": "perl", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "perl", "target_label": "perl", "correct_labels": [ "perl" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pgpkey": { "name": "pgpkey", "extensions": [], "mime_type": "application/pgp-keys", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "php": { "name": "php", "extensions": [ "php" ], "mime_type": "text/x-php", "group": "code", "magic": "PHP script", "description": "PHP source", "vt_type": "php", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "php", "target_label": "php", "correct_labels": [ "php" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "png": { "name": "png", "extensions": [ "png" ], "mime_type": "image/png", "group": "image", "magic": "PNG image data", "description": "PNG image data", "vt_type": "png", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "png", "target_label": "png", "correct_labels": [ "png" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "postscript": { "name": "postscript", "extensions": [ "ps" ], "mime_type": "application/postscript", "group": "document", "magic": "PostScript document text", "description": "PostScript document", "vt_type": "postscript", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "postscript", "target_label": "postscript", "correct_labels": [ "postscript" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "powershell": { "name": "powershell", "extensions": [ "ps1" ], "mime_type": "application/x-powershell", "group": "code", "magic": "a powershell 
script", "description": "Powershell source", "vt_type": "ps", "datasets": [ "github", "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "powershell", "target_label": "powershell", "correct_labels": [ "powershell" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ppt": { "name": "ppt", "extensions": [ "ppt" ], "mime_type": "application/vnd.ms-powerpoint", "group": "document", "magic": "Composite Document File", "description": "Microsoft PowerPoint CDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "ppt", "correct_labels": [ "ppt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pptx": { "name": "pptx", "extensions": [ "pptx", "pptm" ], "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "group": "document", "magic": "Microsoft PowerPoint 2007+", "description": "Microsoft PowerPoint 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "pptx", "correct_labels": [ "pptx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "printfox": { "name": "printfox", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "prolog": { "name": "prolog", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "pub": { 
"name": "pub", "extensions": [ "pub" ], "mime_type": "application/x-mspublisher", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "python": { "name": "python", "extensions": [ "py" ], "mime_type": "text/x-python", "group": "code", "magic": "Python script", "description": "Python source", "vt_type": "python", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "python", "target_label": "python", "correct_labels": [ "python" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pythonbytecode": { "name": "pythonbytecode", "extensions": [ "pyc", "pyo" ], "mime_type": "application/x-bytecode.python", "group": "executable", "magic": "python byte-compiled", "description": "Python compiled bytecode", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "pythonbytecode", "target_label": "pythonbytecode", "correct_labels": [ "pythonbytecode" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "pythonpar": { "name": "pythonpar", "extensions": [ "par" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "r": { "name": "r", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "randombytes": { "name": "randombytes", "extensions": [], 
"mime_type": "application/octet-stream", "group": "unknown", "magic": "data", "description": "Random bytes", "vt_type": null, "datasets": [ "synthetic" ], "parent": null, "tags": [], "model_target_label": "unknown", "target_label": "unknown", "correct_labels": [ "unknown" ], "in_scope_for_output_content_type": false, "in_scope_for_training": true }, "rar": { "name": "rar", "extensions": [ "rar" ], "mime_type": "application/x-rar", "group": "archive", "magic": "RAR archive data", "description": "RAR archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "rar", "target_label": "rar", "correct_labels": [ "rar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rdf": { "name": "rdf", "extensions": [ "rdf" ], "mime_type": "application/rdf+xml", "group": "text", "magic": "XML document", "description": "Resource Description Framework document (RDF)", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "rdf", "target_label": "rdf", "correct_labels": [ "rdf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rll": { "name": "rll", "extensions": [ "rll" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "rpm": { "name": "rpm", "extensions": [ "rpm" ], "mime_type": "application/x-rpm", "group": "archive", "magic": "RPM", "description": "RedHat Package Manager archive (RPM)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "rpm", "target_label": "rpm", "correct_labels": [ "rpm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rst": { "name": "rst", "extensions": [ "rst" ], 
"mime_type": "text/x-rst", "group": "text", "magic": "ReStructuredText file", "description": "ReStructuredText document", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "rst", "target_label": "rst", "correct_labels": [ "rst" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rtf": { "name": "rtf", "extensions": [ "rtf" ], "mime_type": "text/rtf", "group": "text", "magic": "Rich Text Format data", "description": "Rich Text Format document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text" ], "model_target_label": "rtf", "target_label": "rtf", "correct_labels": [ "rtf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "ruby": { "name": "ruby", "extensions": [ "rb" ], "mime_type": "application/x-ruby", "group": "code", "magic": "Ruby script", "description": "Ruby source", "vt_type": "ruby", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "ruby", "target_label": "ruby", "correct_labels": [ "ruby" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "rust": { "name": "rust", "extensions": [ "rs" ], "mime_type": "application/x-rust", "group": "code", "magic": "ASCII text", "description": "Rust source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "rust", "target_label": "rust", "correct_labels": [ "rust" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "s": { "name": "s", "extensions": [ "s" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "scala": { "name": "scala", "extensions": [ "scala" ], "mime_type": 
"application/x-scala", "group": "code", "magic": "ASCII text", "description": "Scala source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "scala", "target_label": "scala", "correct_labels": [ "scala" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "scr": { "name": "scr", "extensions": [ "scr" ], "mime_type": "application/x-dosexec", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "scr" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "scriptwsf": { "name": "scriptwsf", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "sevenzip": { "name": "sevenzip", "extensions": [ "7z" ], "mime_type": "application/x-7z-compressed", "group": "archive", "magic": "7-zip archive data", "description": "7-zip archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "sevenzip", "target_label": "sevenzip", "correct_labels": [ "sevenzip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "sgml": { "name": "sgml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "sh3d": { "name": "sh3d", "extensions": [ "sh3d" ], "mime_type": null, "group": null, "magic": 
null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "shell": { "name": "shell", "extensions": [ "sh" ], "mime_type": "text/x-shellscript", "group": "code", "magic": "shell script", "description": "Shell script", "vt_type": "shell", "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "shell", "target_label": "shell", "correct_labels": [ "shell" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "smali": { "name": "smali", "extensions": [ "smali" ], "mime_type": "application/x-smali", "group": "code", "magic": "ASCII text", "description": "Smali source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "smali", "target_label": "smali", "correct_labels": [ "smali" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "so": { "name": "so", "extensions": [ "so" ], "mime_type": "application/x-executable-elf", "group": "executable", "magic": "ELF executable", "description": "ELF executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "elf" ], "model_target_label": "elf", "target_label": "elf", "correct_labels": [ "elf", "so" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "sql": { "name": "sql", "extensions": [ "sql" ], "mime_type": "application/x-sql", "group": "code", "magic": "ASCII text", "description": "SQL source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "sql", "target_label": "sql", "correct_labels": [ "sql" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "squashfs": { "name": "squashfs", "extensions": [], "mime_type": "application/octet-stream", "group": "archive", 
"magic": "Squashfs filesystem", "description": "Squash filesystem", "vt_type": "squashfs", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "squashfs", "target_label": "squashfs", "correct_labels": [ "squashfs" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "svd": { "name": "svd", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "svg": { "name": "svg", "extensions": [ "svg" ], "mime_type": "image/svg+xml", "group": "image", "magic": "SVG Scalable Vector Graphics image", "description": "SVG Scalable Vector Graphics image data", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text" ], "model_target_label": "svg", "target_label": "svg", "correct_labels": [ "svg" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "swf": { "name": "swf", "extensions": [ "swf" ], "mime_type": "application/x-shockwave-flash", "group": "executable", "magic": "Macromedia Flash data", "description": "Macromedia Flash data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "swf", "target_label": "swf", "correct_labels": [ "swf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "swift": { "name": "swift", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "symlinktext": { "name": "symlinktext", "extensions": [], "mime_type": "text/plain", "group": "application", "magic": "ASCII text", "description": "Symbolic 
link (textual representation)", "vt_type": null, "datasets": [ "synthetic" ], "parent": null, "tags": [ "text" ], "model_target_label": "symlinktext", "target_label": "symlinktext", "correct_labels": [ "symlinktext", "txt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "symlink": { "name": "symlink", "extensions": [], "mime_type": "inode/symlink", "group": "inode", "magic": "symbolic link to ", "description": "Symbolic link to ", "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": "symlink", "correct_labels": [ "symlink" ], "in_scope_for_output_content_type": true, "in_scope_for_training": false }, "sys": { "name": "sys", "extensions": [ "sys" ], "mime_type": "application/x-windows-driver", "group": "executable", "magic": "PE Windows executable", "description": "PE Windows executable", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "pebin" ], "model_target_label": "pebin", "target_label": "pebin", "correct_labels": [ "pebin", "sys" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tar": { "name": "tar", "extensions": [ "tar" ], "mime_type": "application/x-tar", "group": "archive", "magic": "POSIX tar archive", "description": "POSIX tar archive", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "tar", "target_label": "tar", "correct_labels": [ "tar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tga": { "name": "tga", "extensions": [ "tga" ], "mime_type": "image/x-tga", "group": "image", "magic": "Targa image data", "description": "Targa image data", "vt_type": "targa", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "tga", "target_label": "tga", "correct_labels": [ "tga" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tiff": { "name": "tiff", "extensions": [ 
"tiff", "tif" ], "mime_type": "image/tiff", "group": "image", "magic": "TIFF image data", "description": "TIFF image data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "tiff", "target_label": "tiff", "correct_labels": [ "tiff" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "tmdx": { "name": "tmdx", "extensions": [ "tmdx", "tmvx" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "toml": { "name": "toml", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "torrent": { "name": "torrent", "extensions": [ "torrent" ], "mime_type": "application/x-bittorrent", "group": "application", "magic": "BitTorrent file", "description": "BitTorrent file", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "torrent", "target_label": "torrent", "correct_labels": [ "torrent" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "troff": { "name": "troff", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "ttf": { "name": "ttf", "extensions": [ "ttf" ], "mime_type": "font/sfnt", "group": "font", "magic": "TrueType Font data", "description": "TrueType Font data", "vt_type": null, "datasets": [ "vt-ext" 
], "parent": null, "tags": [ "binary" ], "model_target_label": "ttf", "target_label": "ttf", "correct_labels": [ "ttf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "txt": { "name": "txt", "extensions": [ "txt" ], "mime_type": "text/plain", "group": "text", "magic": "ASCII text", "description": "Generic text document", "vt_type": null, "datasets": [ "github", "synthetic" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "txt", "target_label": "txt", "correct_labels": [ "txt" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "typescript": { "name": "typescript", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "udf": { "name": "udf", "extensions": [], "mime_type": "application/x-udf-image", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "unixcompress": { "name": "unixcompress", "extensions": [ "z" ], "mime_type": "application/x-compress", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "unknown": { "name": "unknown", "extensions": [], "mime_type": "application/octet-stream", "group": "unknown", "magic": "data", "description": "Unknown type", "vt_type": null, "datasets": [], "parent": null, "tags": [ "binary" ], "model_target_label": "unknown", "target_label": "unknown", "correct_labels": [ "unknown" ], 
"in_scope_for_output_content_type": true, "in_scope_for_training": false }, "vba": { "name": "vba", "extensions": [ "vbs" ], "mime_type": "text/vbscript", "group": "code", "magic": "ASCII text", "description": "MS Visual Basic source (VBA)", "vt_type": "vba", "datasets": [ "vt-ext", "vt-ext-malicious" ], "parent": null, "tags": [ "text" ], "model_target_label": "vba", "target_label": "vba", "correct_labels": [ "vba" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "verilog": { "name": "verilog", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "vhd": { "name": "vhd", "extensions": [], "mime_type": "application/x-vhd", "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "wasm": { "name": "wasm", "extensions": [ "wasm" ], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "wav": { "name": "wav", "extensions": [ "wav" ], "mime_type": "audio/x-wav", "group": "audio", "magic": "RIFF data", "description": "Waveform Audio file (WAV)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "wav", "target_label": "wav", "correct_labels": [ "wav" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "webm": { "name": "webm", "extensions": [ "webm" ], "mime_type": "video/webm", "group": 
"video", "magic": "WebM", "description": "WebM data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "webm", "target_label": "webm", "correct_labels": [ "webm" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "webp": { "name": "webp", "extensions": [ "webp" ], "mime_type": "image/webp", "group": "image", "magic": "RIFF data", "description": "WebP data", "vt_type": "webp", "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "media" ], "model_target_label": "webp", "target_label": "webp", "correct_labels": [ "webp" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "winregistry": { "name": "winregistry", "extensions": [ "reg" ], "mime_type": "text/x-ms-regedit", "group": "application", "magic": "Windows Registry text", "description": "Windows Registry text", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "winregistry", "target_label": "winregistry", "correct_labels": [ "winregistry" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "wmf": { "name": "wmf", "extensions": [ "wmf" ], "mime_type": "image/wmf", "group": "image", "magic": "Windows metafile", "description": "Windows metafile", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary" ], "model_target_label": "wmf", "target_label": "wmf", "correct_labels": [ "wmf" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "woff": { "name": "woff", "extensions": [], "mime_type": null, "group": null, "magic": null, "description": null, "vt_type": null, "datasets": [], "parent": null, "tags": [], "model_target_label": null, "target_label": null, "correct_labels": [], "in_scope_for_output_content_type": false, "in_scope_for_training": false }, "xar": { "name": "xar", "extensions": [ "pkg" ], "mime_type": "application/x-xar", "group": "archive", "magic": 
"xar archive compressed", "description": "XAR archive compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "xar", "target_label": "xar", "correct_labels": [ "xar" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xls": { "name": "xls", "extensions": [ "xls" ], "mime_type": "application/vnd.ms-excel", "group": "document", "magic": "Composite Document File", "description": "Microsoft Excel CDF document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "cdf" ], "model_target_label": "cdf", "target_label": "xls", "correct_labels": [ "xls" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xlsb": { "name": "xlsb", "extensions": [ "xlsb" ], "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "magic": "Microsoft Excel 2007+", "description": "Microsoft Excel 2007+ document (binary format)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xlsb", "correct_labels": [ "xlsb", "xlsx" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xlsx": { "name": "xlsx", "extensions": [ "xlsx", "xlsm" ], "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "group": "document", "magic": "Microsoft Excel 2007+", "description": "Microsoft Excel 2007+ document", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "ooxml", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xlsx", "correct_labels": [ "xlsx", "xlsb" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xml": { "name": "xml", "extensions": [ "xml" ], "mime_type": "text/xml", "group": "code", "magic": "XML document", "description": "XML document", "vt_type": "xml", "datasets": 
[ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "xml", "target_label": "xml", "correct_labels": [ "xml" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xpi": { "name": "xpi", "extensions": [ "xpi" ], "mime_type": "application/zip", "group": "archive", "magic": "Zip archive data", "description": "Compressed installation archive (XPI)", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "xpi", "correct_labels": [ "xpi" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "xz": { "name": "xz", "extensions": [ "xz" ], "mime_type": "application/x-xz", "group": "archive", "magic": "XZ compressed data", "description": "XZ compressed data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "archive" ], "model_target_label": "xz", "target_label": "xz", "correct_labels": [ "xz" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "yaml": { "name": "yaml", "extensions": [ "yml", "yaml" ], "mime_type": "application/x-yaml", "group": "code", "magic": "ASCII text", "description": "YAML source", "vt_type": null, "datasets": [ "github" ], "parent": null, "tags": [ "text", "dl_target" ], "model_target_label": "yaml", "target_label": "yaml", "correct_labels": [ "yaml" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "zip": { "name": "zip", "extensions": [ "zip" ], "mime_type": "application/zip", "group": "archive", "magic": "Zip archive data", "description": "Zip archive data", "vt_type": null, "datasets": [ "vt-ext" ], "parent": null, "tags": [ "binary", "zip_archive", "archive" ], "model_target_label": "zip", "target_label": "zip", "correct_labels": [ "zip" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true }, "zlibstream": { "name": "zlibstream", "extensions": [], "mime_type": 
"application/zlib", "group": "application", "magic": "zlib compressed data", "description": "zlib compressed data", "vt_type": "zlib", "datasets": [ "vt-type" ], "parent": null, "tags": [ "binary" ], "model_target_label": "zlibstream", "target_label": "zlibstream", "correct_labels": [ "zlibstream" ], "in_scope_for_output_content_type": true, "in_scope_for_training": true } } ================================================ FILE: tests_data/basic/latex/sample.tex ================================================ \documentclass{article} \usepackage{todonotes} \title{Example of a LaTeX document} \begin{document} \maketitle This is just an example used for testing Magika. \end{document} ================================================ FILE: tests_data/basic/makefile/simple.Makefile ================================================ prog: prog.o gcc -o prog prog.o prog.o: prog.c lib.c gcc -c prog.c lib.c ================================================ FILE: tests_data/basic/markdown/README.md ================================================ # Magika Python Package Magika is a novel AI powered file type detection tool that rely on the recent advance of deep learning to provide accurate detection. Under the hood, Magika employs a custom, highly optimized Keras model that only weighs about 1MB, and enables precise file identification within milliseconds, even when running on a single CPU. Use Magika as a command line client or in your Python code! Please check out Magika on GitHub for more information and documentation: [https://github.com/google/magika](https://github.com/google/magika). ## Installing Magika ```shell $ pip install magika ``` If you intend to use Magika only as a command line, you may want to use `$ pipx install magika` instead. 
## Using Magika as a command-line tool ```shell $ magika examples/* code.asm: Assembly (code) code.py: Python source (code) doc.docx: Microsoft Word 2007+ document (document) doc.ini: INI configuration file (text) elf64.elf: ELF executable (executable) flac.flac: FLAC audio bitstream data (audio) image.bmp: BMP image data (image) java.class: Java compiled bytecode (executable) jpg.jpg: JPEG image data (image) pdf.pdf: PDF document (document) pe32.exe: PE executable (executable) png.png: PNG image data (image) README.md: Markdown document (text) tar.tar: POSIX tar archive (archive) webm.webm: WebM data (video) ``` ```help $ magika --help Usage: magika [OPTIONS] [FILE]... Magika - Determine type of FILEs with deep-learning. Options: -r, --recursive When passing this option, magika scans every file within directories, instead of outputting "directory" --json Output in JSON format. --jsonl Output in JSONL format. -i, --mime-type Output the MIME type instead of a verbose content type description. -l, --label Output a simple label instead of a verbose content type description. Use --list-output- content-types for the list of supported output. -c, --compatibility-mode Compatibility mode: output is as close as possible to `file` and colors are disabled. -s, --output-score Output the prediction score in addition to the content type. -m, --prediction-mode [best-guess|medium-confidence|high-confidence] --batch-size INTEGER How many files to process in one batch. --no-dereference This option causes symlinks not to be followed. By default, symlinks are dereferenced. --colors / --no-colors Enable/disable use of colors. -v, --verbose Enable more verbose output. -vv, --debug Enable debug logging. --generate-report Generate report useful when reporting feedback. --version Print the version and exit. --list-output-content-types Show a list of supported content types. --model-dir DIRECTORY Use a custom model. -h, --help Show this message and exit. 
Magika version: "0.5.0" Default model: "standard_v1" Send any feedback to magika-dev@google.com or via GitHub issues. ``` ## Using Magika as a Python module ```python from magika import Magika magika = Magika() result = magika.identify_bytes(b"# Example\nThis is an example of markdown!") print(result.output.ct_label) # Output: "markdown" ``` ## Citation If you use this software for your research, please cite it as: ```bibtex @software{magika, author = {Fratantonio, Yanick and Bursztein, Elie and Invernizzi, Luca and Zhang, Marina and Metitieri, Giancarlo and Kurt, Thomas and Galilee, Francois and Petit-Bianco, Alexandre and Farah, Loua and Albertini, Ange}, title = {{Magika content-type scanner}}, url = {https://github.com/google/magika} } ``` ================================================ FILE: tests_data/basic/markdown/magika_test.md ================================================ # Introduction This is a test document for Magika, yay\! We are going to take this file and convert it in a number of other formats. ================================================ FILE: tests_data/basic/markdown/simple.md ================================================ # This is the greatest markdown test sample ## Introduction Test! ## Main content Lorem ipsum? ## Conclusions Nothing much to say! ================================================ FILE: tests_data/basic/mht/sample.mht ================================================ MIME-Version: 1.0 Content-Type: multipart/related; boundary="----=_NextPart_01DBCCE0.CD23D5E0" This document is a Single File Web Page, also known as a Web Archive file. If you are seeing this message, your browser or editor doesn't support Web Archive files. Please download a browser that supports Web Archive. 
------=_NextPart_01DBCCE0.CD23D5E0 Content-Location: file:///C:/790962C5/sample-anonymouse.htm Content-Transfer-Encoding: base64 Content-Type: text/html; charset="unicode" //48AGgAdABtAGwAIAB4AG0AbABuAHMAOgBvAD0AIgB1AHIAbgA6AHMAYwBoAGUAbQBhAHMALQBt AGkAYwByAG8AcwBvAGYAdAAtAGMAbwBtADoAbwBmAGYAaQBjAGUAOgBvAGYAZgBpAGMAZQAiAA0A CgB4AG0AbABuAHMAOgB3AD0AIgB1AHIAbgA6AHMAYwBoAGUAbQBhAHMALQBtAGkAYwByAG8AcwBv AGYAdAAtAGMAbwBtADoAbwBmAGYAaQBjAGUAOgB3AG8AcgBkACIADQAKAHgAbQBsAG4AcwA6AG0A PQAiAGgAdAB0AHAAOgAvAC8AcwBjAGgAZQBtAGEAcwAuAG0AaQBjAHIAbwBzAG8AZgB0AC4AYwBv AG0ALwBvAGYAZgBpAGMAZQAvADIAMAAwADQALwAxADIALwBvAG0AbQBsACIADQAKAHgAbQBsAG4A cwA9ACIAaAB0AHQAcAA6AC8ALwB3AHcAdwAuAHcAMwAuAG8AcgBnAC8AVABSAC8AUgBFAEMALQBo AHQAbQBsADQAMAAiAD4ADQAKAA0ACgA8AGgAZQBhAGQAPgANAAoAPABtAGUAdABhACAAaAB0AHQA cAAtAGUAcQB1AGkAdgA9AEMAbwBuAHQAZQBuAHQALQBUAHkAcABlACAAYwBvAG4AdABlAG4AdAA9 ACIAdABlAHgAdAAvAGgAdABtAGwAOwAgAGMAaABhAHIAcwBlAHQAPQB1AG4AaQBjAG8AZABlACIA PgANAAoAPABtAGUAdABhACAAbgBhAG0AZQA9AFAAcgBvAGcASQBkACAAYwBvAG4AdABlAG4AdAA9 AFcAbwByAGQALgBEAG8AYwB1AG0AZQBuAHQAPgANAAoAPABtAGUAdABhACAAbgBhAG0AZQA9AEcA ZQBuAGUAcgBhAHQAbwByACAAYwBvAG4AdABlAG4AdAA9ACIATQBpAGMAcgBvAHMAbwBmAHQAIABX AG8AcgBkACAAMQA1ACIAPgANAAoAPABtAGUAdABhACAAbgBhAG0AZQA9AE8AcgBpAGcAaQBuAGEA dABvAHIAIABjAG8AbgB0AGUAbgB0AD0AIgBNAGkAYwByAG8AcwBvAGYAdAAgAFcAbwByAGQAIAAx ADUAIgA+AA0ACgA8AGwAaQBuAGsAIAByAGUAbAA9AEYAaQBsAGUALQBMAGkAcwB0ACAAaAByAGUA ZgA9ACIAcwBhAG0AcABsAGUALQBhAG4AbwBuAHkAbQBvAHUAcwBlAF8AZgBpAGwAZQBzAC8AZgBp AGwAZQBsAGkAcwB0AC4AeABtAGwAIgA+AA0ACgA8AGwAaQBuAGsAIAByAGUAbAA9AEUAZABpAHQA LQBUAGkAbQBlAC0ARABhAHQAYQAgAGgAcgBlAGYAPQAiAHMAYQBtAHAAbABlAC0AYQBuAG8AbgB5 AG0AbwB1AHMAZQBfAGYAaQBsAGUAcwAvAGUAZABpAHQAZABhAHQAYQAuAG0AcwBvACIAPgANAAoA PAAhAC0ALQBbAGkAZgAgAGcAdABlACAAbQBzAG8AIAA5AF0APgA8AHgAbQBsAD4ADQAKACAAPABv ADoATwBmAGYAaQBjAGUARABvAGMAdQBtAGUAbgB0AFMAZQB0AHQAaQBuAGcAcwA+AA0ACgAgACAA PABvADoAQQBsAGwAbwB3AFAATgBHAC8APgANAAoAIAA8AC8AbwA6AE8AZgBmAGkAYwBlAEQAbwBj 
AHUAbQBlAG4AdABTAGUAdAB0AGkAbgBnAHMAPgANAAoAPAAvAHgAbQBsAD4APAAhAFsAZQBuAGQA aQBmAF0ALQAtAD4ADQAKADwAbABpAG4AawAgAHIAZQBsAD0AdABoAGUAbQBlAEQAYQB0AGEAIABo AHIAZQBmAD0AIgBzAGEAbQBwAGwAZQAtAGEAbgBvAG4AeQBtAG8AdQBzAGUAXwBmAGkAbABlAHMA LwB0AGgAZQBtAGUAZABhAHQAYQAuAHQAaABtAHgAIgA+AA0ACgA8AGwAaQBuAGsAIAByAGUAbAA9 AGMAbwBsAG8AcgBTAGMAaABlAG0AZQBNAGEAcABwAGkAbgBnAA0ACgBoAHIAZQBmAD0AIgBzAGEA bQBwAGwAZQAtAGEAbgBvAG4AeQBtAG8AdQBzAGUAXwBmAGkAbABlAHMALwBjAG8AbABvAHIAcwBj AGgAZQBtAGUAbQBhAHAAcABpAG4AZwAuAHgAbQBsACIAPgANAAoAPAAhAC0ALQBbAGkAZgAgAGcA dABlACAAbQBzAG8AIAA5AF0APgA8AHgAbQBsAD4ADQAKACAAPAB3ADoAVwBvAHIAZABEAG8AYwB1 AG0AZQBuAHQAPgANAAoAIAAgADwAdwA6AFoAbwBvAG0APgAwADwALwB3ADoAWgBvAG8AbQA+AA0A CgAgACAAPAB3ADoARABvAGMAdQBtAGUAbgB0AEsAaQBuAGQAPgBEAG8AYwB1AG0AZQBuAHQARQBt AGEAaQBsADwALwB3ADoARABvAGMAdQBtAGUAbgB0AEsAaQBuAGQAPgANAAoAIAAgADwAdwA6AFQA cgBhAGMAawBNAG8AdgBlAHMALwA+AA0ACgAgACAAPAB3ADoAVAByAGEAYwBrAEYAbwByAG0AYQB0 AHQAaQBuAGcALwA+AA0ACgAgACAAPAB3ADoAVgBhAGwAaQBkAGEAdABlAEEAZwBhAGkAbgBzAHQA UwBjAGgAZQBtAGEAcwAvAD4ADQAKACAAIAA8AHcAOgBTAGEAdgBlAEkAZgBYAE0ATABJAG4AdgBh AGwAaQBkAD4AZgBhAGwAcwBlADwALwB3ADoAUwBhAHYAZQBJAGYAWABNAEwASQBuAHYAYQBsAGkA ZAA+AA0ACgAgACAAPAB3ADoASQBnAG4AbwByAGUATQBpAHgAZQBkAEMAbwBuAHQAZQBuAHQAPgBm AGEAbABzAGUAPAAvAHcAOgBJAGcAbgBvAHIAZQBNAGkAeABlAGQAQwBvAG4AdABlAG4AdAA+AA0A CgAgACAAPAB3ADoAQQBsAHcAYQB5AHMAUwBoAG8AdwBQAGwAYQBjAGUAaABvAGwAZABlAHIAVABl AHgAdAA+AGYAYQBsAHMAZQA8AC8AdwA6AEEAbAB3AGEAeQBzAFMAaABvAHcAUABsAGEAYwBlAGgA bwBsAGQAZQByAFQAZQB4AHQAPgANAAoAIAAgADwAdwA6AEQAbwBOAG8AdABQAHIAbwBtAG8AdABl AFEARgAvAD4ADQAKACAAIAA8AHcAOgBMAGkAZABUAGgAZQBtAGUATwB0AGgAZQByAD4ARQBOAC0A VQBTADwALwB3ADoATABpAGQAVABoAGUAbQBlAE8AdABoAGUAcgA+AA0ACgAgACAAPAB3ADoATABp AGQAVABoAGUAbQBlAEEAcwBpAGEAbgA+AFgALQBOAE8ATgBFADwALwB3ADoATABpAGQAVABoAGUA bQBlAEEAcwBpAGEAbgA+AA0ACgAgACAAPAB3ADoATABpAGQAVABoAGUAbQBlAEMAbwBtAHAAbABl AHgAUwBjAHIAaQBwAHQAPgBYAC0ATgBPAE4ARQA8AC8AdwA6AEwAaQBkAFQAaABlAG0AZQBDAG8A 
bQBwAGwAZQB4AFMAYwByAGkAcAB0AD4ADQAKACAAIAA8AHcAOgBDAG8AbQBwAGEAdABpAGIAaQBs AGkAdAB5AD4ADQAKACAAIAAgADwAdwA6AEQAbwBOAG8AdABFAHgAcABhAG4AZABTAGgAaQBmAHQA UgBlAHQAdQByAG4ALwA+AA0ACgAgACAAIAA8AHcAOgBCAHIAZQBhAGsAVwByAGEAcABwAGUAZABU AGEAYgBsAGUAcwAvAD4ADQAKACAAIAAgADwAdwA6AFMAbgBhAHAAVABvAEcAcgBpAGQASQBuAEMA ZQBsAGwALwA+AA0ACgAgACAAIAA8AHcAOgBXAHIAYQBwAFQAZQB4AHQAVwBpAHQAaABQAHUAbgBj AHQALwA+AA0ACgAgACAAIAA8AHcAOgBVAHMAZQBBAHMAaQBhAG4AQgByAGUAYQBrAFIAdQBsAGUA cwAvAD4ADQAKACAAIAAgADwAdwA6AEQAbwBuAHQARwByAG8AdwBBAHUAdABvAGYAaQB0AC8APgAN AAoAIAAgACAAPAB3ADoAUwBwAGwAaQB0AFAAZwBCAHIAZQBhAGsAQQBuAGQAUABhAHIAYQBNAGEA cgBrAC8APgANAAoAIAAgACAAPAB3ADoARQBuAGEAYgBsAGUATwBwAGUAbgBUAHkAcABlAEsAZQBy AG4AaQBuAGcALwA+AA0ACgAgACAAIAA8AHcAOgBEAG8AbgB0AEYAbABpAHAATQBpAHIAcgBvAHIA SQBuAGQAZQBuAHQAcwAvAD4ADQAKACAAIAAgADwAdwA6AE8AdgBlAHIAcgBpAGQAZQBUAGEAYgBs AGUAUwB0AHkAbABlAEgAcABzAC8APgANAAoAIAAgADwALwB3ADoAQwBvAG0AcABhAHQAaQBiAGkA bABpAHQAeQA+AA0ACgAgACAAPABtADoAbQBhAHQAaABQAHIAPgANAAoAIAAgACAAPABtADoAbQBh AHQAaABGAG8AbgB0ACAAbQA6AHYAYQBsAD0AIgBDAGEAbQBiAHIAaQBhACAATQBhAHQAaAAiAC8A PgANAAoAIAAgACAAPABtADoAYgByAGsAQgBpAG4AIABtADoAdgBhAGwAPQAiAGIAZQBmAG8AcgBl ACIALwA+AA0ACgAgACAAIAA8AG0AOgBiAHIAawBCAGkAbgBTAHUAYgAgAG0AOgB2AGEAbAA9ACIA JgAjADQANQA7AC0AIgAvAD4ADQAKACAAIAAgADwAbQA6AHMAbQBhAGwAbABGAHIAYQBjACAAbQA6 AHYAYQBsAD0AIgBvAGYAZgAiAC8APgANAAoAIAAgACAAPABtADoAZABpAHMAcABEAGUAZgAvAD4A DQAKACAAIAAgADwAbQA6AGwATQBhAHIAZwBpAG4AIABtADoAdgBhAGwAPQAiADAAIgAvAD4ADQAK ACAAIAAgADwAbQA6AHIATQBhAHIAZwBpAG4AIABtADoAdgBhAGwAPQAiADAAIgAvAD4ADQAKACAA IAAgADwAbQA6AGQAZQBmAEoAYwAgAG0AOgB2AGEAbAA9ACIAYwBlAG4AdABlAHIARwByAG8AdQBw ACIALwA+AA0ACgAgACAAIAA8AG0AOgB3AHIAYQBwAEkAbgBkAGUAbgB0ACAAbQA6AHYAYQBsAD0A IgAxADQANAAwACIALwA+AA0ACgAgACAAIAA8AG0AOgBpAG4AdABMAGkAbQAgAG0AOgB2AGEAbAA9 ACIAcwB1AGIAUwB1AHAAIgAvAD4ADQAKACAAIAAgADwAbQA6AG4AYQByAHkATABpAG0AIABtADoA dgBhAGwAPQAiAHUAbgBkAE8AdgByACIALwA+AA0ACgAgACAAPAAvAG0AOgBtAGEAdABoAFAAcgA+ 
ADwALwB3ADoAVwBvAHIAZABEAG8AYwB1AG0AZQBuAHQAPgANAAoAPAAvAHgAbQBsAD4APAAhAFsA ZQBuAGQAaQBmAF0ALQAtAD4APAAhAC0ALQBbAGkAZgAgAGcAdABlACAAbQBzAG8AIAA5AF0APgA8 AHgAbQBsAD4ADQAKACAAPAB3ADoATABhAHQAZQBuAHQAUwB0AHkAbABlAHMAIABEAGUAZgBMAG8A YwBrAGUAZABTAHQAYQB0AGUAPQAiAGYAYQBsAHMAZQAiACAARABlAGYAVQBuAGgAaQBkAGUAVwBo AGUAbgBVAHMAZQBkAD0AIgBmAGEAbABzAGUAIgANAAoAIAAgAEQAZQBmAFMAZQBtAGkASABpAGQA ZABlAG4APQAiAGYAYQBsAHMAZQAiACAARABlAGYAUQBGAG8AcgBtAGEAdAA9ACIAZgBhAGwAcwBl ACIAIABEAGUAZgBQAHIAaQBvAHIAaQB0AHkAPQAiADkAOQAiAA0ACgAgACAATABhAHQAZQBuAHQA UwB0AHkAbABlAEMAbwB1AG4AdAA9ACIAMwA3ADYAIgA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIA aQB0AHkAPQAiADAAIgAgAFEARgBvAHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAi AE4AbwByAG0AYQBsACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADkAIgAg AFEARgBvAHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAGgAZQBhAGQAaQBuAGcA IAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADkAIgAgAFMAZQBtAGkA SABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgANAAoAIAAgACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBV AHMAZQBkAD0AIgB0AHIAdQBlACIAIABRAEYAbwByAG0AYQB0AD0AIgB0AHIAdQBlACIAIABOAGEA bQBlAD0AIgBoAGUAYQBkAGkAbgBnACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkA dAB5AD0AIgA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiACAAUQBGAG8AcgBtAGEA dAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAaABlAGEAZABpAG4AZwAgADMAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAOQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9 ACIAdAByAHUAZQAiAA0ACgAgACAAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQA 
cgB1AGUAIgAgAFEARgBvAHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAGgAZQBh AGQAaQBuAGcAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADkAIgAg AFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgANAAoAIAAgACAAVQBuAGgAaQBkAGUA VwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIAIABRAEYAbwByAG0AYQB0AD0AIgB0AHIAdQBl ACIAIABOAGEAbQBlAD0AIgBoAGUAYQBkAGkAbgBnACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUABy AGkAbwByAGkAdAB5AD0AIgA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIA DQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiACAAUQBG AG8AcgBtAGEAdAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAaABlAGEAZABpAG4AZwAgADYA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAOQAiACAAUwBlAG0AaQBIAGkA ZABkAGUAbgA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBl AGQAPQAiAHQAcgB1AGUAIgAgAFEARgBvAHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUA PQAiAGgAZQBhAGQAaQBuAGcAIAA3ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkA PQAiADkAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgANAAoAIAAgACAAVQBu AGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIAIABRAEYAbwByAG0AYQB0AD0A IgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgBoAGUAYQBkAGkAbgBnACAAOAAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0 AHIAdQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUA ZQAiACAAUQBGAG8AcgBtAGEAdAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAaABlAGEAZABp AG4AZwAgADkAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 
AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBpAG4AZABlAHgAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgA aQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAi AHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAaQBuAGQAZQB4ACAAMgAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUA VwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGkAbgBk AGUAeAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBpAG4AZABlAHgAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgA aQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAi AHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAaQBuAGQAZQB4ACAANQAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUA VwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGkAbgBk AGUAeAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBpAG4AZABlAHgAIAA3ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgA aQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAi AHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAaQBuAGQAZQB4ACAAOAAiAC8APgANAAoA 
IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUA VwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGkAbgBk AGUAeAAgADkAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA5ACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcA aABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAdABvAGMAIAAxACIALwA+ AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIA ZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADMAOQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQA PQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAHQAbwBjACAAMgAiAC8APgANAAoAIAAgADwAdwA6 AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAA UAByAGkAbwByAGkAdAB5AD0AIgAzADkAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 AGUAIgANAAoAIAAgACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIA IABOAGEAbQBlAD0AIgB0AG8AYwAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIAMwA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIA dABvAGMAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADMAOQAiACAA UwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABVAG4AaABpAGQAZQBX AGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAHQAbwBjACAANQAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAzADkAIgAgAFMAZQBtAGkASABpAGQA ZABlAG4APQAiAHQAcgB1AGUAIgANAAoAIAAgACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk 
AD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgB0AG8AYwAgADYAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAi ACAATgBhAG0AZQA9ACIAdABvAGMAIAA3ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMA ZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0 AHkAPQAiADMAOQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAi AHQAbwBjACAAOAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAzADkAIgAg AFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgANAAoAIAAgACAAVQBuAGgAaQBkAGUA VwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgB0AG8AYwAgADkAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBo AGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0A IgBOAG8AcgBtAGEAbAAgAEkAbgBkAGUAbgB0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgA aQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAi AHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAZgBvAG8AdABuAG8AdABlACAAdABlAHgA dAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAA VQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAGEAbgBuAG8AdABhAHQAaQBvAG4AIAB0AGUAeAB0ACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA 
cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAaABlAGEAZABlAHIAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBo AGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0A IgBmAG8AbwB0AGUAcgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIA dAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAK ACAAIAAgAE4AYQBtAGUAPQAiAGkAbgBkAGUAeAAgAGgAZQBhAGQAaQBuAGcAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA1ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0A IgB0AHIAdQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdABy AHUAZQAiACAAUQBGAG8AcgBtAGEAdAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAYwBhAHAA dABpAG8AbgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUA ZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AE4AYQBtAGUAPQAiAHQAYQBiAGwAZQAgAG8AZgAgAGYAaQBnAHUAcgBlAHMAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcA aABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBlAG4AdgBl AGwAbwBwAGUAIABhAGQAZAByAGUAcwBzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMA ZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBk AGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQA cgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAZQBuAHYAZQBsAG8AcABlACAAcgBlAHQAdQBy AG4AIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsA ZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAg 
AFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEA bQBlAD0AIgBmAG8AbwB0AG4AbwB0AGUAIAByAGUAZgBlAHIAZQBuAGMAZQAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBo AGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGEAbgBuAG8A dABhAHQAaQBvAG4AIAByAGUAZgBlAHIAZQBuAGMAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0A aQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGwAaQBuAGUAIABuAHUAbQBiAGUA cgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAA VQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAHAAYQBnAGUAIABuAHUAbQBiAGUAcgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBI AGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0A IgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAGUAbgBkAG4AbwB0AGUAIAByAGUAZgBl AHIAZQBuAGMAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdABy AHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAA IAAgAE4AYQBtAGUAPQAiAGUAbgBkAG4AbwB0AGUAIAB0AGUAeAB0ACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBu AFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAdABhAGIAbABlACAA bwBmACAAYQB1AHQAaABvAHIAaQB0AGkAZQBzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgA 
aQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAi AHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAbQBhAGMAcgBvACIALwA+AA0ACgAgACAA PAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBl ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgA ZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAdABvAGEAIABo AGUAYQBkAGkAbgBnACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0 AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoA IAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkA ZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0 AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABCAHUAbABsAGUAdAAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgA aQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAi AEwAaQBzAHQAIABOAHUAbQBiAGUAcgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIAAyACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBu AFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgADMA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUA bgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBl 
AD0AIgBMAGkAcwB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9 ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIA DQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIAA1ACIALwA+AA0ACgAgACAAPAB3ADoATABz AGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUA bQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBl AGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAEIAdQBsAGwA ZQB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUA ZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AE4AYQBtAGUAPQAiAEwAaQBzAHQAIABCAHUAbABsAGUAdAAgADMAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4A VQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAQgB1 AGwAbABlAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0 AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoA IAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAEIAdQBsAGwAZQB0ACAANQAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBo AGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQA IABOAHUAbQBiAGUAcgAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4A PQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAi AA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAATgB1AG0AYgBlAHIAIAAzACIALwA+AA0A 
CgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBh AGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQA ZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABp AHMAdAAgAE4AdQBtAGIAZQByACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABOAHUAbQBiAGUAcgAgADUAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMQAwACIAIABRAEYAbwByAG0AYQB0 AD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgBUAGkAdABsAGUAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4A VQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBDAGwAbwBzAGkAbgBn ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABV AG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0A ZQA9ACIAUwBpAGcAbgBhAHQAdQByAGUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIAMQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABV AG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAEQA ZQBmAGEAdQBsAHQAIABQAGEAcgBhAGcAcgBhAHAAaAAgAEYAbwBuAHQAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABl AG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBCAG8AZAB5ACAA VABlAHgAdAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv 
AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUA ZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AE4AYQBtAGUAPQAiAEIAbwBkAHkAIABUAGUAeAB0ACAASQBuAGQAZQBuAHQAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcA aABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0 ACAAQwBvAG4AdABpAG4AdQBlACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQA aQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBu AD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUA IgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAEMAbwBuAHQAaQBuAHUAZQAgADIAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBo AGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0A IgBMAGkAcwB0ACAAQwBvAG4AdABpAG4AdQBlACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0A aQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABDAG8AbgB0AGkA bgB1AGUAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIA dQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAg ACAATgBhAG0AZQA9ACIATABpAHMAdAAgAEMAbwBuAHQAaQBuAHUAZQAgADUAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcA aABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBNAGUAcwBz AGEAZwBlACAASABlAGEAZABlAHIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAA 
dABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9 ACIAMQAxACIAIABRAEYAbwByAG0AYQB0AD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgBTAHUA YgB0AGkAdABsAGUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQA cgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAg ACAAIABOAGEAbQBlAD0AIgBTAGEAbAB1AHQAYQB0AGkAbwBuACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARABhAHQAZQAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBk AGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEIA bwBkAHkAIABUAGUAeAB0ACAARgBpAHIAcwB0ACAASQBuAGQAZQBuAHQAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABl AG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBCAG8AZAB5ACAA VABlAHgAdAAgAEYAaQByAHMAdAAgAEkAbgBkAGUAbgB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6 AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAA UwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBV AHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAE4AbwB0AGUAIABIAGUA YQBkAGkAbgBnACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIA dQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAg ACAATgBhAG0AZQA9ACIAQgBvAGQAeQAgAFQAZQB4AHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT 
AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAQgBvAGQAeQAgAFQAZQB4 AHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8A YwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBl ACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAA TgBhAG0AZQA9ACIAQgBvAGQAeQAgAFQAZQB4AHQAIABJAG4AZABlAG4AdAAgADIAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABl AFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBCAG8A ZAB5ACAAVABlAHgAdAAgAEkAbgBkAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0A aQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEIAbABvAGMAawAgAFQAZQB4AHQA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUA bgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBl AD0AIgBIAHkAcABlAHIAbABpAG4AawAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEYAbwBsAGwAbwB3AGUAZABIAHkAcABlAHIAbABp AG4AawAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMA awBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAyADIAIgAgAFEARgBv AHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAFMAdAByAG8AbgBnACIALwA+AA0A CgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBh AGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADIAMAAiACAAUQBGAG8AcgBtAGEAdAA9ACIA 
dAByAHUAZQAiACAATgBhAG0AZQA9ACIARQBtAHAAaABhAHMAaQBzACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBu AFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARABvAGMAdQBtAGUA bgB0ACAATQBhAHAAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQA cgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAg ACAAIABOAGEAbQBlAD0AIgBQAGwAYQBpAG4AIABUAGUAeAB0ACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARQAtAG0AYQBpAGwAIABT AGkAZwBuAGEAdAB1AHIAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9 ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIA DQAKACAAIAAgAE4AYQBtAGUAPQAiAEgAVABNAEwAIABUAG8AcAAgAG8AZgAgAEYAbwByAG0AIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBo AGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0A IgBIAFQATQBMACAAQgBvAHQAdABvAG0AIABvAGYAIABGAG8AcgBtACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBu AFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATgBvAHIAbQBhAGwA IAAoAFcAZQBiACkAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQA cgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAg 
ACAAIABOAGEAbQBlAD0AIgBIAFQATQBMACAAQQBjAHIAbwBuAHkAbQAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi ACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUA bgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEgAVABNAEwAIABB AGQAZAByAGUAcwBzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0 AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoA IAAgACAATgBhAG0AZQA9ACIASABUAE0ATAAgAEMAaQB0AGUAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMA ZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBz AGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBIAFQATQBMACAAQwBvAGQA ZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAA VQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAEgAVABNAEwAIABEAGUAZgBpAG4AaQB0AGkAbwBuACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIASABUAE0ATAAgAEsAZQB5 AGIAbwBhAHIAZAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdABy AHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAA IAAgAE4AYQBtAGUAPQAiAEgAVABNAEwAIABQAHIAZQBmAG8AcgBtAGEAdAB0AGUAZAAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBk AGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEgA 
VABNAEwAIABTAGEAbQBwAGwAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0 AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUA bgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBl ACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEgAVABNAEwAIABUAHkAcABlAHcAcgBpAHQAZQByACIA LwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9 ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4A aABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9 ACIASABUAE0ATAAgAFYAYQByAGkAYQBiAGwAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBI AGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0A IgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAE4AbwByAG0AYQBsACAAVABhAGIAbABl ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABV AG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0A ZQA9ACIAYQBuAG4AbwB0AGEAdABpAG8AbgAgAHMAdQBiAGoAZQBjAHQAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABl AG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBOAG8AIABMAGkA cwB0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIA IABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBh AG0AZQA9ACIATwB1AHQAbABpAG4AZQAgAEwAaQBzAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATwB1AHQAbABpAG4AZQAg 
AEwAaQBzAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0 AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoA IAAgACAATgBhAG0AZQA9ACIATwB1AHQAbABpAG4AZQAgAEwAaQBzAHQAIAAzACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBX AGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIA bABlACAAUwBpAG0AcABsAGUAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQA ZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1 AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAUwBpAG0AcABsAGUAIAAyACIA LwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9 ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4A aABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9 ACIAVABhAGIAbABlACAAUwBpAG0AcABsAGUAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBp AEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQA PQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAQwBsAGEAcwBz AGkAYwAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABDAGwAYQBzAHMAaQBjACAAMgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBo AGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwA 
ZQAgAEMAbABhAHMAcwBpAGMAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQA ZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1 AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAQwBsAGEAcwBzAGkAYwAgADQA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUA bgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBl AD0AIgBUAGEAYgBsAGUAIABDAG8AbABvAHIAZgB1AGwAIAAxACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAQwBv AGwAbwByAGYAdQBsACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9 ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIA DQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEMAbwBsAG8AcgBmAHUAbAAgADMAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBo AGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0A IgBUAGEAYgBsAGUAIABDAG8AbAB1AG0AbgBzACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0A aQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEMAbwBsAHUA bQBuAHMAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIA dQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAg 
ACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAQwBvAGwAdQBtAG4AcwAgADMAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcA aABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBs AGUAIABDAG8AbAB1AG0AbgBzACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEMAbwBsAHUAbQBuAHMAIAA1 ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABV AG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0A ZQA9ACIAVABhAGIAbABlACAARwByAGkAZAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABF AHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkA SABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9 ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABHAHIAaQBkACAA MgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAA VQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAFQAYQBiAGwAZQAgAEcAcgBpAGQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBp AEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQA PQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAARwByAGkAZAAg ADQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsA ZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAg AFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEA 
bQBlAD0AIgBUAGEAYgBsAGUAIABHAHIAaQBkACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0A aQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBk AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEcAcgBpAGQA IAA2ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIA IABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBh AG0AZQA9ACIAVABhAGIAbABlACAARwByAGkAZAAgADcAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBt AGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUA ZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABHAHIAaQBk ACAAOAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMA awBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAi ACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4A YQBtAGUAPQAiAFQAYQBiAGwAZQAgAEwAaQBzAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABz AGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUA bQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBl AGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAATABpAHMA dAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBj AGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUA IgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABO AGEAbQBlAD0AIgBUAGEAYgBsAGUAIABMAGkAcwB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBl AG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMA ZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEwAaQBz 
AHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8A YwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBl ACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAA TgBhAG0AZQA9ACIAVABhAGIAbABlACAATABpAHMAdAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMA ZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBz AGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABMAGkA cwB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUA ZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAEwAaQBzAHQAIAA3ACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABT AGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUA cwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAATABp AHMAdAAgADgAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4APQAiAHQAcgB1 AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIAAzAEQAIABlAGYAZgBlAGMAdABzACAAMQAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBk AGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQA YQBiAGwAZQAgADMARAAgAGUAZgBmAGUAYwB0AHMAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABz AGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUA bQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBXAGgAZQBuAFUAcwBl AGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAAMwBEACAA 
ZQBmAGYAZQBjAHQAcwAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBtAGkASABpAGQAZABlAG4A PQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdAByAHUAZQAi AA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABDAG8AbgB0AGUAbQBwAG8AcgBhAHIA eQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAA VQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAFQAYQBiAGwAZQAgAEUAbABlAGcAYQBuAHQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFMAZQBt AGkASABpAGQAZABlAG4APQAiAHQAcgB1AGUAIgAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUA ZAA9ACIAdAByAHUAZQAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBUAGEAYgBsAGUAIABQAHIAbwBm AGUAcwBzAGkAbwBuAGEAbAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9 ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIA DQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAFMAdQBiAHQAbABlACAAMQAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBk AGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQA YQBiAGwAZQAgAFMAdQBiAHQAbABlACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkA ZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0 AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAFcAZQBiACAAMQAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgA aQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAi 
AFQAYQBiAGwAZQAgAFcAZQBiACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABk AGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIA dQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwAZQAgAFcAZQBiACAAMwAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBk AGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEIA YQBsAGwAbwBvAG4AIABUAGUAeAB0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkA PQAiADMAOQAiACAATgBhAG0AZQA9ACIAVABhAGIAbABlACAARwByAGkAZAAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUAZQAiACAAVQBuAGgAaQBkAGUAVwBo AGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAFQAYQBiAGwA ZQAgAFQAaABlAG0AZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIA dAByAHUAZQAiACAATgBhAG0AZQA9ACIAUABsAGEAYwBlAGgAbwBsAGQAZQByACAAVABlAHgAdAAi AC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQA PQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAxACIAIABRAEYAbwByAG0AYQB0 AD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgBOAG8AIABTAHAAYQBjAGkAbgBnACIALwA+AA0A CgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBh AGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMAAiACAATgBhAG0AZQA9ACIATABpAGcA aAB0ACAAUwBoAGEAZABpAG4AZwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0 AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0A IgA2ADEAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAgAEwAaQBzAHQAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA 
IgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgAyACIAIABOAGEAbQBlAD0AIgBMAGkAZwBoAHQAIABH AHIAaQBkACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8A YwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMwAiACAATgBh AG0AZQA9ACIATQBlAGQAaQB1AG0AIABTAGgAYQBkAGkAbgBnACAAMQAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi ACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADQAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAA UwBoAGEAZABpAG4AZwAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIA NgA1ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAEwAaQBzAHQAIAAxACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYANgAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1 AG0AIABMAGkAcwB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2 ADcAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAARwByAGkAZAAgADEAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA4ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUA bQAgAEcAcgBpAGQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBv AG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYA OQAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABHAHIAaQBkACAAMwAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADAAIgAgAE4AYQBtAGUAPQAiAEQAYQByAGsAIABM AGkAcwB0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8A YwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMQAiACAATgBh AG0AZQA9ACIAQwBvAGwAbwByAGYAdQBsACAAUwBoAGEAZABpAG4AZwAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi 
ACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADIAIgAgAE4AYQBtAGUAPQAiAEMAbwBsAG8AcgBmAHUA bAAgAEwAaQBzAHQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAzACIA IABOAGEAbQBlAD0AIgBDAG8AbABvAHIAZgB1AGwAIABHAHIAaQBkACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMAAiACAATgBhAG0AZQA9ACIATABpAGcAaAB0ACAAUwBo AGEAZABpAG4AZwAgAEEAYwBjAGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA2ADEAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAgAEwAaQBzAHQAIABBAGMA YwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgAyACIA IABOAGEAbQBlAD0AIgBMAGkAZwBoAHQAIABHAHIAaQBkACAAQQBjAGMAZQBuAHQAIAAxACIALwA+ AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIA ZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMwAiACAATgBhAG0AZQA9ACIATQBl AGQAaQB1AG0AIABTAGgAYQBkAGkAbgBnACAAMQAgAEEAYwBjAGUAbgB0ACAAMQAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADQAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkA dQBtACAAUwBoAGEAZABpAG4AZwAgADIAIABBAGMAYwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA1ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAg AEwAaQBzAHQAIAAxACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABTAGUAbQBp AEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABOAGEAbQBlAD0AIgBSAGUAdgBpAHMAaQBvAG4A IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA0ACIAIABRAEYAbwByAG0A 
YQB0AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABQAGEAcgBh AGcAcgBhAHAAaAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAyADkAIgAg AFEARgBvAHIAbQBhAHQAPQAiAHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAFEAdQBvAHQAZQAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAzADAAIgAgAFEARgBvAHIAbQBhAHQA PQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIASQBuAHQAZQBuAHMAZQAgAFEAdQBv AHQAZQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMA awBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADYAIgAgAE4AYQBt AGUAPQAiAE0AZQBkAGkAdQBtACAATABpAHMAdAAgADIAIABBAGMAYwBlAG4AdAAgADEAIgAvAD4A DQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBm AGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA3ACIAIABOAGEAbQBlAD0AIgBNAGUA ZABpAHUAbQAgAEcAcgBpAGQAIAAxACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABQAHIAaQBvAHIAaQB0AHkAPQAiADYAOAAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABH AHIAaQBkACAAMgAgAEEAYwBjAGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA2ADkAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAARwByAGkAZAAgADMA IABBAGMAYwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIA NwAwACIAIABOAGEAbQBlAD0AIgBEAGEAcgBrACAATABpAHMAdAAgAEEAYwBjAGUAbgB0ACAAMQAi AC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQA PQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADEAIgAgAE4AYQBtAGUAPQAi AEMAbwBsAG8AcgBmAHUAbAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4AdAAgADEAIgAvAD4A DQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBm 
AGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAyACIAIABOAGEAbQBlAD0AIgBDAG8A bABvAHIAZgB1AGwAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMwAiACAATgBhAG0AZQA9ACIAQwBvAGwAbwByAGYAdQBs ACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA2ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAgAFMAaABhAGQAaQBuAGcA IABBAGMAYwBlAG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIA NgAxACIAIABOAGEAbQBlAD0AIgBMAGkAZwBoAHQAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAAy ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMgAiACAATgBhAG0AZQA9 ACIATABpAGcAaAB0ACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi ACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADMAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAA UwBoAGEAZABpAG4AZwAgADEAIABBAGMAYwBlAG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAA cgBpAG8AcgBpAHQAeQA9ACIANgA0ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAFMAaABh AGQAaQBuAGcAIAAyACAAQQBjAGMAZQBuAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBv AHIAaQB0AHkAPQAiADYANQAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABMAGkAcwB0ACAA MQAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0 AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0A IgA2ADYAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAATABpAHMAdAAgADIAIABBAGMAYwBl AG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA 
bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA3ACIAIABO AGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAEcAcgBpAGQAIAAxACAAQQBjAGMAZQBuAHQAIAAyACIA LwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9 ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAOAAiACAATgBhAG0AZQA9ACIA TQBlAGQAaQB1AG0AIABHAHIAaQBkACAAMgAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADkAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBt ACAARwByAGkAZAAgADMAIABBAGMAYwBlAG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBp AG8AcgBpAHQAeQA9ACIANwAwACIAIABOAGEAbQBlAD0AIgBEAGEAcgBrACAATABpAHMAdAAgAEEA YwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADEA IgAgAE4AYQBtAGUAPQAiAEMAbwBsAG8AcgBmAHUAbAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBl AG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAyACIAIABO AGEAbQBlAD0AIgBDAG8AbABvAHIAZgB1AGwAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAAyACIA LwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9 ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMwAiACAATgBhAG0AZQA9ACIA QwBvAGwAbwByAGYAdQBsACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAg AFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBp AG8AcgBpAHQAeQA9ACIANgAxACIAIABOAGEAbQBlAD0AIgBMAGkAZwBoAHQAIABMAGkAcwB0ACAA QQBjAGMAZQBuAHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBv 
AG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYA MgAiACAATgBhAG0AZQA9ACIATABpAGcAaAB0ACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAAMwAi AC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQA PQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADMAIgAgAE4AYQBtAGUAPQAi AE0AZQBkAGkAdQBtACAAUwBoAGEAZABpAG4AZwAgADEAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4A DQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBm AGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA0ACIAIABOAGEAbQBlAD0AIgBNAGUA ZABpAHUAbQAgAFMAaABhAGQAaQBuAGcAIAAyACAAQQBjAGMAZQBuAHQAIAAzACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYANQAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1 AG0AIABMAGkAcwB0ACAAMQAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUABy AGkAbwByAGkAdAB5AD0AIgA2ADYAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAATABpAHMA dAAgADIAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIANgA3ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAEcAcgBpAGQAIAAxACAAQQBj AGMAZQBuAHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAOAAi ACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABHAHIAaQBkACAAMgAgAEEAYwBjAGUAbgB0ACAA MwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADkAIgAgAE4AYQBtAGUA PQAiAE0AZQBkAGkAdQBtACAARwByAGkAZAAgADMAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAwACIAIABOAGEAbQBlAD0AIgBEAGEAcgBr ACAATABpAHMAdAAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA 
eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA3ADEAIgAgAE4AYQBtAGUAPQAiAEMAbwBsAG8AcgBmAHUAbAAgAFMAaABhAGQA aQBuAGcAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIANwAyACIAIABOAGEAbQBlAD0AIgBDAG8AbABvAHIAZgB1AGwAIABMAGkAcwB0ACAAQQBj AGMAZQBuAHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMwAi ACAATgBhAG0AZQA9ACIAQwBvAGwAbwByAGYAdQBsACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAA MwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADAAIgAgAE4AYQBtAGUA PQAiAEwAaQBnAGgAdAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4AdAAgADQAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgAxACIAIABOAGEAbQBlAD0AIgBMAGkAZwBo AHQAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBv AHIAaQB0AHkAPQAiADYAMgAiACAATgBhAG0AZQA9ACIATABpAGcAaAB0ACAARwByAGkAZAAgAEEA YwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADMA IgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAAUwBoAGEAZABpAG4AZwAgADEAIABBAGMAYwBl AG4AdAAgADQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA0ACIAIABO AGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAFMAaABhAGQAaQBuAGcAIAAyACAAQQBjAGMAZQBuAHQA IAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYANQAiACAATgBhAG0A ZQA9ACIATQBlAGQAaQB1AG0AIABMAGkAcwB0ACAAMQAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgAN 
AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADYAIgAgAE4AYQBtAGUAPQAiAE0AZQBk AGkAdQBtACAATABpAHMAdAAgADIAIABBAGMAYwBlAG4AdAAgADQAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIANgA3ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAEcA cgBpAGQAIAAxACAAQQBjAGMAZQBuAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIA aQB0AHkAPQAiADYAOAAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABHAHIAaQBkACAAMgAg AEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2 ADkAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAARwByAGkAZAAgADMAIABBAGMAYwBlAG4A dAAgADQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBj AGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAwACIAIABOAGEA bQBlAD0AIgBEAGEAcgBrACAATABpAHMAdAAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADEAIgAgAE4AYQBtAGUAPQAiAEMAbwBsAG8AcgBm AHUAbAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4AdAAgADQAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIANwAyACIAIABOAGEAbQBlAD0AIgBDAG8AbABvAHIAZgB1AGwA IABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIA aQB0AHkAPQAiADcAMwAiACAATgBhAG0AZQA9ACIAQwBvAGwAbwByAGYAdQBsACAARwByAGkAZAAg AEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2 ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4A 
dAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBj AGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgAxACIAIABOAGEA bQBlAD0AIgBMAGkAZwBoAHQAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMgAiACAATgBhAG0AZQA9ACIATABpAGcAaAB0 ACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA2ADMAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAAUwBoAGEAZABpAG4A ZwAgADEAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIANgA0ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAFMAaABhAGQAaQBuAGcAIAAy ACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQA aQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAi ADYANQAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABMAGkAcwB0ACAAMQAgAEEAYwBjAGUA bgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADYAIgAgAE4A YQBtAGUAPQAiAE0AZQBkAGkAdQBtACAATABpAHMAdAAgADIAIABBAGMAYwBlAG4AdAAgADUAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA3ACIAIABOAGEAbQBlAD0AIgBN AGUAZABpAHUAbQAgAEcAcgBpAGQAIAAxACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAgACAA PAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBl ACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAOAAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0A IABHAHIAaQBkACAAMgAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkA bwByAGkAdAB5AD0AIgA2ADkAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAARwByAGkAZAAg 
ADMAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAA dABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9 ACIANwAwACIAIABOAGEAbQBlAD0AIgBEAGEAcgBrACAATABpAHMAdAAgAEEAYwBjAGUAbgB0ACAA NQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3ADEAIgAgAE4AYQBtAGUA PQAiAEMAbwBsAG8AcgBmAHUAbAAgAFMAaABhAGQAaQBuAGcAIABBAGMAYwBlAG4AdAAgADUAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAyACIAIABOAGEAbQBlAD0AIgBD AG8AbABvAHIAZgB1AGwAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAgACAA PAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBl ACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMwAiACAATgBhAG0AZQA9ACIAQwBvAGwAbwByAGYA dQBsACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkA bwByAGkAdAB5AD0AIgA2ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBnAGgAdAAgAFMAaABhAGQAaQBu AGcAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAA dABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9 ACIANgAxACIAIABOAGEAbQBlAD0AIgBMAGkAZwBoAHQAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQA IAA2ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAMgAiACAATgBhAG0A ZQA9ACIATABpAGcAaAB0ACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADMAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBt ACAAUwBoAGEAZABpAG4AZwAgADEAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIANgA0ACIAIABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAFMA 
aABhAGQAaQBuAGcAIAAyACAAQQBjAGMAZQBuAHQAIAA2ACIALwA+AA0ACgAgACAAPAB3ADoATABz AGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIA aQBvAHIAaQB0AHkAPQAiADYANQAiACAATgBhAG0AZQA9ACIATQBlAGQAaQB1AG0AIABMAGkAcwB0 ACAAMQAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5 AD0AIgA2ADYAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkAdQBtACAATABpAHMAdAAgADIAIABBAGMA YwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANgA3ACIA IABOAGEAbQBlAD0AIgBNAGUAZABpAHUAbQAgAEcAcgBpAGQAIAAxACAAQQBjAGMAZQBuAHQAIAA2 ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADYAOAAiACAATgBhAG0AZQA9 ACIATQBlAGQAaQB1AG0AIABHAHIAaQBkACAAMgAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA2ADkAIgAgAE4AYQBtAGUAPQAiAE0AZQBkAGkA dQBtACAARwByAGkAZAAgADMAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAA cgBpAG8AcgBpAHQAeQA9ACIANwAwACIAIABOAGEAbQBlAD0AIgBEAGEAcgBrACAATABpAHMAdAAg AEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA3 ADEAIgAgAE4AYQBtAGUAPQAiAEMAbwBsAG8AcgBmAHUAbAAgAFMAaABhAGQAaQBuAGcAIABBAGMA YwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANwAyACIA IABOAGEAbQBlAD0AIgBDAG8AbABvAHIAZgB1AGwAIABMAGkAcwB0ACAAQQBjAGMAZQBuAHQAIAA2 ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADcAMwAiACAATgBhAG0AZQA9 
ACIAQwBvAGwAbwByAGYAdQBsACAARwByAGkAZAAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAxADkAIgAgAFEARgBvAHIAbQBhAHQAPQAiAHQA cgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAUwB1AGIAdABsAGUAIABFAG0AcABoAGEAcwBp AHMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsA ZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMgAxACIAIABRAEYAbwBy AG0AYQB0AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEkAbgB0AGUAbgBzAGUA IABFAG0AcABoAGEAcwBpAHMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIA MwAxACIAIABRAEYAbwByAG0AYQB0AD0AIgB0AHIAdQBlACIADQAKACAAIAAgAE4AYQBtAGUAPQAi AFMAdQBiAHQAbABlACAAUgBlAGYAZQByAGUAbgBjAGUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBp AG8AcgBpAHQAeQA9ACIAMwAyACIAIABRAEYAbwByAG0AYQB0AD0AIgB0AHIAdQBlACIADQAKACAA IAAgAE4AYQBtAGUAPQAiAEkAbgB0AGUAbgBzAGUAIABSAGUAZgBlAHIAZQBuAGMAZQAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgAzADMAIgAgAFEARgBvAHIAbQBhAHQAPQAi AHQAcgB1AGUAIgAgAE4AYQBtAGUAPQAiAEIAbwBvAGsAIABUAGkAdABsAGUAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA3ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0A IgB0AHIAdQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdABy AHUAZQAiACAATgBhAG0AZQA9ACIAQgBpAGIAbABpAG8AZwByAGEAcABoAHkAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIAMwA5ACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0A IgB0AHIAdQBlACIADQAKACAAIAAgAFUAbgBoAGkAZABlAFcAaABlAG4AVQBzAGUAZAA9ACIAdABy AHUAZQAiACAAUQBGAG8AcgBtAGEAdAA9ACIAdAByAHUAZQAiACAATgBhAG0AZQA9ACIAVABPAEMA 
IABIAGUAYQBkAGkAbgBnACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBv AG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQA MQAiACAATgBhAG0AZQA9ACIAUABsAGEAaQBuACAAVABhAGIAbABlACAAMQAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADIAIgAgAE4AYQBtAGUAPQAiAFAAbABhAGkAbgAg AFQAYQBiAGwAZQAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8A bgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAAz ACIAIABOAGEAbQBlAD0AIgBQAGwAYQBpAG4AIABUAGEAYgBsAGUAIAAzACIALwA+AA0ACgAgACAA PAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBl ACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANAAiACAATgBhAG0AZQA9ACIAUABsAGEAaQBuACAA VABhAGIAbABlACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADUA IgAgAE4AYQBtAGUAPQAiAFAAbABhAGkAbgAgAFQAYQBiAGwAZQAgADUAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAAwACIAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABh AGIAbABlACAATABpAGcAaAB0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQA aQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAi ADQANgAiACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADEAIABMAGkAZwBoAHQA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA3ACIAIABOAGEAbQBlAD0A IgBHAHIAaQBkACAAVABhAGIAbABlACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkA dAB5AD0AIgA0ADgAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAAzACIALwA+ AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIA ZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOQAiACAATgBhAG0AZQA9ACIARwBy 
AGkAZAAgAFQAYQBiAGwAZQAgADQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAA dABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9 ACIANQAwACIAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAANQAgAEQAYQByAGsA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAxACIAIABOAGEAbQBlAD0A IgBHAHIAaQBkACAAVABhAGIAbABlACAANgAgAEMAbwBsAG8AcgBmAHUAbAAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADIAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABU AGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBv AHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIA bABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6 AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAA UAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBs AGUAIAAyACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMA ZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0 AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADMAIABBAGMA YwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAg AEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIA IABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAAMQAi AC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQA PQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAi AEcAcgBpAGQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAAMQAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0A 
ZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBl AG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAK ACAAIAAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYA dQBsACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkA PQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAAMQAg AEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAAyACAA QQBjAGMAZQBuAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBv AG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQA OAAiACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAg ADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsA ZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBl AD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoA IAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBs AHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQA IABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARwBy AGkAZAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADIA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4A YQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBj 
AGMAZQBuAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAi AA0ACgAgACAAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgA dAAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0 AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0A IgA0ADcAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBu AHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8A YwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBh AG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4A DQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBm AGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBHAHIA aQBkACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6 AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAA UAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBs AGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUABy AGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQA YQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADMAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAi AEcAcgBpAGQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQA IAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAA IABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBj AGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA 
TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAg AE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAA0ACIA LwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9 ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIA RwByAGkAZAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADQAIgAvAD4ADQAKACAAIAA8 AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUA IgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABh AGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUA eABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwBy AGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA1ACAA RABhAHIAawAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkA dAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAg ADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADQAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEcAcgBpAGQA IABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAA0ACIALwA+ AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIA ZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBl AD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAA NQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUA PQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIARwByAGkAZAAg 
AFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMA ZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBp AG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAA NAAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0 AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0A IgA1ADAAIgAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAg AEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1 ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADYAIABDAG8A bABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABF AHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8A cgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBs AGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAA1ACIALwA+AA0ACgAgACAA PAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBl ACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBHAHIA aQBkACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEcAcgBp AGQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAA2ACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQ AHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwA ZQAgADMAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBHAHIAaQBkACAAVABhAGIAbABlACAANAAgAEEAYwBj AGUAbgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA 
TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAg AE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUA bgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoA IAAgACAATgBhAG0AZQA9ACIARwByAGkAZAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1 AGwAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAA dABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9 ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEcAcgBpAGQAIABUAGEAYgBsAGUAIAA3ACAA QwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAA2ACIALwA+AA0ACgAgACAAPAB3ADoATABz AGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIA aQBvAHIAaQB0AHkAPQAiADQANgAiACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAg ADEAIABMAGkAZwBoAHQAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8A bgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA3 ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAAMgAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi ACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADgAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEA YgBsAGUAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOQAiACAA TgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADQAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAA cgBpAG8AcgBpAHQAeQA9ACIANQAwACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABl ACAANQAgAEQAYQByAGsAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8A bgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAx ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAANgAgAEMAbwBsAG8AcgBmAHUA bAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl 
AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADIAIgAgAE4AYQBtAGUA PQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBM AGkAcwB0ACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAAMQAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEwA aQBzAHQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3 ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIA IABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBi AGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgA YwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBp AHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAANAAgAEEA YwBjAGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBu ACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAA IgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBj AGUAbgB0ACAAMQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgAN AAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIA ZgB1AGwAIABBAGMAYwBlAG4AdAAgADEAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBl AHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQA eQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA3 ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAAxACIALwA+AA0ACgAgACAAPAB3ADoA TABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQ AHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAA 
VABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAAMgAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABU AGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQA RQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBv AHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADMA IABBAGMAYwBlAG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABp AG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIA NAA5ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0 ACAAMgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMA awBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBt AGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAA MgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBl AGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAA TgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABB AGMAYwBlAG4AdAAgADIAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8A bgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAy ACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwA bwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAAyACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4 AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIA aQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABl ACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUABy AGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUA IAAyACAAQQBjAGMAZQBuAHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBw 
AHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkA PQAiADQAOAAiACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBl AG4AdAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwA bwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABO AGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAAMwAiAC8A PgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAi AGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEwA aQBzAHQAIABUAGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAAMwAiAC8APgAN AAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYA YQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9 ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4A dAAgADMAIgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBj AGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAA IAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBs ACAAQQBjAGMAZQBuAHQAIAAzACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQA aQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAi ADQANgAiAA0ACgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAAMQAgAEwA aQBnAGgAdAAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkA dAB5AD0AIgA0ADcAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAAyACAAQQBj AGMAZQBuAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4A IABMAG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAi ACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADQA IgAvAD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBk AD0AIgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0A 
IgBMAGkAcwB0ACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAg ADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMA ZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABU AGEAYgBsAGUAIAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAANAAiAC8APgANAAoAIAAgADwA dwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAi ACAAUAByAGkAbwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMA dAAgAFQAYQBiAGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADQAIgAv AD4ADQAKACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0A IgBmAGEAbABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBt AGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMA ZQBuAHQAIAA0ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABM AG8AYwBrAGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0A CgAgACAAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAg AEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkA bwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0 ADcAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQA IAA1ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBr AGUAZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0A ZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAK ACAAIAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEA bABzAGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0 ACAAVABhAGIAbABlACAANAAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwA cwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUABy AGkAbwByAGkAdAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUA IAA1ACAARABhAHIAawAgAEEAYwBjAGUAbgB0ACAANQAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBk 
AEUAeABjAGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkA bwByAGkAdAB5AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBi AGwAZQAgADYAIABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADUAIgAvAD4ADQAKACAA IAA8AHcAOgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABz AGUAIgAgAFAAcgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwA aQBzAHQAIABUAGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAA1 ACIALwA+AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUA ZAA9ACIAZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQANgAiAA0ACgAgACAAIABO AGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIAbABlACAAMQAgAEwAaQBnAGgAdAAgAEEAYwBjAGUA bgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5AD0AIgA0ADcAIgAgAE4A YQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAAyACAAQQBjAGMAZQBuAHQAIAA2ACIALwA+ AA0ACgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIA ZgBhAGwAcwBlACIAIABQAHIAaQBvAHIAaQB0AHkAPQAiADQAOAAiACAATgBhAG0AZQA9ACIATABp AHMAdAAgAFQAYQBiAGwAZQAgADMAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcA OgBMAHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAg AFAAcgBpAG8AcgBpAHQAeQA9ACIANAA5ACIAIABOAGEAbQBlAD0AIgBMAGkAcwB0ACAAVABhAGIA bABlACAANAAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABj AGUAcAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkA dAB5AD0AIgA1ADAAIgAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABUAGEAYgBsAGUAIAA1ACAARABh AHIAawAgAEEAYwBjAGUAbgB0ACAANgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUA cAB0AGkAbwBuACAATABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUAByAGkAbwByAGkAdAB5 AD0AIgA1ADEAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATABpAHMAdAAgAFQAYQBiAGwAZQAgADYA IABDAG8AbABvAHIAZgB1AGwAIABBAGMAYwBlAG4AdAAgADYAIgAvAD4ADQAKACAAIAA8AHcAOgBM AHMAZABFAHgAYwBlAHAAdABpAG8AbgAgAEwAbwBjAGsAZQBkAD0AIgBmAGEAbABzAGUAIgAgAFAA 
cgBpAG8AcgBpAHQAeQA9ACIANQAyACIADQAKACAAIAAgAE4AYQBtAGUAPQAiAEwAaQBzAHQAIABU AGEAYgBsAGUAIAA3ACAAQwBvAGwAbwByAGYAdQBsACAAQQBjAGMAZQBuAHQAIAA2ACIALwA+AA0A CgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBh AGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQA ZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIATQBl AG4AdABpAG8AbgAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAA TABvAGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdABy AHUAZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAA IAAgAE4AYQBtAGUAPQAiAFMAbQBhAHIAdAAgAEgAeQBwAGUAcgBsAGkAbgBrACIALwA+AA0ACgAg ACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBhAGwA cwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQAZQBX AGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIASABhAHMA aAB0AGEAZwAiAC8APgANAAoAIAAgADwAdwA6AEwAcwBkAEUAeABjAGUAcAB0AGkAbwBuACAATABv AGMAawBlAGQAPQAiAGYAYQBsAHMAZQAiACAAUwBlAG0AaQBIAGkAZABkAGUAbgA9ACIAdAByAHUA ZQAiACAAVQBuAGgAaQBkAGUAVwBoAGUAbgBVAHMAZQBkAD0AIgB0AHIAdQBlACIADQAKACAAIAAg AE4AYQBtAGUAPQAiAFUAbgByAGUAcwBvAGwAdgBlAGQAIABNAGUAbgB0AGkAbwBuACIALwA+AA0A CgAgACAAPAB3ADoATABzAGQARQB4AGMAZQBwAHQAaQBvAG4AIABMAG8AYwBrAGUAZAA9ACIAZgBh AGwAcwBlACIAIABTAGUAbQBpAEgAaQBkAGQAZQBuAD0AIgB0AHIAdQBlACIAIABVAG4AaABpAGQA ZQBXAGgAZQBuAFUAcwBlAGQAPQAiAHQAcgB1AGUAIgANAAoAIAAgACAATgBhAG0AZQA9ACIAUwBt AGEAcgB0ACAATABpAG4AawAiAC8APgANAAoAIAA8AC8AdwA6AEwAYQB0AGUAbgB0AFMAdAB5AGwA ZQBzAD4ADQAKADwALwB4AG0AbAA+ADwAIQBbAGUAbgBkAGkAZgBdAC0ALQA+AA0ACgA8AHMAdAB5 AGwAZQA+AA0ACgA8ACEALQAtAA0ACgAgAC8AKgAgAEYAbwBuAHQAIABEAGUAZgBpAG4AaQB0AGkA bwBuAHMAIAAqAC8ADQAKACAAQABmAG8AbgB0AC0AZgBhAGMAZQANAAoACQB7AGYAbwBuAHQALQBm AGEAbQBpAGwAeQA6ACIAQwBhAG0AYgByAGkAYQAgAE0AYQB0AGgAIgA7AA0ACgAJAHAAYQBuAG8A cwBlAC0AMQA6ADIAIAA0ACAANQAgADMAIAA1ACAANAAgADYAIAAzACAAMgAgADQAOwANAAoACQBt 
AHMAbwAtAGYAbwBuAHQALQBjAGgAYQByAHMAZQB0ADoAMAA7AA0ACgAJAG0AcwBvAC0AZwBlAG4A ZQByAGkAYwAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AHIAbwBtAGEAbgA7AA0ACgAJAG0AcwBv AC0AZgBvAG4AdAAtAHAAaQB0AGMAaAA6AHYAYQByAGkAYQBiAGwAZQA7AA0ACgAJAG0AcwBvAC0A ZgBvAG4AdAAtAHMAaQBnAG4AYQB0AHUAcgBlADoALQA1ADMANgA4ADYAOQAxADIAMQAgADEAMQAw ADcAMwAwADUANwAyADcAIAAzADMANQA1ADQANAAzADIAIAAwACAANAAxADUAIAAwADsAfQANAAoA QABmAG8AbgB0AC0AZgBhAGMAZQANAAoACQB7AGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AEMAYQBs AGkAYgByAGkAOwANAAoACQBwAGEAbgBvAHMAZQAtADEAOgAyACAAMQA1ACAANQAgADIAIAAyACAA MgAgADQAIAAzACAAMgAgADQAOwANAAoACQBtAHMAbwAtAGYAbwBuAHQALQBjAGgAYQByAHMAZQB0 ADoAMAA7AA0ACgAJAG0AcwBvAC0AZwBlAG4AZQByAGkAYwAtAGYAbwBuAHQALQBmAGEAbQBpAGwA eQA6AHMAdwBpAHMAcwA7AA0ACgAJAG0AcwBvAC0AZgBvAG4AdAAtAHAAaQB0AGMAaAA6AHYAYQBy AGkAYQBiAGwAZQA7AA0ACgAJAG0AcwBvAC0AZgBvAG4AdAAtAHMAaQBnAG4AYQB0AHUAcgBlADoA LQA0ADYAOQA3ADUAMAAwADEANwAgAC0AMQAwADQAMAAxADcAOAAwADUAMwAgADkAIAAwACAANQAx ADEAIAAwADsAfQANAAoAQABmAG8AbgB0AC0AZgBhAGMAZQANAAoACQB7AGYAbwBuAHQALQBmAGEA bQBpAGwAeQA6AEEAcAB0AG8AcwA7AA0ACgAJAG0AcwBvAC0AZgBvAG4AdAAtAGMAaABhAHIAcwBl AHQAOgAwADsADQAKAAkAbQBzAG8ALQBnAGUAbgBlAHIAaQBjAC0AZgBvAG4AdAAtAGYAYQBtAGkA bAB5ADoAcwB3AGkAcwBzADsADQAKAAkAbQBzAG8ALQBmAG8AbgB0AC0AcABpAHQAYwBoADoAdgBh AHIAaQBhAGIAbABlADsADQAKAAkAbQBzAG8ALQBmAG8AbgB0AC0AcwBpAGcAbgBhAHQAdQByAGUA OgA1ADMANgA4ADcAMQA1ADUAOQAgADMAIAAwACAAMAAgADQAMQA1ACAAMAA7AH0ADQAKAEAAZgBv AG4AdAAtAGYAYQBjAGUADQAKAAkAewBmAG8AbgB0AC0AZgBhAG0AaQBsAHkAOgAiAFMAZQBnAG8A ZQAgAFUASQAgAEUAbQBvAGoAaQAiADsADQAKAAkAcABhAG4AbwBzAGUALQAxADoAMgAgADEAMQAg ADUAIAAyACAANAAgADIAIAA0ACAAMgAgADIAIAAzADsADQAKAAkAbQBzAG8ALQBmAG8AbgB0AC0A YwBoAGEAcgBzAGUAdAA6ADAAOwANAAoACQBtAHMAbwAtAGcAZQBuAGUAcgBpAGMALQBmAG8AbgB0 AC0AZgBhAG0AaQBsAHkAOgBzAHcAaQBzAHMAOwANAAoACQBtAHMAbwAtAGYAbwBuAHQALQBwAGkA dABjAGgAOgB2AGEAcgBpAGEAYgBsAGUAOwANAAoACQBtAHMAbwAtAGYAbwBuAHQALQBzAGkAZwBu AGEAdAB1AHIAZQA6ADMAIAAzADMANQA1ADQANAAzADIAIAAxADMANAAyADEANwA3ADIAOAAgADAA 
IAAxACAAMAA7AH0ADQAKACAALwAqACAAUwB0AHkAbABlACAARABlAGYAaQBuAGkAdABpAG8AbgBz ACAAKgAvAA0ACgAgAHAALgBNAHMAbwBOAG8AcgBtAGEAbAAsACAAbABpAC4ATQBzAG8ATgBvAHIA bQBhAGwALAAgAGQAaQB2AC4ATQBzAG8ATgBvAHIAbQBhAGwADQAKAAkAewBtAHMAbwAtAHMAdAB5 AGwAZQAtAHUAbgBoAGkAZABlADoAbgBvADsADQAKAAkAbQBzAG8ALQBzAHQAeQBsAGUALQBxAGYA bwByAG0AYQB0ADoAeQBlAHMAOwANAAoACQBtAHMAbwAtAHMAdAB5AGwAZQAtAHAAYQByAGUAbgB0 ADoAIgAiADsADQAKAAkAbQBhAHIAZwBpAG4AOgAwAGkAbgA7AA0ACgAJAG0AcwBvAC0AcABhAGcA aQBuAGEAdABpAG8AbgA6AHcAaQBkAG8AdwAtAG8AcgBwAGgAYQBuADsADQAKAAkAZgBvAG4AdAAt AHMAaQB6AGUAOgAxADIALgAwAHAAdAA7AA0ACgAJAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6ACIA QQBwAHQAbwBzACIALABzAGEAbgBzAC0AcwBlAHIAaQBmADsADQAKAAkAbQBzAG8ALQBmAGEAcgBl AGEAcwB0AC0AZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAQQBwAHQAbwBzADsADQAKAAkAbQBzAG8A LQBmAGEAcgBlAGEAcwB0AC0AdABoAGUAbQBlAC0AZgBvAG4AdAA6AG0AaQBuAG8AcgAtAGwAYQB0 AGkAbgA7AA0ACgAJAG0AcwBvAC0AYgBpAGQAaQAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AEEA cAB0AG8AcwA7AH0ADQAKAHAADQAKAAkAewBtAHMAbwAtAHMAdAB5AGwAZQAtAG4AbwBzAGgAbwB3 ADoAeQBlAHMAOwANAAoACQBtAHMAbwAtAHMAdAB5AGwAZQAtAHAAcgBpAG8AcgBpAHQAeQA6ADkA OQA7AA0ACgAJAG0AcwBvAC0AbQBhAHIAZwBpAG4ALQB0AG8AcAAtAGEAbAB0ADoAYQB1AHQAbwA7 AA0ACgAJAG0AYQByAGcAaQBuAC0AcgBpAGcAaAB0ADoAMABpAG4AOwANAAoACQBtAHMAbwAtAG0A YQByAGcAaQBuAC0AYgBvAHQAdABvAG0ALQBhAGwAdAA6AGEAdQB0AG8AOwANAAoACQBtAGEAcgBn AGkAbgAtAGwAZQBmAHQAOgAwAGkAbgA7AA0ACgAJAG0AcwBvAC0AcABhAGcAaQBuAGEAdABpAG8A bgA6AHcAaQBkAG8AdwAtAG8AcgBwAGgAYQBuADsADQAKAAkAZgBvAG4AdAAtAHMAaQB6AGUAOgAx ADIALgAwAHAAdAA7AA0ACgAJAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6ACIAQQBwAHQAbwBzACIA LABzAGEAbgBzAC0AcwBlAHIAaQBmADsADQAKAAkAbQBzAG8ALQBmAGEAcgBlAGEAcwB0AC0AZgBv AG4AdAAtAGYAYQBtAGkAbAB5ADoAQQBwAHQAbwBzADsADQAKAAkAbQBzAG8ALQBmAGEAcgBlAGEA cwB0AC0AdABoAGUAbQBlAC0AZgBvAG4AdAA6AG0AaQBuAG8AcgAtAGwAYQB0AGkAbgA7AA0ACgAJ AG0AcwBvAC0AYgBpAGQAaQAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AEEAcAB0AG8AcwA7AH0A DQAKAHAALgBtAHMAbwBuAG8AcgBtAGEAbAAwACwAIABsAGkALgBtAHMAbwBuAG8AcgBtAGEAbAAw 
ACwAIABkAGkAdgAuAG0AcwBvAG4AbwByAG0AYQBsADAADQAKAAkAewBtAHMAbwAtAHMAdAB5AGwA ZQAtAG4AYQBtAGUAOgBtAHMAbwBuAG8AcgBtAGEAbAA7AA0ACgAJAG0AcwBvAC0AcwB0AHkAbABl AC0AdQBuAGgAaQBkAGUAOgBuAG8AOwANAAoACQBtAHMAbwAtAG0AYQByAGcAaQBuAC0AdABvAHAA LQBhAGwAdAA6AGEAdQB0AG8AOwANAAoACQBtAGEAcgBnAGkAbgAtAHIAaQBnAGgAdAA6ADAAaQBu ADsADQAKAAkAbQBzAG8ALQBtAGEAcgBnAGkAbgAtAGIAbwB0AHQAbwBtAC0AYQBsAHQAOgBhAHUA dABvADsADQAKAAkAbQBhAHIAZwBpAG4ALQBsAGUAZgB0ADoAMABpAG4AOwANAAoACQBtAHMAbwAt AHAAYQBnAGkAbgBhAHQAaQBvAG4AOgB3AGkAZABvAHcALQBvAHIAcABoAGEAbgA7AA0ACgAJAGYA bwBuAHQALQBzAGkAegBlADoAMQAyAC4AMABwAHQAOwANAAoACQBmAG8AbgB0AC0AZgBhAG0AaQBs AHkAOgAiAEEAcAB0AG8AcwAiACwAcwBhAG4AcwAtAHMAZQByAGkAZgA7AA0ACgAJAG0AcwBvAC0A ZgBhAHIAZQBhAHMAdAAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AEEAcAB0AG8AcwA7AA0ACgAJ AG0AcwBvAC0AZgBhAHIAZQBhAHMAdAAtAHQAaABlAG0AZQAtAGYAbwBuAHQAOgBtAGkAbgBvAHIA LQBsAGEAdABpAG4AOwANAAoACQBtAHMAbwAtAGIAaQBkAGkALQBmAG8AbgB0AC0AZgBhAG0AaQBs AHkAOgBBAHAAdABvAHMAOwB9AA0ACgAuAE0AcwBvAEMAaABwAEQAZQBmAGEAdQBsAHQADQAKAAkA ewBtAHMAbwAtAHMAdAB5AGwAZQAtAHQAeQBwAGUAOgBlAHgAcABvAHIAdAAtAG8AbgBsAHkAOwAN AAoACQBtAHMAbwAtAGQAZQBmAGEAdQBsAHQALQBwAHIAbwBwAHMAOgB5AGUAcwA7AA0ACgAJAG0A cwBvAC0AYQBzAGMAaQBpAC0AZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAQQBwAHQAbwBzADsADQAK AAkAbQBzAG8ALQBhAHMAYwBpAGkALQB0AGgAZQBtAGUALQBmAG8AbgB0ADoAbQBpAG4AbwByAC0A bABhAHQAaQBuADsADQAKAAkAbQBzAG8ALQBmAGEAcgBlAGEAcwB0AC0AZgBvAG4AdAAtAGYAYQBt AGkAbAB5ADoAQQBwAHQAbwBzADsADQAKAAkAbQBzAG8ALQBmAGEAcgBlAGEAcwB0AC0AdABoAGUA bQBlAC0AZgBvAG4AdAA6AG0AaQBuAG8AcgAtAGwAYQB0AGkAbgA7AA0ACgAJAG0AcwBvAC0AaABh AG4AcwBpAC0AZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAQQBwAHQAbwBzADsADQAKAAkAbQBzAG8A LQBoAGEAbgBzAGkALQB0AGgAZQBtAGUALQBmAG8AbgB0ADoAbQBpAG4AbwByAC0AbABhAHQAaQBu ADsADQAKAAkAbQBzAG8ALQBiAGkAZABpAC0AZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAIgBUAGkA bQBlAHMAIABOAGUAdwAgAFIAbwBtAGEAbgAiADsADQAKAAkAbQBzAG8ALQBiAGkAZABpAC0AdABo AGUAbQBlAC0AZgBvAG4AdAA6AG0AaQBuAG8AcgAtAGIAaQBkAGkAOwB9AA0ACgBAAHAAYQBnAGUA 
IABXAG8AcgBkAFMAZQBjAHQAaQBvAG4AMQANAAoACQB7AHMAaQB6AGUAOgA4AC4ANQBpAG4AIAAx ADEALgAwAGkAbgA7AA0ACgAJAG0AYQByAGcAaQBuADoAMQAuADAAaQBuACAAMQAuADAAaQBuACAA MQAuADAAaQBuACAAMQAuADAAaQBuADsADQAKAAkAbQBzAG8ALQBoAGUAYQBkAGUAcgAtAG0AYQBy AGcAaQBuADoALgA1AGkAbgA7AA0ACgAJAG0AcwBvAC0AZgBvAG8AdABlAHIALQBtAGEAcgBnAGkA bgA6AC4ANQBpAG4AOwANAAoACQBtAHMAbwAtAHAAYQBwAGUAcgAtAHMAbwB1AHIAYwBlADoAMAA7 AH0ADQAKAGQAaQB2AC4AVwBvAHIAZABTAGUAYwB0AGkAbwBuADEADQAKAAkAewBwAGEAZwBlADoA VwBvAHIAZABTAGUAYwB0AGkAbwBuADEAOwB9AA0ACgAtAC0APgANAAoAPAAvAHMAdAB5AGwAZQA+ AA0ACgA8ACEALQAtAFsAaQBmACAAZwB0AGUAIABtAHMAbwAgADEAMABdAD4ADQAKADwAcwB0AHkA bABlAD4ADQAKACAALwAqACAAUwB0AHkAbABlACAARABlAGYAaQBuAGkAdABpAG8AbgBzACAAKgAv AA0ACgAgAHQAYQBiAGwAZQAuAE0AcwBvAE4AbwByAG0AYQBsAFQAYQBiAGwAZQANAAoACQB7AG0A cwBvAC0AcwB0AHkAbABlAC0AbgBhAG0AZQA6ACIAVABhAGIAbABlACAATgBvAHIAbQBhAGwAIgA7 AA0ACgAJAG0AcwBvAC0AdABzAHQAeQBsAGUALQByAG8AdwBiAGEAbgBkAC0AcwBpAHoAZQA6ADAA OwANAAoACQBtAHMAbwAtAHQAcwB0AHkAbABlAC0AYwBvAGwAYgBhAG4AZAAtAHMAaQB6AGUAOgAw ADsADQAKAAkAbQBzAG8ALQBzAHQAeQBsAGUALQBuAG8AcwBoAG8AdwA6AHkAZQBzADsADQAKAAkA bQBzAG8ALQBzAHQAeQBsAGUALQBwAHIAaQBvAHIAaQB0AHkAOgA5ADkAOwANAAoACQBtAHMAbwAt AHMAdAB5AGwAZQAtAHAAYQByAGUAbgB0ADoAIgAiADsADQAKAAkAbQBzAG8ALQBwAGEAZABkAGkA bgBnAC0AYQBsAHQAOgAwAGkAbgAgADUALgA0AHAAdAAgADAAaQBuACAANQAuADQAcAB0ADsADQAK AAkAbQBzAG8ALQBwAGEAcgBhAC0AbQBhAHIAZwBpAG4AOgAwAGkAbgA7AA0ACgAJAG0AcwBvAC0A cABhAGcAaQBuAGEAdABpAG8AbgA6AHcAaQBkAG8AdwAtAG8AcgBwAGgAYQBuADsADQAKAAkAZgBv AG4AdAAtAHMAaQB6AGUAOgAxADIALgAwAHAAdAA7AA0ACgAJAGYAbwBuAHQALQBmAGEAbQBpAGwA eQA6ACIAQQBwAHQAbwBzACIALABzAGEAbgBzAC0AcwBlAHIAaQBmADsADQAKAAkAbQBzAG8ALQBh AHMAYwBpAGkALQBmAG8AbgB0AC0AZgBhAG0AaQBsAHkAOgBBAHAAdABvAHMAOwANAAoACQBtAHMA bwAtAGEAcwBjAGkAaQAtAHQAaABlAG0AZQAtAGYAbwBuAHQAOgBtAGkAbgBvAHIALQBsAGEAdABp AG4AOwANAAoACQBtAHMAbwAtAGgAYQBuAHMAaQAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AEEA cAB0AG8AcwA7AA0ACgAJAG0AcwBvAC0AaABhAG4AcwBpAC0AdABoAGUAbQBlAC0AZgBvAG4AdAA6 
AG0AaQBuAG8AcgAtAGwAYQB0AGkAbgA7AA0ACgAJAG0AcwBvAC0AZgBvAG4AdAAtAGsAZQByAG4A aQBuAGcAOgAxAC4AMABwAHQAOwANAAoACQBtAHMAbwAtAGwAaQBnAGEAdAB1AHIAZQBzADoAcwB0 AGEAbgBkAGEAcgBkAGMAbwBuAHQAZQB4AHQAdQBhAGwAOwB9AA0ACgA8AC8AcwB0AHkAbABlAD4A DQAKADwAIQBbAGUAbgBkAGkAZgBdAC0ALQA+AA0ACgA8AC8AaABlAGEAZAA+AA0ACgANAAoAPABi AG8AZAB5ACAAbABhAG4AZwA9AEUATgAtAFUAUwAgAHMAdAB5AGwAZQA9ACcAdABhAGIALQBpAG4A dABlAHIAdgBhAGwAOgAuADUAaQBuADsAdwBvAHIAZAAtAHcAcgBhAHAAOgBiAHIAZQBhAGsALQB3 AG8AcgBkACcAPgANAAoADQAKADwAZABpAHYAIABjAGwAYQBzAHMAPQBXAG8AcgBkAFMAZQBjAHQA aQBvAG4AMQA+AA0ACgANAAoAPABwACAAYwBsAGEAcwBzAD0ATQBzAG8ATgBvAHIAbQBhAGwAIABz AHQAeQBsAGUAPQAnAG0AYQByAGcAaQBuAC0AbABlAGYAdAA6ADIAMgA1AC4AMABwAHQAOwB0AGUA eAB0AC0AaQBuAGQAZQBuAHQAOgAtADIAMgA1AC4AMABwAHQAOwB0AGEAYgAtAHMAdABvAHAAcwA6 AA0ACgAyADIANQAuADAAcAB0ADsAbQBzAG8ALQBsAGEAeQBvAHUAdAAtAGcAcgBpAGQALQBhAGwA aQBnAG4AOgBuAG8AbgBlADsAdABlAHgAdAAtAGEAdQB0AG8AcwBwAGEAYwBlADoAbgBvAG4AZQAn AD4APABiAD4APABzAHAAYQBuAA0ACgBzAHQAeQBsAGUAPQAnAGYAbwBuAHQALQBzAGkAegBlADoA MQAxAC4AMABwAHQAOwBmAG8AbgB0AC0AZgBhAG0AaQBsAHkAOgAiAEMAYQBsAGkAYgByAGkAIgAs AHMAYQBuAHMALQBzAGUAcgBpAGYAOwBjAG8AbABvAHIAOgBiAGwAYQBjAGsAJwA+AEYAcgBvAG0A OgA8AHMAcABhAG4ADQAKAHMAdAB5AGwAZQA9ACcAbQBzAG8ALQB0AGEAYgAtAGMAbwB1AG4AdAA6 ADEAJwA+ACAAPAAvAHMAcABhAG4APgA8AC8AcwBwAGEAbgA+ADwALwBiAD4APABzAHAAYQBuACAA cwB0AHkAbABlAD0AJwBmAG8AbgB0AC0AcwBpAHoAZQA6ADEAMQAuADAAcAB0ADsADQAKAGYAbwBu AHQALQBmAGEAbQBpAGwAeQA6ACIAQwBhAGwAaQBiAHIAaQAiACwAcwBhAG4AcwAtAHMAZQByAGkA ZgA7AGMAbwBsAG8AcgA6AGIAbABhAGMAawAnAD4AQQBuAG8AbgB5AG0AbwB1AHMAZQBtAGEAaQBs AA0ACgAmAGwAdAA7AG4AbwByAGUAcABsAHkAQABhAG4AbwBuAHkAbQBvAHUAcwBlAG0AYQBpAGwA LgBlAHUAJgBnAHQAOwA8AG8AOgBwAD4APAAvAG8AOgBwAD4APAAvAHMAcABhAG4APgA8AC8AcAA+ AA0ACgANAAoAPABwACAAYwBsAGEAcwBzAD0ATQBzAG8ATgBvAHIAbQBhAGwAIABzAHQAeQBsAGUA PQAnAG0AYQByAGcAaQBuAC0AbABlAGYAdAA6ADIAMgA1AC4AMABwAHQAOwB0AGUAeAB0AC0AaQBu AGQAZQBuAHQAOgAtADIAMgA1AC4AMABwAHQAOwB0AGEAYgAtAHMAdABvAHAAcwA6AA0ACgAyADIA 
NQAuADAAcAB0ADsAbQBzAG8ALQBsAGEAeQBvAHUAdAAtAGcAcgBpAGQALQBhAGwAaQBnAG4AOgBu AG8AbgBlADsAdABlAHgAdAAtAGEAdQB0AG8AcwBwAGEAYwBlADoAbgBvAG4AZQAnAD4APABiAD4A PABzAHAAYQBuAA0ACgBzAHQAeQBsAGUAPQAnAGYAbwBuAHQALQBzAGkAegBlADoAMQAxAC4AMABw AHQAOwBmAG8AbgB0AC0AZgBhAG0AaQBsAHkAOgAiAEMAYQBsAGkAYgByAGkAIgAsAHMAYQBuAHMA LQBzAGUAcgBpAGYAOwBjAG8AbABvAHIAOgBiAGwAYQBjAGsAJwA+AFMAZQBuAHQAOgA8AHMAcABh AG4ADQAKAHMAdAB5AGwAZQA9ACcAbQBzAG8ALQB0AGEAYgAtAGMAbwB1AG4AdAA6ADEAJwA+ACAA PAAvAHMAcABhAG4APgA8AC8AcwBwAGEAbgA+ADwALwBiAD4APABzAHAAYQBuACAAcwB0AHkAbABl AD0AJwBmAG8AbgB0AC0AcwBpAHoAZQA6ADEAMQAuADAAcAB0ADsADQAKAGYAbwBuAHQALQBmAGEA bQBpAGwAeQA6ACIAQwBhAGwAaQBiAHIAaQAiACwAcwBhAG4AcwAtAHMAZQByAGkAZgA7AGMAbwBs AG8AcgA6AGIAbABhAGMAawAnAD4AUwBhAHQAdQByAGQAYQB5ACwAIABNAGEAeQAgADIANAAsACAA MgAwADIANQAgADQAOgAxADMAIABQAE0APABvADoAcAA+ADwALwBvADoAcAA+ADwALwBzAHAAYQBu AD4APAAvAHAAPgANAAoADQAKADwAcAAgAGMAbABhAHMAcwA9AE0AcwBvAE4AbwByAG0AYQBsACAA cwB0AHkAbABlAD0AJwBtAGEAcgBnAGkAbgAtAGwAZQBmAHQAOgAyADIANQAuADAAcAB0ADsAdABl AHgAdAAtAGkAbgBkAGUAbgB0ADoALQAyADIANQAuADAAcAB0ADsAdABhAGIALQBzAHQAbwBwAHMA OgANAAoAMgAyADUALgAwAHAAdAA7AG0AcwBvAC0AbABhAHkAbwB1AHQALQBnAHIAaQBkAC0AYQBs AGkAZwBuADoAbgBvAG4AZQA7AHQAZQB4AHQALQBhAHUAdABvAHMAcABhAGMAZQA6AG4AbwBuAGUA JwA+ADwAYgA+ADwAcwBwAGEAbgANAAoAcwB0AHkAbABlAD0AJwBmAG8AbgB0AC0AcwBpAHoAZQA6 ADEAMQAuADAAcAB0ADsAZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAIgBDAGEAbABpAGIAcgBpACIA LABzAGEAbgBzAC0AcwBlAHIAaQBmADsAYwBvAGwAbwByADoAYgBsAGEAYwBrACcAPgBUAG8AOgA8 AHMAcABhAG4ADQAKAHMAdAB5AGwAZQA9ACcAbQBzAG8ALQB0AGEAYgAtAGMAbwB1AG4AdAA6ADEA JwA+ACAAPAAvAHMAcABhAG4APgA8AC8AcwBwAGEAbgA+ADwALwBiAD4APABzAHAAYQBuACAAcwB0 AHkAbABlAD0AJwBmAG8AbgB0AC0AcwBpAHoAZQA6ADEAMQAuADAAcAB0ADsADQAKAGYAbwBuAHQA LQBmAGEAbQBpAGwAeQA6ACIAQwBhAGwAaQBiAHIAaQAiACwAcwBhAG4AcwAtAHMAZQByAGkAZgA7 AGMAbwBsAG8AcgA6AGIAbABhAGMAawAnAD4AcwBhAG0AcABsAGUAbABlAGYAYQB5AEAAZwBtAGEA aQBsAC4AYwBvAG0APABvADoAcAA+ADwALwBvADoAcAA+ADwALwBzAHAAYQBuAD4APAAvAHAAPgAN 
AAoADQAKADwAcAAgAGMAbABhAHMAcwA9AE0AcwBvAE4AbwByAG0AYQBsACAAcwB0AHkAbABlAD0A JwBtAGEAcgBnAGkAbgAtAGwAZQBmAHQAOgAyADIANQAuADAAcAB0ADsAdABlAHgAdAAtAGkAbgBk AGUAbgB0ADoALQAyADIANQAuADAAcAB0ADsAdABhAGIALQBzAHQAbwBwAHMAOgANAAoAMgAyADUA LgAwAHAAdAA7AG0AcwBvAC0AbABhAHkAbwB1AHQALQBnAHIAaQBkAC0AYQBsAGkAZwBuADoAbgBv AG4AZQA7AHQAZQB4AHQALQBhAHUAdABvAHMAcABhAGMAZQA6AG4AbwBuAGUAJwA+ADwAYgA+ADwA cwBwAGEAbgANAAoAcwB0AHkAbABlAD0AJwBmAG8AbgB0AC0AcwBpAHoAZQA6ADEAMQAuADAAcAB0 ADsAZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAIgBDAGEAbABpAGIAcgBpACIALABzAGEAbgBzAC0A cwBlAHIAaQBmADsAYwBvAGwAbwByADoAYgBsAGEAYwBrACcAPgBTAHUAYgBqAGUAYwB0ADoAPABz AHAAYQBuAA0ACgBzAHQAeQBsAGUAPQAnAG0AcwBvAC0AdABhAGIALQBjAG8AdQBuAHQAOgAxACcA PgAgADwALwBzAHAAYQBuAD4APAAvAHMAcABhAG4APgA8AC8AYgA+ADwAcwBwAGEAbgAgAHMAdAB5 AGwAZQA9ACcAZgBvAG4AdAAtAHMAaQB6AGUAOgAxADEALgAwAHAAdAA7AA0ACgBmAG8AbgB0AC0A ZgBhAG0AaQBsAHkAOgAiAEMAYQBsAGkAYgByAGkAIgAsAHMAYQBuAHMALQBzAGUAcgBpAGYAOwBj AG8AbABvAHIAOgBiAGwAYQBjAGsAJwA+AFcAZQBsAGMAbwBtAGUAIAB0AG8AIAB5AG8AdQByACAA cwBhAG0AcABsAGUAIABlAG0AYQBpAGwALgAgADwALwBzAHAAYQBuAD4APABzAHAAYQBuAA0ACgBz AHQAeQBsAGUAPQAnAGYAbwBuAHQALQBzAGkAegBlADoAMQAxAC4AMABwAHQAOwBmAG8AbgB0AC0A ZgBhAG0AaQBsAHkAOgAiAFMAZQBnAG8AZQAgAFUASQAgAEUAbQBvAGoAaQAiACwAcwBhAG4AcwAt AHMAZQByAGkAZgA7AG0AcwBvAC0AYgBpAGQAaQAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6AA0A CgAiAFMAZQBnAG8AZQAgAFUASQAgAEUAbQBvAGoAaQAiADsAYwBvAGwAbwByADoAYgBsAGEAYwBr ACcAPgAmACMAMQAyADgANQA3ADkAOwA8AC8AcwBwAGEAbgA+ADwAcwBwAGEAbgAgAHMAdAB5AGwA ZQA9ACcAZgBvAG4AdAAtAHMAaQB6AGUAOgAxADEALgAwAHAAdAA7AA0ACgBmAG8AbgB0AC0AZgBh AG0AaQBsAHkAOgAiAEMAYQBsAGkAYgByAGkAIgAsAHMAYQBuAHMALQBzAGUAcgBpAGYAOwBjAG8A bABvAHIAOgBiAGwAYQBjAGsAJwA+ADwAbwA6AHAAPgA8AC8AbwA6AHAAPgA8AC8AcwBwAGEAbgA+ ADwALwBwAD4ADQAKAA0ACgA8AHAAIABjAGwAYQBzAHMAPQBNAHMAbwBOAG8AcgBtAGEAbAA+ADwA bwA6AHAAPgAmAG4AYgBzAHAAOwA8AC8AbwA6AHAAPgA8AC8AcAA+AA0ACgANAAoAPABwAD4APABz AHAAYQBuACAAcwB0AHkAbABlAD0AJwBjAG8AbABvAHIAOgAjAEMAMAAzADkAMgBCACcAPgBQAG8A 
dwBlAHIAZQBkACAAYgB5ACAAPABzAHQAcgBvAG4AZwA+ADwAcwBwAGEAbgAgAHMAdAB5AGwAZQA9 ACcAZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoADQAKACIAQQBwAHQAbwBzACIALABzAGEAbgBzAC0A cwBlAHIAaQBmADsAbQBzAG8ALQBiAGkAZABpAC0AZgBvAG4AdAAtAGYAYQBtAGkAbAB5ADoAQQBw AHQAbwBzACcAPgBBAG4AbwBuAHkAbQBvAHUAcwBlAG0AYQBpAGwAPAAvAHMAcABhAG4APgA8AC8A cwB0AHIAbwBuAGcAPgA8AC8AcwBwAGEAbgA+ADwALwBwAD4ADQAKAA0ACgA8AHAAPgBZAG8AdQAn AHYAZQAgAGIAZQBlAG4AIABzAGUAbgB0ACAAYQAgAHMAYQBtAHAAbABlACAAZQBtAGEAaQBsACEA IAA8AHMAcABhAG4AIABzAHQAeQBsAGUAPQAnAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6ACIAUwBl AGcAbwBlACAAVQBJACAARQBtAG8AagBpACIALABzAGEAbgBzAC0AcwBlAHIAaQBmADsADQAKAG0A cwBvAC0AYgBpAGQAaQAtAGYAbwBuAHQALQBmAGEAbQBpAGwAeQA6ACIAUwBlAGcAbwBlACAAVQBJ ACAARQBtAG8AagBpACIAJwA+ACYAIwAxADIANwA4ADgAMQA7ADwALwBzAHAAYQBuAD4APAAvAHAA PgANAAoADQAKADwALwBkAGkAdgA+AA0ACgANAAoAPAAvAGIAbwBkAHkAPgANAAoADQAKADwALwBo AHQAbQBsAD4ADQAKAA== ------=_NextPart_01DBCCE0.CD23D5E0 Content-Location: file:///C:/790962C5/sample-anonymouse_files/themedata.thmx Content-Transfer-Encoding: base64 Content-Type: application/vnd.ms-officetheme UEsDBBQABgAIAAAAIQDp3g+//wAAABwCAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbKyRy07DMBBF 90j8g+UtSpyyQAgl6YLHjseifMDImSQWydiyp1X790zSVEKoIBZsLNkz954743K9Hwe1w5icp0qv 8kIrJOsbR12l3zdP2a1WiYEaGDxhpQ+Y9Lq+vCg3h4BJiZpSpXvmcGdMsj2OkHIfkKTS+jgCyzV2 JoD9gA7NdVHcGOuJkTjjyUPX5QO2sB1YPe7l+Zgk4pC0uj82TqxKQwiDs8CS1Oyo+UbJFkIuyrkn 9S6kK4mhzVnCVPkZsOheZTXRNajeIPILjBLDsAyJX89nIBkt5r87nons29ZZbLzdjrKOfDZezE7B /xRg9T/oE9PMf1t/AgAA//8DAFBLAwQUAAYACAAAACEApdan58AAAAA2AQAACwAAAF9yZWxzLy5y ZWxzhI/PasMwDIfvhb2D0X1R0sMYJXYvpZBDL6N9AOEof2giG9sb69tPxwYKuwiEpO/3qT3+rov5 4ZTnIBaaqgbD4kM/y2jhdj2/f4LJhaSnJQhbeHCGo3vbtV+8UNGjPM0xG6VItjCVEg+I2U+8Uq5C ZNHJENJKRds0YiR/p5FxX9cfmJ4Z4DZM0/UWUtc3YK6PqMn/s8MwzJ5PwX+vLOVFBG43lExp5GKh qC/jU72QqGWq1B7Qtbj51v0BAAD//wMAUEsDBBQABgAIAAAAIQBreZYWgwAAAIoAAAAcAAAAdGhl bWUvdGhlbWUvdGhlbWVNYW5hZ2VyLnhtbAzMTQrDIBBA4X2hd5DZN2O7KEVissuuu/YAQ5waQceg 0p/b1+XjgzfO3xTVm0sNWSycBw2KZc0uiLfwfCynG6jaSBzFLGzhxxXm6XgYybSNE99JyHNRfSPV 
kIWttd0g1rUr1SHvLN1euSRqPYtHV+jT9yniResrJgoCOP0BAAD//wMAUEsDBBQABgAIAAAAIQBb lZ6x9AcAABMiAAAWAAAAdGhlbWUvdGhlbWUvdGhlbWUxLnhtbOxaS48buRG+B8h/aPRdVnfrPbC8 0NOz9ow9sGQHe6QkSk0Puyk0qZkRFgsE3lMuAQJsgr0skFsOi0UWyAJZ5JIfY8BGsvkRKZKtFilR ngcMxAhm5tJNfVX8WFWsqmb3w8+uEupd4IwTlrb98EHgezidshlJF23/5XhYavoeFyidIcpS3PbX mPufPfr1rx6iIxHjBHsgn/Ij1PZjIZZH5TKfwjDiD9gSp/DbnGUJEnCbLcqzDF2C3oSWoyColxNE Ut9LUQJqn8/nZIq9sVTpP9ooH1C4TQWXA1OajaRqbEko7Ow8lAi+5j2aeReItn2YZ8Yux/hK+B5F XMAPbT9Qf3750cMyOsqFqDgga8gN1V8ulwvMziM1Z7aYFJMGg6hZDQv9CkDFPm7QlP+FPgVA0yms VHMxdYa1etCMcqwB0pcO3a1GWLHxhv7KHuewVe9GVUu/Amn91T18MGwN+jULr0AaX9vDd4Ko26pY eAXS+PoevjroNKKBhVegmJL0fB9dbzSb9RxdQOaMHjvhrXo9aPRz+BYF0VBEl5xizlJxKNYS9Jpl QwBIIEWCpJ5YL/EcTSGKO0vBuNcnfEnR2veWKGUchoMoDCH0qkFU/CuLoyOMDGnJC5jwvSHJx+PT jCxF238CWn0D8u7nn9+++entm7+//frrt2/+6p2QRSy0KkvuGKULU+6Xv/zhP9/91vv33/78yzd/ dOO5iX//w+/e/+OfH1IPW21rind/+vH9Tz+++/b3//r+G4f2ToYmJnxMEsy9Z/jSe8ESWKAyhc0f T7LbSYxjREyJTrrgKEVyFof+gYgt9LM1osiB62Lbjq8ySDUu4OPVa4vwKM5Wgjg0Po0TC3jKGO2y zGmFp3Iuw8zjVbpwT56tTNwLhC5cc/dQanl5sFpCjiUulb0YWzTPKEoFWuAUC0/+xs4xdqzuC0Is u56SacY4mwvvC+J1EXGaZEwmVjRthY5JAn5ZuwiCvy3bnL7yuoy6Vt3HFzYS9gaiDvJjTC0zPkYr gRKXyjFKqGnwEyRiF8nROpuauAEX4OkFpswbzDDnLpnnGazXcPpTBNnN6fZTuk5sZCbIuUvnCWLM RPbZeS9GydKFHZE0NrGf83MIUeSdMeGCnzJ7h8h78ANKD7r7FcGWu6/PBi8hy5mUtgEif1llDl8+ xsyK39GazhF2pZpOllgptpMRZ3R0VwsrtE8wpugSzTD2Xn7uYNBlS8vmW9JPYsgqx9gVWE+QHavy PsUceiXZ3OznyRPCrZAd4QU7wOd0vZN41ihNUHZI8zPwumnzwSSDzeig8JxOz03gMwI9IMSL0yjP Oegwgvug1rMYWQVM3nN3vK4zy3832WOwL19bNG6wL0EG31oGErsp80HbjBG1JtgGzBgR78SVbkHE cv9WRBZXJbZyys3tTbt1A3RHVtOTkPSaDuh/0/k4AvHj9DxuxVbCumW3cyihHO/0OIdwu51Nj2Uz 8uk3Nn20Ss8w1JL9rHXf19z3Nf7/fV9zaD/fdzOHeo77bsaHLuO+m8kPWD5ON7NtYKC3kYcM+rBH Hf0kB09+5oTSkVhTfMLV4Q+HZ5rZEAalnDr1xMVJ4DKGS1nmYAILt8iQkvEyJn5DRDyK0RJOiEJf KlnwXPWCe0vG4eBIDTt1SzxdJadspg881QlToCsrR2I7HtTg6EmPw2GV0Oh6Ix+U/NSpKvBVbBfq sHVDQMrehoQxmU2i4iDR2AxeQ0KenX0cFi0Hi6ZUv3HVnimAWuEVeOj24FG97deqkhCclPMpNOgz 
6Sft6o13lTM/pqcPGdOKADhc1CuBo/nC0y3J9eDy5Op0qN3A0xYJ5RQdVjYJZRnV4PEYHoXz6JSj N6FxW1+3ti616ElTqPkgvrc0Gs0Psbirr0FuNzfQ1MwUNPUuYY9HsOl8b4qWbX8OJ8dwmSwheLh8 8EJ0Aa9fpiLTO/4uqWWZcdFHPNYWV1lH+ychAmceJUnbl+sv/EBTlUQ0uRZs3U+VXCQ33KdGDrxu exnP53gqTL8bI9LS+hZSvE4Wzl+V+N3BUpKtwN2jeHbpTegqe4EgxGqNUHp3Rji8QAi1q2cE3ogV mWwbfzuVKc/+5ispFUN6HNFljPKSYmZzDVcFpaCj7gobGHf5msGghknySjhZyAprGtUqp0Xt0hwO lt3rhaTljKy5LZpWWpFl053GrBk2dWDHlner8garjYkhqZklXufu3Zzb2iS7nUahKBNg8MJ+d6v9 BrXtZBY1yXg/D8uknY/axWOzwGuo3aRKGGm/vlG7Y7eiSDing8E7lX6Q241aGJpvGktlafXq3Hy7 zSavIXn0oc1dUf2+m6ZwJ6OSL88y5dsJm63zS8p1otE+l02pRNL0BZ57ZHbV9iNX56jfuYZ5N6DQ UkwWr0LQ2e3ZgjleiuoNWwjrAC+CSm9KW7iQUDND710Iq3NFF21xtaEse3XAKxNyvWowbW4puNq3 IpyRZwh625Hq7HTuBdpXIs8vcOWtMtL2vwxqnWovqvVKQbM2KFUr1aDUrHUqpU6tVgkHtTDod6Ov gJ6Ik7Cmv30Ywqsgus6/gFDje19BJJu3XQ+mLCkz9ZVDWXlffQURRoe/ggBHAq1oEFajTtQr9fph vVSN+vVSs1HplHpRvR91oGjXh52vfO9CgcNuvz8c1qJSvQe4atCplTrdSq9Ubw660TAcVPsBgPPy cwVPMWCzjS3gUvF69F8AAAD//wMAUEsDBBQABgAIAAAAIQAN0ZCftgAAABsBAAAnAAAAdGhlbWUv dGhlbWUvX3JlbHMvdGhlbWVNYW5hZ2VyLnhtbC5yZWxzhI9NCsIwFIT3gncIb2/TuhCRJt2I0K3U A4TkNQ02PyRR7O0NriwILodhvplpu5edyRNjMt4xaKoaCDrplXGawW247I5AUhZOidk7ZLBggo5v N+0VZ5FLKE0mJFIoLjGYcg4nSpOc0IpU+YCuOKOPVuQio6ZByLvQSPd1faDxmwF8xSS9YhB71QAZ llCa/7P9OBqJZy8fFl3+UUFz2YUFKKLGzOAjm6pMBMpburrE3wAAAP//AwBQSwECLQAUAAYACAAA ACEA6d4Pv/8AAAAcAgAAEwAAAAAAAAAAAAAAAAAAAAAAW0NvbnRlbnRfVHlwZXNdLnhtbFBLAQIt ABQABgAIAAAAIQCl1qfnwAAAADYBAAALAAAAAAAAAAAAAAAAADABAABfcmVscy8ucmVsc1BLAQIt ABQABgAIAAAAIQBreZYWgwAAAIoAAAAcAAAAAAAAAAAAAAAAABkCAAB0aGVtZS90aGVtZS90aGVt ZU1hbmFnZXIueG1sUEsBAi0AFAAGAAgAAAAhAFuVnrH0BwAAEyIAABYAAAAAAAAAAAAAAAAA1gIA AHRoZW1lL3RoZW1lL3RoZW1lMS54bWxQSwECLQAUAAYACAAAACEADdGQn7YAAAAbAQAAJwAAAAAA AAAAAAAAAAD+CgAAdGhlbWUvdGhlbWUvX3JlbHMvdGhlbWVNYW5hZ2VyLnhtbC5yZWxzUEsFBgAA AAAFAAUAXQEAAPkLAAAAAA== ------=_NextPart_01DBCCE0.CD23D5E0 Content-Location: file:///C:/790962C5/sample-anonymouse_files/colorschememapping.xml Content-Transfer-Encoding: quoted-printable Content-Type: text/xml 
------=_NextPart_01DBCCE0.CD23D5E0 Content-Location: file:///C:/790962C5/sample-anonymouse_files/filelist.xml Content-Transfer-Encoding: quoted-printable Content-Type: text/xml; charset="utf-8" ------=_NextPart_01DBCCE0.CD23D5E0-- ================================================ FILE: tests_data/basic/pem/doc.pem ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- mFIEWhdc7RMIKoZIzj0DAQcCAwTCrR7Da5QHqFi/CtJJ6egFb48zR9bn48epqb92 kfLIN/sjBc6iqvjcXQM8pfhFZnf5Bhk0ZzwvuAHzCZSJgNgNtCFFbGllIEJ1cnN6 dGVpbiA8Y29udGFjdEBlbGllLm5ldD6IgAQTEwgAHAUCWhdc7QILCQIbAwQVCAkK BBYCAwECF4ACHgEAFgkQtc4ql0fc7HoLGlRSRVpPUi1HUEd3nQD/bqujXolVmt7n GmY/kIRWEro2oUp2rXL5sAbABMC/SrIA/ihJa5nfZz8wAe7IzD76cRHW0qGpUHSM ehJzdDXXsEhruFYEWhdc7RIIKoZIzj0DAQcCAwSF6kdXcDKXmK5UYjfoRV07yxQo xapjucsZcXytjdLqbPDJr+Sw7Rlz41XIM3QQzOksFdNzlNemBuXBUE/K2522AwEI B4htBBgTCAAJBQJaF1ztAhsMABYJELXOKpdH3Ox6CxpUUkVaT1ItR1BHl9QBAI55 7DxLdB2WMXemGZ0U07vqGt2jSzTtUdYhqk4DkXUeAP98LFF4syoKrxD2pcArpKzI OwBiyuQgLZqQr2mtIPFWCw== =qq2a -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: tests_data/basic/pem/doc.pub ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- mFIEWhdc7RMIKoZIzj0DAQcCAwTCrR7Da5QHqFi/CtJJ6egFb48zR9bn48epqb92 kfLIN/sjBc6iqvjcXQM8pfhFZnf5Bhk0ZzwvuAHzCZSJgNgNtCFFbGllIEJ1cnN6 dGVpbiA8Y29udGFjdEBlbGllLm5ldD6IgAQTEwgAHAUCWhdc7QILCQIbAwQVCAkK BBYCAwECF4ACHgEAFgkQtc4ql0fc7HoLGlRSRVpPUi1HUEd3nQD/bqujXolVmt7n GmY/kIRWEro2oUp2rXL5sAbABMC/SrIA/ihJa5nfZz8wAe7IzD76cRHW0qGpUHSM ehJzdDXXsEhruFYEWhdc7RIIKoZIzj0DAQcCAwSF6kdXcDKXmK5UYjfoRV07yxQo xapjucsZcXytjdLqbPDJr+Sw7Rlz41XIM3QQzOksFdNzlNemBuXBUE/K2522AwEI B4htBBgTCAAJBQJaF1ztAhsMABYJELXOKpdH3Ox6CxpUUkVaT1ItR1BHl9QBAI55 7DxLdB2WMXemGZ0U07vqGt2jSzTtUdYhqk4DkXUeAP98LFF4syoKrxD2pcArpKzI OwBiyuQgLZqQr2mtIPFWCw== =qq2a -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: tests_data/basic/python/code.py 
================================================ def print_primes(max_n: int) -> None: for i in range(2, max_n + 1): if is_prime(i): print(i) def is_prime(n: int) -> bool: for i in range(2, n // 2 + 1): if n % i == 0: return False return True ================================================ FILE: tests_data/basic/rtf/doc.rtf ================================================ {\rtf1\ansi\ansicpg1252\uc0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deff0\adeff0{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f2\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}{\f3\fnil\fcharset0 Roboto;}}{ \colortbl;\red109\green158\blue235;\red0\green0\blue0;\red106\green168\blue79;\red230\green145\blue56;\red102\green102\blue102;}{\stylesheet{\s0\snext0\sqformat\spriority0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 Normal;}{\s1\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb400\sa120\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 heading 1;}{\s2\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb360\sa120\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs32\ltrch\b0\i0\fs32\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 heading 2;}{\s3\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb320\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs28\ltrch\b0\i0\fs28\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf4 heading 3;}{\s4\sbasedon0\snext0\styrsid15694742 
\sqformat\spriority0\keep\keepn\fi0\sb280\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs24\ltrch\b0\i0\fs24\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 4;}{\s5\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb240\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 5;}{\s6\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb240\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai\af3\afs22\ltrch\b0\i\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 6;}{\*\cs10\additive\ssemihidden\spriority0 Default Paragraph Font; }{\*\ts11\tsrowd\snext11\ssemihidden\spriority0\aspalpha\aspnum\adjustright\ltrpar\li0\lin0\ri0\rin0\ql\faauto\tsvertalt\tsbrdrl\tsbrdrr\tsbrdrt\tsbrdrb\tsbrdrdgr\tsbrdrdgl\tsbrdrh\tsbrdrv\trpaddl108\trpaddfl3\trwWidthB0\trftsWidthB3\trpaddt0\trpaddft3\trpaddb0 \trpaddfb3\trpaddr108\trpaddfr3 Normal Table;}{\s15\sbasedon0\snext15\styrsid15694742\sqformat\spriority0\keep\keepn\fi0\sb0\sa60\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\qc\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs52\ltrch\b0\i0\fs52\loch\af3 \dbch\af3\hich\f3\strike0\ulnone\cf2 Title;}{\s16\sbasedon0\snext16\styrsid15694742\sqformat\spriority0\keep\keepn\fi0\sb0\sa320\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\qc\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs30\ltrch\b0\i0\fs30 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 Subtitle;}}{\*\rsidtbl\rsid10976062\rsid13249109}{\*\generator Aspose.Words for Java 23.4.0;}{\info\version1\edmins0\nofpages1\nofwords0\nofchars0\nofcharsws0}\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0 
{\mmathPr\mbrkBin0\mbrkBinSub0\mdefJc1\mdispDef1\minterSp0\mintLim0\mintraSp0\mlMargin0\mmathFont0\mnaryLim1\mpostSp0\mpreSp0\mrMargin0\msmallFrac0\mwrapIndent1440\mwrapRight0}\deflang1033\deflangfe2052\adeflang1025\jexpand\showxmlerrors1\validatexml1{ \*\wgrffmtfilter 013f}\viewkind1\viewscale100\fet0\ftnbj\aenddoc\ftnrstcont\aftnrstcont\ftnnar\aftnnrlc\widowctrl\nospaceforul\nolnhtadjtbl\alntblind\lyttblrtgr\dntblnsbdb\noxlattoyen\wrppunct\nobrkwrptbl\expshrtn\snaptogridincell\asianbrkrule\htmautsp\noultrlspc \useltbaln\splytwnine\ftnlytwnine\lytcalctblwd\allowfieldendsel\lnbrkrule\nouicompat\nofeaturethrottle1\utinl\formshade\nojkernpunct\dghspace180\dgvspace180\dghorigin1800\dgvorigin1440\dghshow1\dgvshow1\dgmargin\pgbrdrhead\pgbrdrfoot\rsidroot10976062\sectd\sectlinegrid360\pgwsxn12240\pghsxn15840\marglsxn1440\margrsxn1440\margtsxn1440\margbsxn1440\guttersxn0\headery720\footery720\colsx720\ltrsect\pard\plain\itap0\s1\keep\keepn\ilvl0\fi0\sb400\sa120\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr \brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1{\*\bkmkstart h.z8khh864y3wu}{\*\bkmkend h.z8khh864y3wu}{\rtlch\ab0\ai0\af3\alang1025\afs40\ltrch\b0\i0\fs40\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 Hello}{\rtlch\ab0\ai0\af3\alang1025\afs40\ltrch\b0\i0\fs40\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs40\ltrch\b0\i0\fs40\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 world}{\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf1\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw 
\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf2\par} \pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2{\rtlch\ab0\ai0\af3\alang1025\afs22 \ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 hello}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 }{ \rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 world}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3 \hich\f3\strike0\ulnone\cf2 !}{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf2\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar \ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf2\par}\pard\plain\itap0\s2 \keep\keepn\ilvl0\fi0\sb360\sa120\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs32\ltrch\b0\i0\fs32\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3{ \*\bkmkstart h.oe6g4mjyfx6j}{\*\bkmkend 
h.oe6g4mjyfx6j}{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 Yes}{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 , }{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 another}{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 }{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 hello}{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 }{\rtlch\ab0\ai0\af3\alang1025\afs32\ltrch\b0\i0\fs32\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 world}{\rtlch\ab0\ai0\af3\afs32\ltrch\b0\i0\fs32\loch\af3\dbch\af3 \hich\f3\insrsid10976062\strike0\ulnone\cf3\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf2\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw \brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3 \dbch\af3\hich\f3\strike0\ulnone\cf2 
hello}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 !}{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0 \ulnone\cf2\par}{ \*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef0{\lsdlockedexcept\lsdqformat1 Normal;\lsdqformat1 heading 1;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 2;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 3; \lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 4;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 5;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 6;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 7;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 8; \lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 9;\lsdsemihidden1\lsdunhideused1\lsdqformat1 caption;\lsdqformat1 Title;\lsdqformat1 Subtitle;\lsdqformat1 Strong;\lsdqformat1 Emphasis;\lsdsemihidden1\lsdpriority99 Placeholder Text;\lsdqformat1\lsdpriority1 No Spacing; \lsdpriority60 Light Shading;\lsdpriority61 Light List;\lsdpriority62 Light Grid;\lsdpriority63 Medium Shading 1;\lsdpriority64 Medium Shading 2;\lsdpriority65 Medium List 1;\lsdpriority66 Medium List 2;\lsdpriority67 Medium Grid 1;\lsdpriority68 Medium Grid 2; \lsdpriority69 Medium Grid 3;\lsdpriority70 Dark List;\lsdpriority71 Colorful Shading;\lsdpriority72 Colorful List;\lsdpriority73 Colorful Grid;\lsdpriority60 Light Shading Accent 1;\lsdpriority61 Light List Accent 1;\lsdpriority62 Light Grid Accent 1;\lsdpriority63 Medium Shading 1 Accent 1; \lsdpriority64 Medium Shading 2 Accent 1;\lsdpriority65 Medium List 1 Accent 1;\lsdsemihidden1\lsdpriority99 Revision;\lsdqformat1\lsdpriority34 List Paragraph;\lsdqformat1\lsdpriority29 Quote;\lsdqformat1\lsdpriority30 Intense Quote;\lsdpriority66 Medium List 2 Accent 1; \lsdpriority67 Medium Grid 1 Accent 1;\lsdpriority68 Medium Grid 2 Accent 1;\lsdpriority69 
Medium Grid 3 Accent 1;\lsdpriority70 Dark List Accent 1;\lsdpriority71 Colorful Shading Accent 1;\lsdpriority72 Colorful List Accent 1;\lsdpriority73 Colorful Grid Accent 1; \lsdpriority60 Light Shading Accent 2;\lsdpriority61 Light List Accent 2;\lsdpriority62 Light Grid Accent 2;\lsdpriority63 Medium Shading 1 Accent 2;\lsdpriority64 Medium Shading 2 Accent 2;\lsdpriority65 Medium List 1 Accent 2;\lsdpriority66 Medium List 2 Accent 2; \lsdpriority67 Medium Grid 1 Accent 2;\lsdpriority68 Medium Grid 2 Accent 2;\lsdpriority69 Medium Grid 3 Accent 2;\lsdpriority70 Dark List Accent 2;\lsdpriority71 Colorful Shading Accent 2;\lsdpriority72 Colorful List Accent 2;\lsdpriority73 Colorful Grid Accent 2; \lsdpriority60 Light Shading Accent 3;\lsdpriority61 Light List Accent 3;\lsdpriority62 Light Grid Accent 3;\lsdpriority63 Medium Shading 1 Accent 3;\lsdpriority64 Medium Shading 2 Accent 3;\lsdpriority65 Medium List 1 Accent 3;\lsdpriority66 Medium List 2 Accent 3; \lsdpriority67 Medium Grid 1 Accent 3;\lsdpriority68 Medium Grid 2 Accent 3;\lsdpriority69 Medium Grid 3 Accent 3;\lsdpriority70 Dark List Accent 3;\lsdpriority71 Colorful Shading Accent 3;\lsdpriority72 Colorful List Accent 3;\lsdpriority73 Colorful Grid Accent 3; \lsdpriority60 Light Shading Accent 4;\lsdpriority61 Light List Accent 4;\lsdpriority62 Light Grid Accent 4;\lsdpriority63 Medium Shading 1 Accent 4;\lsdpriority64 Medium Shading 2 Accent 4;\lsdpriority65 Medium List 1 Accent 4;\lsdpriority66 Medium List 2 Accent 4; \lsdpriority67 Medium Grid 1 Accent 4;\lsdpriority68 Medium Grid 2 Accent 4;\lsdpriority69 Medium Grid 3 Accent 4;\lsdpriority70 Dark List Accent 4;\lsdpriority71 Colorful Shading Accent 4;\lsdpriority72 Colorful List Accent 4;\lsdpriority73 Colorful Grid Accent 4; \lsdpriority60 Light Shading Accent 5;\lsdpriority61 Light List Accent 5;\lsdpriority62 Light Grid Accent 5;\lsdpriority63 Medium Shading 1 Accent 5;\lsdpriority64 Medium Shading 2 Accent 5;\lsdpriority65 Medium List 1 Accent 
5;\lsdpriority66 Medium List 2 Accent 5; \lsdpriority67 Medium Grid 1 Accent 5;\lsdpriority68 Medium Grid 2 Accent 5;\lsdpriority69 Medium Grid 3 Accent 5;\lsdpriority70 Dark List Accent 5;\lsdpriority71 Colorful Shading Accent 5;\lsdpriority72 Colorful List Accent 5;\lsdpriority73 Colorful Grid Accent 5; \lsdpriority60 Light Shading Accent 6;\lsdpriority61 Light List Accent 6;\lsdpriority62 Light Grid Accent 6;\lsdpriority63 Medium Shading 1 Accent 6;\lsdpriority64 Medium Shading 2 Accent 6;\lsdpriority65 Medium List 1 Accent 6;\lsdpriority66 Medium List 2 Accent 6; \lsdpriority67 Medium Grid 1 Accent 6;\lsdpriority68 Medium Grid 2 Accent 6;\lsdpriority69 Medium Grid 3 Accent 6;\lsdpriority70 Dark List Accent 6;\lsdpriority71 Colorful Shading Accent 6;\lsdpriority72 Colorful List Accent 6;\lsdpriority73 Colorful Grid Accent 6; \lsdqformat1\lsdpriority19 Subtle Emphasis;\lsdqformat1\lsdpriority21 Intense Emphasis;\lsdqformat1\lsdpriority31 Subtle Reference;\lsdqformat1\lsdpriority32 Intense Reference;\lsdqformat1\lsdpriority33 Book Title;\lsdsemihidden1\lsdunhideused1\lsdpriority37 Bibliography; \lsdsemihidden1\lsdunhideused1\lsdqformat1\lsdpriority39 TOC Heading;}}} ================================================ FILE: tests_data/basic/rtf/magika_test.rtf ================================================ {\rtf1\ansi\ansicpg1252\uc0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deff0\adeff0{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f2\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}{\f3\fnil\fcharset0 Roboto;}}{ 
\colortbl;\red0\green0\blue0;\red109\green158\blue235;\red106\green168\blue79;\red230\green145\blue56;\red102\green102\blue102;}{\stylesheet{\s0\snext0\sqformat\spriority0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 Normal;}{\s1\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb400\sa120\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 heading 1;}{\s2\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb360\sa120\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs32\ltrch\b0\i0\fs32\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf3 heading 2;}{\s3\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb320\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs28\ltrch\b0\i0\fs28\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf4 heading 3;}{\s4\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb280\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs24\ltrch\b0\i0\fs24\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 4;}{\s5\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb240\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 5;}{\s6\sbasedon0\snext0\styrsid15694742 \sqformat\spriority0\keep\keepn\fi0\sb240\sa80\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl240\slmult1\rtlch\ab0\ai\af3\afs22\ltrch\b0\i\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 heading 
6;}{\*\cs10\additive\ssemihidden\spriority0 Default Paragraph Font; }{\*\ts11\tsrowd\snext11\ssemihidden\spriority0\aspalpha\aspnum\adjustright\ltrpar\li0\lin0\ri0\rin0\ql\faauto\tsvertalt\tsbrdrl\tsbrdrr\tsbrdrt\tsbrdrb\tsbrdrdgr\tsbrdrdgl\tsbrdrh\tsbrdrv\trpaddl108\trpaddfl3\trwWidthB0\trftsWidthB3\trpaddt0\trpaddft3\trpaddb0 \trpaddfb3\trpaddr108\trpaddfr3 Normal Table;}{\s15\sbasedon0\snext15\styrsid15694742\sqformat\spriority0\keep\keepn\fi0\sb0\sa60\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\qc\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs52\ltrch\b0\i0\fs52\loch\af3 \dbch\af3\hich\f3\strike0\ulnone\cf1 Title;}{\s16\sbasedon0\snext16\styrsid15694742\sqformat\spriority0\keep\keepn\fi0\sb0\sa320\aspalpha\aspnum\adjustright\widctlpar\ltrpar\li0\lin0\ri0\rin0\qc\faauto\sl240\slmult1\rtlch\ab0\ai0\af3\afs30\ltrch\b0\i0\fs30 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf5 Subtitle;}}{\*\rsidtbl\rsid10976062\rsid13249109}{\*\generator Aspose.Words for Java 23.4.0;}{\info\version1\edmins0\nofpages1\nofwords0\nofchars0\nofcharsws0}\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0 {\mmathPr\mbrkBin0\mbrkBinSub0\mdefJc1\mdispDef1\minterSp0\mintLim0\mintraSp0\mlMargin0\mmathFont0\mnaryLim1\mpostSp0\mpreSp0\mrMargin0\msmallFrac0\mwrapIndent1440\mwrapRight0}\deflang1033\deflangfe2052\adeflang1025\jexpand\showxmlerrors1\validatexml1{ \*\wgrffmtfilter 013f}\viewkind1\viewscale100\fet0\ftnbj\aenddoc\ftnrstcont\aftnrstcont\ftnnar\aftnnrlc\widowctrl\nospaceforul\nolnhtadjtbl\alntblind\lyttblrtgr\dntblnsbdb\noxlattoyen\wrppunct\nobrkwrptbl\expshrtn\snaptogridincell\asianbrkrule\htmautsp\noultrlspc 
\useltbaln\splytwnine\ftnlytwnine\lytcalctblwd\allowfieldendsel\lnbrkrule\nouicompat\nofeaturethrottle1\utinl\formshade\nojkernpunct\dghspace180\dgvspace180\dghorigin1800\dgvorigin1440\dghshow1\dgvshow1\dgmargin\pgbrdrhead\pgbrdrfoot\rsidroot10976062\sectd\sectlinegrid360\pgwsxn12240\pghsxn15840\marglsxn1440\margrsxn1440\margtsxn1440\margbsxn1440\guttersxn0\headery720\footery720\colsx720\ltrsect\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar \widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf1\par}\pard \plain\itap0\s1\keep\keepn\ilvl0\fi0\sb400\sa120\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 {\*\bkmkstart h.9ddtw4s9rgoq}{\*\bkmkend h.9ddtw4s9rgoq}{\rtlch\ab0\ai0\af3\alang1025\afs40\ltrch\b0\i0\fs40\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf2 Introduction}{\rtlch\ab0\ai0\af3\afs40\ltrch\b0\i0\fs40 \loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf2\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22 \ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\insrsid10976062\strike0\ulnone\cf1\par}\pard\plain\itap0\s0\ilvl0\fi0\sb0\sa0\aspalpha\aspnum\adjustright\brdrt\brdrl\brdrb 
\brdrr\brdrbtw\brdrbar\widctlpar\ltrpar\li0\lin0\ri0\rin0\ql\faauto\sl276\slmult1\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 This}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 is}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 a}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 test}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 document}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 for}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 
}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 Magika}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 , }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 yay}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 ! }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 We}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 are}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 going}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 to}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 
take}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 this}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 file}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 and}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 convert}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 it}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 in}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 
\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 a}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 number}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 of}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 other}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 }{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033 \loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 formats}{\rtlch\ab0\ai0\af3\alang1025\afs22\ltrch\b0\i0\fs22\lang1033\langnp1033\langfe1033\langfenp1033\loch\af3\dbch\af3\hich\f3\strike0\ulnone\cf1 .}{\rtlch\ab0\ai0\af3\afs22\ltrch\b0\i0\fs22\loch\af3\dbch\af3 \hich\f3\insrsid10976062\strike0\ulnone\cf1\par}{ \*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef0\lsdunhideuseddef0\lsdqformatdef0\lsdprioritydef0{\lsdlockedexcept\lsdqformat1 Normal;\lsdqformat1 heading 1;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 2;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 3; \lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 4;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 5;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 6;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 
7;\lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 8; \lsdsemihidden1\lsdunhideused1\lsdqformat1 heading 9;\lsdsemihidden1\lsdunhideused1\lsdqformat1 caption;\lsdqformat1 Title;\lsdqformat1 Subtitle;\lsdqformat1 Strong;\lsdqformat1 Emphasis;\lsdsemihidden1\lsdpriority99 Placeholder Text;\lsdqformat1\lsdpriority1 No Spacing; \lsdpriority60 Light Shading;\lsdpriority61 Light List;\lsdpriority62 Light Grid;\lsdpriority63 Medium Shading 1;\lsdpriority64 Medium Shading 2;\lsdpriority65 Medium List 1;\lsdpriority66 Medium List 2;\lsdpriority67 Medium Grid 1;\lsdpriority68 Medium Grid 2; \lsdpriority69 Medium Grid 3;\lsdpriority70 Dark List;\lsdpriority71 Colorful Shading;\lsdpriority72 Colorful List;\lsdpriority73 Colorful Grid;\lsdpriority60 Light Shading Accent 1;\lsdpriority61 Light List Accent 1;\lsdpriority62 Light Grid Accent 1;\lsdpriority63 Medium Shading 1 Accent 1; \lsdpriority64 Medium Shading 2 Accent 1;\lsdpriority65 Medium List 1 Accent 1;\lsdsemihidden1\lsdpriority99 Revision;\lsdqformat1\lsdpriority34 List Paragraph;\lsdqformat1\lsdpriority29 Quote;\lsdqformat1\lsdpriority30 Intense Quote;\lsdpriority66 Medium List 2 Accent 1; \lsdpriority67 Medium Grid 1 Accent 1;\lsdpriority68 Medium Grid 2 Accent 1;\lsdpriority69 Medium Grid 3 Accent 1;\lsdpriority70 Dark List Accent 1;\lsdpriority71 Colorful Shading Accent 1;\lsdpriority72 Colorful List Accent 1;\lsdpriority73 Colorful Grid Accent 1; \lsdpriority60 Light Shading Accent 2;\lsdpriority61 Light List Accent 2;\lsdpriority62 Light Grid Accent 2;\lsdpriority63 Medium Shading 1 Accent 2;\lsdpriority64 Medium Shading 2 Accent 2;\lsdpriority65 Medium List 1 Accent 2;\lsdpriority66 Medium List 2 Accent 2; \lsdpriority67 Medium Grid 1 Accent 2;\lsdpriority68 Medium Grid 2 Accent 2;\lsdpriority69 Medium Grid 3 Accent 2;\lsdpriority70 Dark List Accent 2;\lsdpriority71 Colorful Shading Accent 2;\lsdpriority72 Colorful List Accent 2;\lsdpriority73 Colorful Grid Accent 2; \lsdpriority60 Light Shading Accent 
3;\lsdpriority61 Light List Accent 3;\lsdpriority62 Light Grid Accent 3;\lsdpriority63 Medium Shading 1 Accent 3;\lsdpriority64 Medium Shading 2 Accent 3;\lsdpriority65 Medium List 1 Accent 3;\lsdpriority66 Medium List 2 Accent 3; \lsdpriority67 Medium Grid 1 Accent 3;\lsdpriority68 Medium Grid 2 Accent 3;\lsdpriority69 Medium Grid 3 Accent 3;\lsdpriority70 Dark List Accent 3;\lsdpriority71 Colorful Shading Accent 3;\lsdpriority72 Colorful List Accent 3;\lsdpriority73 Colorful Grid Accent 3; \lsdpriority60 Light Shading Accent 4;\lsdpriority61 Light List Accent 4;\lsdpriority62 Light Grid Accent 4;\lsdpriority63 Medium Shading 1 Accent 4;\lsdpriority64 Medium Shading 2 Accent 4;\lsdpriority65 Medium List 1 Accent 4;\lsdpriority66 Medium List 2 Accent 4; \lsdpriority67 Medium Grid 1 Accent 4;\lsdpriority68 Medium Grid 2 Accent 4;\lsdpriority69 Medium Grid 3 Accent 4;\lsdpriority70 Dark List Accent 4;\lsdpriority71 Colorful Shading Accent 4;\lsdpriority72 Colorful List Accent 4;\lsdpriority73 Colorful Grid Accent 4; \lsdpriority60 Light Shading Accent 5;\lsdpriority61 Light List Accent 5;\lsdpriority62 Light Grid Accent 5;\lsdpriority63 Medium Shading 1 Accent 5;\lsdpriority64 Medium Shading 2 Accent 5;\lsdpriority65 Medium List 1 Accent 5;\lsdpriority66 Medium List 2 Accent 5; \lsdpriority67 Medium Grid 1 Accent 5;\lsdpriority68 Medium Grid 2 Accent 5;\lsdpriority69 Medium Grid 3 Accent 5;\lsdpriority70 Dark List Accent 5;\lsdpriority71 Colorful Shading Accent 5;\lsdpriority72 Colorful List Accent 5;\lsdpriority73 Colorful Grid Accent 5; \lsdpriority60 Light Shading Accent 6;\lsdpriority61 Light List Accent 6;\lsdpriority62 Light Grid Accent 6;\lsdpriority63 Medium Shading 1 Accent 6;\lsdpriority64 Medium Shading 2 Accent 6;\lsdpriority65 Medium List 1 Accent 6;\lsdpriority66 Medium List 2 Accent 6; \lsdpriority67 Medium Grid 1 Accent 6;\lsdpriority68 Medium Grid 2 Accent 6;\lsdpriority69 Medium Grid 3 Accent 6;\lsdpriority70 Dark List Accent 6;\lsdpriority71 
Colorful Shading Accent 6;\lsdpriority72 Colorful List Accent 6;\lsdpriority73 Colorful Grid Accent 6; \lsdqformat1\lsdpriority19 Subtle Emphasis;\lsdqformat1\lsdpriority21 Intense Emphasis;\lsdqformat1\lsdpriority31 Subtle Reference;\lsdqformat1\lsdpriority32 Intense Reference;\lsdqformat1\lsdpriority33 Book Title;\lsdsemihidden1\lsdunhideused1\lsdpriority37 Bibliography; \lsdsemihidden1\lsdunhideused1\lsdqformat1\lsdpriority39 TOC Heading;}}} ================================================ FILE: tests_data/basic/ruby/code.rb ================================================ class Kalimat def initialize(nama = "Dunia") @nama = nama end def sapaan puts "Hai #{@nama}." end def perpisahan puts "Sampai jumpa #{@nama}." end end ================================================ FILE: tests_data/basic/rust/asm.rs ================================================ use std::arch::asm; fn main() { let mut x: u64 = 5; println!("Original value of x: {}", x); unsafe { asm!( "mov rax, {x}", "mul rax", "mov {x}, rax", x = inout(reg) x, ); } println!("Squared value of x: {}", x); assert_eq!(x, 5 * 5); } ================================================ FILE: tests_data/basic/rust/code.rs ================================================ fn main() { println!("Hello World!"); } ================================================ FILE: tests_data/basic/rust/test_case1.rs ================================================ /// Sample function to load a file fn load_model() { println!("Magika model ⏳"); // Simulating a delay for loading std::thread::sleep(std::time::Duration::from_millis(200)); println!("Model loaded successfully!"); } fn main() { // Load the model load_model(); } ================================================ FILE: tests_data/basic/rust/test_case2.rs ================================================ use std::fs::File; use std::io::{Write, BufWriter}; // Function to create a CSV file fn create_csv() -> std::io::Result<()> { let mut file = File::create("sample.csv")?; 
writeln!(file, "Name,Age,City")?; writeln!(file, "Alice,30,New York")?; writeln!(file, "Bob,25,Los Angeles")?; writeln!(file, "Charlie,35,Chicago")?; Ok(()) } // Function to create a JSON file fn create_json() -> std::io::Result<()> { let mut file = File::create("sample.json")?; writeln!(file, "{{\"name\": \"Alice\", \"age\": 30, \"city\": \"New York\"}}")?; Ok(()) } // Main function fn main() -> std::io::Result<()> { // Function calls create_csv()?; create_json()?; println!("Sample files created successfully."); Ok(()) } ================================================ FILE: tests_data/basic/smali/code.smali ================================================ .class public LHelloWorld; .super Ljava/lang/Object; .method public static main([Ljava/lang/String;)V .registers 2 sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream; const-string v1, "Hello World!" invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V return-void .end method ================================================ FILE: tests_data/basic/srt/code.srt ================================================ 1 00:00:00,000 --> 00:00:02,000 Hello, World! ================================================ FILE: tests_data/basic/swift/code.swift ================================================ import Foundation let string = "Hello, World!" 
print(string) ================================================ FILE: tests_data/basic/toml/doc.toml ================================================ [tool.poetry] name = "magika" version = "0.6.0-dev" description = "A tool to determine the content type of a file with deep-learning" authors = ["Yanick Fratantonio "] readme = "README.md" packages = [{include = "magika"}] [tool.poetry.dependencies] python = "^3.8,<3.13" click = "^8.1.3" tqdm = "^4.66.2" onnxruntime = "^1.17.0" numpy = [ {version = "^1.24", python = ">=3.8,<3.9"}, {version = "^1.26", python = ">=3.9,<3.13"} ] tabulate = "^0.9.0" python-dotenv = "^1.0.1" [tool.poetry.group.dev.dependencies] pytest = "^8.0.1" ipython = [ {version = "^8.12.3", python = ">=3.8,<3.9"}, {version = "^8.18.1", python = ">=3.9,<3.10"}, {version = "^8.21.0", python = ">=3.10,<3.13"} ] ruff = ">=0.2.2,<0.4.0" mypy = "^1.8.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or # McCabe complexity (`C901`) by default. select = ["E4", "E7", "E9", "F", "I001"] ignore = [] ================================================ FILE: tests_data/basic/tsv/magika_test.tsv ================================================ Name Value1 Value2 Value3 Test1 1 10 100 Test2 2 20 200 Test3 3 30 300 Test4 4 40 400 Test5 5 50 500 ================================================ FILE: tests_data/basic/twig/example.twig ================================================ {% set items = ['apple', 'banana', 'cherry'] %}
    {% for item in items %}
  • {{ loop.index }} - {{ item|title }}
  • {% endfor %}
{% block content %}

This is content from a block definition.

{% endblock %} {{ dump(items) }} {{ 'hello world'|title }} {% macro input(name, value = '', type = 'text') %} {% endmacro %} ================================================ FILE: tests_data/basic/txt/complex-sentence.txt ================================================ This is yet another simple test, it includes one simple sentence, but it is not as trivial as other simpler tests. ================================================ FILE: tests_data/basic/txt/few-words.txt ================================================ this is just a test ================================================ FILE: tests_data/basic/txt/lorem-big.txt ================================================ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer aliquam dui sit amet justo eleifend ullamcorper. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Mauris ut nunc feugiat, rutrum sem quis, sodales velit. Etiam gravida, nisi ut facilisis laoreet, metus odio laoreet dolor, in molestie leo nisl scelerisque justo. Nulla blandit massa a metus sagittis, vel blandit dui luctus. Donec arcu arcu, egestas convallis ultricies id, viverra a ante. Donec maximus turpis metus, eget mattis massa scelerisque in. Nam mattis, mi vitae iaculis dapibus, tellus eros cursus dui, sed sagittis elit augue ornare lacus. In massa massa, ornare in malesuada quis, elementum ut magna. Nullam rhoncus volutpat nibh, eu aliquet lectus. Praesent scelerisque velit sem, quis sodales augue venenatis eu. Ut facilisis erat erat, at finibus purus elementum et. Quisque dignissim nunc et magna blandit tincidunt. Sed in libero eget ligula molestie aliquet quis a enim. Curabitur ac mauris nec sapien sollicitudin consectetur vitae luctus nisi. Ut augue nisi, facilisis eget ullamcorper in, eleifend at augue. Sed lacinia est eget porta aliquam. Aliquam erat volutpat. 
Aliquam congue, mi sed laoreet vestibulum, lectus purus condimentum sem, sit amet elementum felis lectus porttitor mauris. Nam lobortis commodo ex, in lacinia augue congue nec. Fusce fermentum, sem vel hendrerit viverra, nulla elit molestie velit, a iaculis ex mi vitae ipsum. Mauris eleifend leo quis mi venenatis, gravida venenatis lacus commodo. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Integer laoreet vel leo quis suscipit. Nam consectetur ligula quis interdum volutpat. Donec at libero semper libero faucibus placerat in ut quam. Aliquam ipsum urna, posuere quis orci sit amet, vulputate finibus orci. Sed tortor metus, scelerisque eu magna sed, convallis aliquam erat. Sed pretium elit fermentum sodales hendrerit. Ut eleifend lectus et ullamcorper interdum. Sed laoreet, quam at placerat viverra, ipsum erat rhoncus dolor, sit amet elementum nisl elit at lectus. Praesent volutpat nulla at diam placerat, ut interdum diam vehicula. Etiam in mattis metus, sed pharetra urna. Aenean vitae urna sit amet metus malesuada sodales. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean varius est quam, a rutrum mi luctus et. Quisque fermentum, augue nec lobortis accumsan, enim arcu ultricies orci, at vestibulum magna ex id dui. Vestibulum vestibulum efficitur elementum. Mauris in congue nunc, vitae pulvinar ipsum. Pellentesque at volutpat lectus, quis feugiat justo. Sed sed malesuada justo. Quisque fermentum molestie porta. Aenean volutpat tellus a viverra ornare. Nulla semper mauris id orci efficitur, eu pharetra velit porta. Proin velit turpis, rutrum ut pretium molestie, pretium ut eros. Vivamus porttitor elementum varius. Proin a erat et lectus dapibus mollis at ut sapien. Cras pellentesque neque massa, non congue lorem bibendum at. Quisque accumsan elit at leo commodo molestie. Phasellus nibh enim, vehicula vitae massa non, scelerisque semper ligula. 
Aliquam facilisis, dolor non pulvinar consequat, dolor nisl porttitor dui, efficitur ultrices urna massa quis libero. Ut vel tincidunt odio. Quisque semper et est sed pharetra. Cras enim orci, convallis sit amet enim sed, varius mattis velit. Ut ac nisi in dui ultrices vehicula vel et velit. Quisque dictum feugiat velit. Etiam pellentesque turpis ullamcorper urna finibus mattis. Suspendisse convallis vulputate justo, vel fringilla tortor interdum ut. Nullam placerat, magna nec blandit interdum, nisl turpis pharetra dolor, quis laoreet neque lacus a ipsum. Nam blandit magna nisl, a mattis felis cursus sit amet. Fusce sit amet arcu feugiat, volutpat felis sed, tincidunt dolor. Praesent vulputate ex diam, ut congue sem pretium ut. Morbi pretium nunc sit amet turpis tincidunt, a ultrices enim tristique. Praesent lacinia id nisl nec hendrerit. Cras fermentum dui sit amet elementum tristique. Curabitur vel suscipit dui. Aenean at dolor dui. Quisque eget ex at tellus aliquet venenatis quis sit amet urna. Pellentesque gravida, ipsum at pharetra laoreet, ante mi sollicitudin ligula, a congue felis purus consectetur lectus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Duis mi diam, egestas a posuere non, ornare a lectus. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Sed lacinia porttitor arcu ac pulvinar. Etiam tincidunt porta lacus, eu lacinia quam venenatis sed. Suspendisse posuere metus sit amet massa congue, non pretium lectus vulputate. Donec in odio sit amet metus imperdiet accumsan. Praesent posuere venenatis ipsum in rutrum. Curabitur vulputate sapien sit amet diam sodales, non aliquet nisi blandit. Sed et lectus hendrerit, porttitor velit sed, tincidunt nulla. Donec aliquam dictum enim et porttitor. Etiam et mi sit amet justo vehicula posuere ac ut quam. Vestibulum malesuada, risus in placerat feugiat, mauris diam mollis ligula, nec mollis ligula lectus eu ligula. 
Ut bibendum fermentum justo, eget convallis nunc egestas eu. Nam a tellus sit amet sapien viverra tempor ac nec lorem. Maecenas ultricies neque ut odio pulvinar mattis. Vivamus aliquam nisl in ultrices suscipit. Praesent eleifend nisi eget nisl elementum, suscipit accumsan mauris ullamcorper. Cras placerat, purus non pulvinar volutpat, nisi mauris tincidunt justo, dictum luctus dui purus non erat. Ut et arcu lorem. Fusce in nisi vitae mauris pharetra congue. Ut nec ex lacinia, molestie elit vitae, hendrerit neque. Curabitur lobortis justo non libero scelerisque eleifend. Vivamus augue augue, fringilla ut ante nec, hendrerit efficitur dolor. Donec molestie elit congue tristique dictum. Cras finibus sed metus vitae placerat. Nunc vel cursus enim. Duis condimentum, massa et commodo sagittis, eros risus tempor odio, id ornare mauris leo a velit. In pretium risus vitae lectus pulvinar, in dignissim risus pharetra. Interdum et malesuada fames ac ante ipsum primis in faucibus. Etiam faucibus eleifend ante eu mattis. ================================================ FILE: tests_data/basic/txt/lorem-small.txt ================================================ Utilitatis causa amicitia est quaesita. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Collatio igitur ista te nihil iuvat. Honesta oratio, Socratica, Platonis etiam. Primum in nostrane potestate est, quid meminerimus? Duo Reges: constructio interrete. Quid, si etiam iucunda memoria est praeteritorum malorum? Si quidem, inquit, tollerem, sed relinquo. An nisi populari fama? Quamquam id quidem licebit iis existimare, qui legerint. Summum a vobis bonum voluptas dicitur. At hoc in eo M. Refert tamen, quo modo. Quid sequatur, quid repugnet, vident. Iam id ipsum absurdum, maximum malum neglegi. ================================================ FILE: tests_data/basic/txt/magika_test_pptx.txt ================================================ This is a test for Magika! Very cool if this can be detected correctly! 
================================================ FILE: tests_data/basic/txt/many-words.txt ================================================ this is just a test but it contains more words than the simple test ================================================ FILE: tests_data/basic/txt/one-sentence-with-newline.txt ================================================ This is just a test that includes a simple sentence. ================================================ FILE: tests_data/basic/txt/one-sentence.txt ================================================ This is just a test that includes a simple sentence. ================================================ FILE: tests_data/basic/txt/random-ascii.txt ================================================ faslkdfjhasdfkljhasdfklajshdfaklsjdfhaluehzsdjvnmcnbxzcv ================================================ FILE: tests_data/basic/typescript/code.ts ================================================ // This is typescript, and not valid javascript. interface Person { name: string; age: number; } function greet(person: Person): string { return `Hello, ${person.name}. 
You are ${person.age} years old.`; } const user: Person = { name: "Bob", age: 42, }; console.log(greet(user)); ================================================ FILE: tests_data/basic/yaml/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" - package-ecosystem: "docker" directory: "/" schedule: interval: "daily" - package-ecosystem: "pip" directory: "/python" schedule: interval: "daily" - package-ecosystem: "npm" directory: "/js" schedule: interval: "weekly" - package-ecosystem: "cargo" directory: "/rust" schedule: interval: "weekly" ================================================ FILE: tests_data/basic/yaml/python-test.yml ================================================ name: Python - test on: workflow_dispatch: push: branches: - 'main' paths: - 'python/**' - 'tests_data/**' - '.github/workflows/**' pull_request: paths: - 'python/**' - 'tests_data/**' - '.github/workflows/**' permissions: contents: read jobs: unit-testing: strategy: matrix: python-version: [ "3.8.x", "3.9.x", "3.10.x", "3.11.x", "3.12.x" ] os: [ "ubuntu-latest", "macos-latest" ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v4 - name: Setup Python uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # pin@v5 with: python-version: '${{ matrix.python-version }}' - name: Install poetry uses: abatilo/actions-poetry@7b6d33e44b4f08d7021a1dee3c044e9c253d6439 # pin@v3 with: poetry-version: "1.7.1" - name: Install the project dependencies working-directory: python run: poetry install - name: Run ruff check working-directory: python run: poetry run ruff check --verbose - name: Run ruff format check working-directory: python run: poetry run ruff format --check --verbose - name: Run mypy working-directory: python run: poetry run mypy magika tests - name: Run pytest working-directory: python run: poetry run pytest tests -m 
"not slow" ================================================ FILE: tests_data/basic/yara/rule.yar ================================================ rule Rule_485729_77379 { strings: $s1 = "HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run" $s2 = "Win32_Process" $s3 = "Create" wide condition: $s1 and ($s2 and $s3) meta: author = "CyberThreatResearch" date = "2019-09-23" tags = "malware, persistence, registry" } ================================================ FILE: tests_data/basic/zig/code.zig ================================================ const std = @import("std"); pub fn main() void { std.debug.print("Hello, World!\n", .{}); } ================================================ FILE: tests_data/mitra/php/php.php ================================================ PHP: Hypertext Preprocessor
"; ?> ================================================ FILE: tests_data/mitra_candidates/html.htm ================================================ -->

HTML page

### Table of Contents * [MagikaNode][1] * [identifyStream][2] * [Parameters][3] * [create][4] * [Parameters][5] * [Magika][6] * [identifyBytes][7] * [Parameters][8] * [create][9] * [Parameters][10] ## MagikaNode **Extends Magika** The main Magika object for Node use (`MagikaNode`). Example usage: ```js import { readFile } from "fs/promises"; import { MagikaNode as Magika } from "magika/node"; const data = await readFile("some file"); const magika = await Magika.create(); const result = await magika.identifyBytes(data); console.log(result.prediction.output.label); ``` For a client-side implementation, please import `Magika` instead. Note that this `MagikaNode` class extends `Magika`, which means that all public `Magika` APIs (e.g., `identifyBytes`) are available for `MagikaNode` as well. Demos: * Node: `/js/magika-cli.js`, which you can run with `yarn run bin -h`. * Client-side: see `/website/src/components/FileClassifierDemo.vue` ### identifyStream Identifies the content type from a read stream. #### Parameters * `stream` **ReadStream** A read stream. * `length` **[number][11]** Total length of stream data. Returns **MagikaResult** An object containing the result of the content type prediction. ### create Factory method to create a Magika instance. #### Parameters * `options` **MagikaOptions** The URLs or file paths where the model and its config are stored. Parameters are optional. If not provided, the model will be loaded from GitHub. Returns **[Promise][12]<[MagikaNode][1]>** ## Magika The main Magika object for client-side use. Example usage: ```js const file = new File(["# Hello I am a markdown file"], "hello.md"); const fileBytes = new Uint8Array(await file.arrayBuffer()); const magika = await Magika.create(); const result = await magika.identifyBytes(fileBytes); console.log(result.prediction.output.label); ``` For a Node implementation, please import `MagikaNode` instead. Demos: * Node: `/js/magika-cli.js`, which you can run with `yarn run bin -h`. 
* Client-side: see `/website/src/components/FileClassifierDemo.vue` ### identifyBytes Identifies the content type of a byte array. #### Parameters * `fileBytes` **[Uint8Array][13]** A fixed-length sequence of bytes. Returns **MagikaResult** An object containing the result of the content type prediction. ### create Factory method to create a Magika instance. #### Parameters * `options` **MagikaOptions** The URLs or file paths where the model and its config are stored. Parameters are optional. If not provided, the model will be loaded from GitHub. Returns **[Promise][12]<[Magika][6]>** [1]: #magikanode [2]: #identifystream [3]: #parameters [4]: #create [5]: #parameters-1 [6]: #magika [7]: #identifybytes [8]: #parameters-2 [9]: #create-1 [10]: #parameters-3 [11]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number [12]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise [13]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array ================================================ FILE: website-ng/src/content/docs/cli-and-bindings/js.md ================================================ --- title: "JavaScript / TypeScript Library" --- Use Magika in the browser or in Node! 
## Installing MagikaJS ```bash npm install magika ``` ## Using Magika in JavaScript Simple usage in Node: ```js import { readFile } from "fs/promises"; import { MagikaNode as Magika } from "magika/node"; const data = await readFile("some file"); const magika = await Magika.create(); const prediction = await magika.identifyBytes(data); console.log(prediction); ``` Simple usage in the browser: ```js import { Magika } from "magika"; const file = new File(["# Hello I am a markdown file"], "hello.md"); const fileBytes = new Uint8Array(await file.arrayBuffer()); const magika = await Magika.create(); const prediction = await magika.identifyBytes(fileBytes); console.log(prediction); ``` For more, see the API reference below. ## Command-line tool Please use the official CLI as it can perform batch processing and search for files recursively. Read more about that in the main [Command Line Interface (CLI)](/magika/cli-and-bindings/cli/) section. This one is useful to load the TensorflowJS model and see that it works as expected. Install it with `npm install -g magika`. You can then run it by executing `magika-js <paths>`. ``` Usage: magika-js [options] Magika JS - file type detection with ML. https://google.github.io/magika Arguments: paths Paths of the files to detect Options: --json-output Format output in JSON --model-url Model URL (default: "https://google.github.io/magika/models/standard_v3_2/model.json") --model-path Model file path --model-config-url Model config URL (default: "https://google.github.io/magika/models/standard_v3_2/config.min.json") --model-config-path Model config file path --by-stream Identify file via stream, not via bytes --debug Output debug information -h, --help display help for command ``` ## Loading the model and configuration MagikaJS is designed to be flexible in how you provide the model and configuration file to it. Both the Node and browser versions accept URLs to asynchronously load these two assets. 
```js const magika = await Magika.create({ modelURL: "https://...", configURL: "https://...", }); ``` The Node version also allows loading local files. ```js const magika = await Magika.create({ modelPath: "./assets/...", configPath: "./assets/...", }); ``` ## Development Using the model hosted on GitHub: ```bash yarn install yarn run build yarn run bin -- README.md ``` Using the local model: ```bash yarn install yarn run build (cd ../website; yarn install; yarn run dev) & yarn run bin --model-url http://localhost:5173/magika/model/model.json --model-config-url http://localhost:5173/magika/model/config.json ../tests_data/basic/* ``` Using the local `magika` package when developing the website: ```bash yarn install yarn run build yarn link (cd ../website; yarn link magika; yarn install; yarn run dev) & ``` ## Testing Execute: ```bash yarn install yarn run build yarn run test ``` ## API Reference See the [JavaScript API Reference](/magika/cli-and-bindings/js-api) section. ================================================ FILE: website-ng/src/content/docs/cli-and-bindings/other-bindings.md ================================================ --- title: "Other bindings" --- ### Go (In Progress) A Go port of Magika is currently under development. While the implementation is largely complete, a few final steps are needed before it can be published as a package. You can explore the source code and track its progress on GitHub. Source Code: github.com/google/magika/tree/main/go ### Other Languages Official bindings for other languages are not yet available. However, since Magika's core is built in Rust, it can be integrated into many programming environments using a Foreign Function Interface (FFI). For example: - Java (JVM): Integration should be possible using the Java Native Interface (JNI). - .NET: Integration can be achieved using P/Invoke. We encourage community contributions to create and maintain new bindings. 
If you are interested in developing one, please feel free to contact us to discuss it. ================================================ FILE: website-ng/src/content/docs/cli-and-bindings/overview.md ================================================ --- title: CLI & Bindings Overview --- Magika provides a native CLI for command-line use and official language bindings—including Python, JavaScript, and Rust—to enable seamless integration into diverse projects. | Artifact | Status | Latest version | Default model | | -------------------------------------------------------------- | -------------- | -------------- | ---------------------------------------------------------- | | [`magika` CLI](/magika/cli-and-bindings/cli) | Stable | `1.0.2` | [`standard_v3_3`](https://github.com/google/magika/blob/main/assets/models/standard_v3_3/README.md) | | [Python `Magika` module](/magika/cli-and-bindings/python) | Stable | `1.0.2` | [`standard_v3_3`](https://github.com/google/magika/blob/main/assets/models/standard_v3_3/README.md) | | [JavaScript / TypeScript package](/magika/cli-and-bindings/js) | Stable | `1.0.0` | [`standard_v3_3`](https://github.com/google/magika/blob/main/assets/models/standard_v3_3/README.md) | | [Rust `magika` library](/magika/cli-and-bindings/rust) | Stable | `1.0.1` | [`standard_v3_3`](https://github.com/google/magika/blob/main/assets/models/standard_v3_3/README.md) | | [Demo Website](/magika/demo/magika-demo/) | Stable | - | [`standard_v3_3`](https://github.com/google/magika/blob/main/assets/models/standard_v3_3/README.md) | | [Other bindings](/magika/cli-and-bindings/other-bindings) | WIP | - | - | ================================================ FILE: website-ng/src/content/docs/cli-and-bindings/python.md ================================================ --- title: "Python `Magika` Module" --- This guide provides documentation on how to use the `magika` Python module to identify file types from your code. 
:::tip This section assumes you are familiar with the topics discussed in the [Core Concepts](/magika/core-concepts/) section. ::: ## Quick Examples The `magika` API is designed to be simple and intuitive. The following examples cover the most common use cases for identifying content from bytes, paths, and streams. **From bytes:** ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_bytes(b'function log(msg) {console.log(msg);}') >>> print(res.output.label) javascript ``` **From a file path:** ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_path('./tests_data/basic/ini/doc.ini') >>> print(res.output.label) ini ``` **From an open file stream:** ```python >>> from magika import Magika >>> m = Magika() >>> with open('./tests_data/basic/ini/doc.ini', 'rb') as f: >>> res = m.identify_stream(f) >>> print(res.output.label) ini ``` ## API Reference ### Instantiating `Magika` First, create an instance of the `Magika` class. The constructor accepts several optional arguments to customize its behavior. ```python from magika import Magika, PredictionMode # Default instantiation magika = Magika() # Custom instantiation magika_custom = Magika( model_dir="/path/to/custom/model", prediction_mode=PredictionMode.BEST_GUESS, no_dereference=True, ) ``` **Constructor Arguments:** - `model_dir` (`Path`, optional): Path to a directory containing a custom model. If not provided, defaults to the latest bundled model. - `prediction_mode` (`PredictionMode`, optional): The prediction mode to use. Defaults to `PredictionMode.HIGH_CONFIDENCE`. - `no_dereference` (`bool`, optional): If `True`, symbolic links will not be followed; their content type will be reported as `symlink`. Defaults to `False`. **Identifying Content** Once instantiated, the `Magika` object provides several methods for identifying content from different sources. - `magika.identify_bytes(bytes)`: Identifies the content type of an in-memory bytes object. 
- `magika.identify_path(path)`: Identifies the content type of a single file from its path (`str | os.PathLike`). - `magika.identify_paths(paths)`: Identifies the content type for a list of file paths. - `magika.identify_stream(stream)`: Identifies the content type from an already-open binary file-like object (e.g., the output of `open(file_path, 'rb')`). Note: 1) Magika will `seek()` around the stream; 2) the stream _is not closed_ (closing is the responsibility of the caller). If you are dealing with large files, the `identify_path`, `identify_paths`, and `identify_stream` variants are generally better: their implementation `seek()`s around the file/stream to extract the needed features, without loading the entire content in memory. :::tip[Performance with Large Files] For large files, the `identify_path`, `identify_paths`, and `identify_stream` methods are highly recommended. They are optimized to read only the necessary portions of the file by seeking within the file/stream, which avoids loading the entire content into memory. If your content is already loaded into a bytes object, `identify_bytes` is the most direct and efficient option. ::: **Understanding the Result** All `identify_*` methods return a `MagikaResult` object. This object acts as a wrapper that contains the prediction details and the status of the operation. **You should always check if the operation was successful before accessing the prediction.** ```python >>> result = m.identify_path("path/to/file") >>> if result.ok: ... print(f"File is a {result.output.description}") ... print(f"MIME Type: {result.output.mime_type}") ... else: ... print(f"Error: {result.status.message}") ``` ### Data Models The `MagikaResult` object and its nested data classes provide detailed information about the scan. Consult the [Understanding the Output](/magika/core-concepts/understanding-the-output) section for more context. 
`MagikaResult` ```python class MagikaResult: path: Path ok: bool status: Status prediction: MagikaPrediction # Shortcuts available only when result.ok is True dl: ContentTypeInfo output: ContentTypeInfo score: float ``` - `ok` (bool): `True` if the identification was successful, `False` otherwise. - `status` (Status): Provides details on an error if `ok` is `False`. - `prediction` (`MagikaPrediction`): The core prediction object, available only if `ok` is `True`. - `dl`, `output`, `score`: For convenience, these are direct shortcuts to the corresponding fields within the `prediction` object. `MagikaPrediction` Contains the core deep learning model prediction and the final Magika output. ```python class MagikaPrediction: dl: ContentTypeInfo output: ContentTypeInfo score: float overwrite_reason: OverwriteReason ``` - `dl` (`ContentTypeInfo`): The raw prediction from the deep learning model. - `output` (`ContentTypeInfo`): The final prediction from "Magika the tool," which considers the model's prediction, its confidence score, and the selected prediction mode. **This is the result most users should rely on.** - `score` (`float`): The model's confidence score (from 0.0 to 1.0). - `overwrite_reason` (`OverwriteReason`): It indicates why the deep learning model's prediction was overwritten (e.g., low confidence). `ContentTypeInfo` Contains detailed metadata about a predicted content type. ```python class ContentTypeInfo: label: ContentTypeLabel # e.g., "python" mime_type: str # e.g., "text/x-python" group: str # e.g., "code" description: str # e.g., "Python source" extensions: List[str] # e.g., ["py", "pyc"] is_text: bool # e.g., True ``` `ContentTypeLabel` A string enum (`StrEnum`) of all possible content type labels. Because it's a `StrEnum`, its members can be used and compared just like regular strings. ```python class ContentTypeLabel(StrEnum): APK = "apk" BMP = "bmp" # ... 
and many more ``` :::caution **`ContentTypeLabel` is a superset of supported types.** This enum is generated from our internal Content Types Knowledge Base and includes many types that the default model may not be trained to detect. The presence of a label in this enum **does not guarantee** it can be a prediction result. To get the definitive list of possible output labels that "Magika the tool" can return, use the `magika.get_output_content_types()` method, discussed next. ::: ### Additional APIs The `Magika` class also exposes a few helper methods: - `get_output_content_types()`: Returns a list of all possible content type labels that Magika can return in the `output.label` field. This is the recommended way to get a definitive list of Magika's possible outputs. - `get_model_content_types()`: Returns a list of all possible content type labels the _deep learning model_ can return (i.e., the possible values for `dl.label`, in addition to `undefined`). This is useful for debugging. - `get_module_version()`: Returns the `magika` Python package version as a string. - `get_model_version()`: Returns the name of the model being used as a string. ## Development setup This section is for contributors to the `magika` Python package. - **Project Management:** `magika` uses `uv` for dependency management. To install all development dependencies, run: `cd python; uv sync`. - **Testing:** To run the test suite, use `pytest`. You can exclude slow tests for faster runs: `cd python; uv run pytest tests -m "not slow"`. Refer to the GitHub Actions workflows for more testing examples. - **Packaging:** We use `maturin` to build the Python package, which combines the Rust-based CLI with the Python source code. This process is automated in our [Build and Release Python Package GitHub Action](https://github.com/google/magika/blob/main/.github/workflows/python-build-and-release-package.yml). - **Publishing:** We publish to PyPI via GitHub Trusted Publishing. 
This is automated by the [Build and Release Python Package GitHub Action](https://github.com/google/magika/blob/main/.github/workflows/python-build-and-release-package.yml), which publishes packages (binary wheels, pure-python wheels, and source distribution) to PyPI (or TestPyPI) after pushing a tag with `python-v*` (or `python-test-v*`) as prefix. This also takes care of attestation. ================================================ FILE: website-ng/src/content/docs/cli-and-bindings/rust.md ================================================ --- title: "Rust Library" --- The documentation for the Rust `magika` library is on docs.rs: https://docs.rs/magika. ================================================ FILE: website-ng/src/content/docs/contributing/creating-new-bindings.md ================================================ --- title: Creating New Bindings --- These notes are meant to help developers of new bindings. ### Reference implementation The reference implementation is Python's `Magika` module, at `python/src/magika.py`. The input vs. expected output examples are stored in `tests_data/reference`. See below for information on the format. The reference tests are generated with `cd python && uv run ./scripts/generate_reference.py`. ### Aspects to implement There are three aspects that need to be implemented: - Logic that handles "should we even use the model?" See `_get_result_or_features_from_path`. - Features extraction. See `_extract_features_from_seekable`. - How to obtain "Magika's output" from the model's prediction and its score (the result depends on the prediction mode, thresholds, and overwrite_map). See `_get_output_ct_label_from_dl_result`. ### Testing We have a number of test cases that one can use to check that a new implementation matches the reference implementation. 
Testing that the output (e.g., model prediction, tool overall prediction, score) of the tool matches the expectations: - We have a number of test cases that cover normal situations as well as corner cases related to small files, content types with custom thresholds and overwrite maps, and prediction modes. Note that these corner cases are model-specific (they depend on the actual weights). We use a fuzzing-like approach to generate them. - These examples are stored in two formats, "examples by path" and "examples by content". They are stored at `tests_data/reference/<model_name>-inference_examples_by_path.json.gz` and `tests_data/reference/<model_name>-inference_examples_by_content.json.gz`. These store a list of `ExampleByPath` and `ExampleByContent` (defined in `python/tests/test_inference_vs_reference.py`), respectively. Testing the features extraction: - Input and expected output of the features extraction: `tests_data/reference/features_extraction_examples.json.gz`. - The JSON contains a list of `FeaturesExtractionExample` (defined in `python/tests/test_features_extraction_vs_reference.py`). - Suggestion: having a testable "extract features" function makes your life much easier. - Note that end-to-end tests would not be enough to be confident the features extraction is correctly implemented, as small bugs may require VERY specific input to show differences. What is *not* covered by the existing tests: - How to deal with special files (e.g., symlinks, directories). - How to deal with `permission error`. - How to deal with `file_not_found_error`. ================================================ FILE: website-ng/src/content/docs/contributing/how-to-contribute.md ================================================ --- title: How to Contribute --- We would love to accept your patches and contributions to this project! 
### Before you begin #### Sign our Contributor License Agreement Contributions to this project must be accompanied by a [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. If you or your current employer have already signed the Google CLA (even if it was for a different project), you probably don't need to do it again. Visit <https://cla.developers.google.com/> to see your current agreements or to sign a new one. #### Review our Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google/conduct/). ### Contribution process #### Code Reviews All submissions, including submissions by project members, require review. We use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) for this purpose. ================================================ FILE: website-ng/src/content/docs/contributing/known-limitations.md ================================================ --- title: Known Limitations --- Magika significantly improves over the state of the art, but there's always room for improvement. More work can be done to increase detection accuracy, add support for additional content types, provide bindings for more languages, etc. This initial release is not targeting polyglot detection, and we're looking forward to seeing adversarial examples from the community. We would also love to hear from the community about encountered problems, misdetections, feature requests, need for support for additional content types, etc. Check our open GitHub issues to see what is on our roadmap and please report misdetections or feature requests by either opening GitHub issues (preferred) or by emailing us at magika-dev@google.com. 
Check [GitHub's open issues labeled as "help wanted"](https://github.com/google/magika/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) as a starting point. :::caution Do **NOT** send reports about files that may contain PII! ::: ================================================ FILE: website-ng/src/content/docs/contributing/reporting-security-vulnerabilities.md ================================================ --- title: Reporting Security Vulnerabilities --- Please contact us directly at magika-dev@google.com. ================================================ FILE: website-ng/src/content/docs/core-concepts/how-magika-works.md ================================================ --- title: How Magika Works --- Magika's command-line tool and language bindings are essentially wrappers around a compact deep learning model, optimized for efficient inference on standard CPUs. The identification process is highly efficient because Magika primarily inspects a few hundred bytes of a file (depending on the model, usually up to 2K bytes). This approach ensures a fast, constant-time inference that is independent of the overall file size. The core process works as follows: 1. Magika reads a few chunks of the input file (or byte stream). This is fast and memory efficient even for big files, as they are never fully read in memory. 2. It extracts "features" from these initial bytes, which are then processed by the deep learning model to predict the content type. 3. After the model makes a prediction, Magika evaluates its confidence score. 4. If this score exceeds a predefined threshold for the predicted type, Magika accepts the model's prediction. If the confidence is too low, Magika returns a more generic label, such as `txt` (for text files) or `unknown` (for binary files). 
This distinction is important: Magika internally manages two content type labels—one from the deep learning model and one from **"Magika the tool."** While they are often the same, they can differ when the model's confidence is low or in certain edge cases. The model is not used in all situations. Specifically: - If the input file is **empty**, Magika returns `empty`. - If the input is not a regular file, such as a **directory** or a **symlink**, Magika returns `directory` or `symlink`. - If the file is **too small** for the model (e.g., under ~8 bytes), Magika uses simple heuristics to return a generic answer like `txt` or `unknown`. In these cases, the model is not run, and its internal content type label is set to `undefined`. By default, users only see the final, processed prediction, but the model's raw output can be inspected for debugging. See the [Understanding the Output](/magika/core-concepts/understanding-the-output/) section for details. ================================================ FILE: website-ng/src/content/docs/core-concepts/models-and-content-types.md ================================================ --- title: Models & Supported Content Types --- Each Magika model is trained to detect a specific set of content types. Newer models typically expand this set, supporting a superset of the content types from previous versions, unless specified otherwise. For instance, our initial `standard_v1` model supported approximately 100 content types. The latest model, `standard_v3_3`, supports over 200 content types, while maintaining similar accuracy and inference speed. The list of supported content types is documented in the [model's page](/magika/models/standard_v3_3). :::tip The models' READMEs contain two lists: "the output space of the model" and "the output space of Magika the tool." The second list is a superset of the first, including additional labels like `empty`, `directory`, and so on. 
::: Details on the improvements and tradeoffs for each model can be found in the models' CHANGELOG on GitHub: [models/CHANGELOG.md](https://github.com/google/magika/blob/main/assets/models/CHANGELOG.md). Clients and bindings usually integrate the latest available model, but this may not always be the case. For more information, check the [bindings section](/magika/cli-and-bindings/overview). :::caution You may see a "content types knowledge base" (KB) in the GitHub source code. This is a comprehensive list of all content types we track internally for research and development. It should **not** be confused with the content types Magika currently supports. The KB is a superset of what any single model supports. To be certain which content types a specific model supports, always refer to its README file. ::: ================================================ FILE: website-ng/src/content/docs/core-concepts/prediction-modes.md ================================================ --- title: Prediction Modes --- Magika's deep learning model returns each prediction with a confidence score (from 0.0 to 1.0). A common challenge with classification models is determining the minimum score required to trust a result. Instead of a single, global threshold, Magika uses **per-content-type thresholds**. The rationale is that the model is naturally more confident about some types than others. For example, our experiments show that most valid PDFs are detected with over 99% confidence, so a prediction with an 80% score might be questionable. In contrast, an 80% score for a JavaScript file is often a very reliable prediction. Magika manages these confidence levels in two ways: - **Pre-tuned Thresholds:** Each model ships with carefully tuned, per-content-type thresholds derived from evaluating the model on our large validation dataset. - **Prediction Modes:** Because the impact of a misidentification varies by use case, Magika allows you to select a prediction mode. 
This lets you balance precision (accuracy of predictions) and recall (number of identified files). The available modes are `high-confidence`, `medium-confidence`, and `best-guess`. The `high-confidence` mode offers higher precision at the cost of lower recall. In contrast, `best-guess` provides the highest recall—potentially with lower precision—as it returns the model's prediction regardless of its confidence score. This can be selected via a command-line flag or as an option in the language bindings. ================================================ FILE: website-ng/src/content/docs/core-concepts/understanding-the-output.md ================================================ --- title: Understanding the Output --- Whether you use the CLI or one of the language bindings (Python, Rust, JavaScript), Magika provides the same core prediction data. While many users only need the final content type label, detailed information is always available. The CLI offers flexible output formats like JSON, and the APIs provide dedicated result objects (e.g., the Python `MagikaResult` object). The meaning of each field is best understood through an example. ```shell $ magika tests_data/basic/javascript/code.js --json [ { "path": "tests_data/basic/javascript/code.js", "result": { "status": "ok", "value": { "dl": { "description": "JavaScript source", "extensions": [ "js", "mjs", "cjs" ], "group": "code", "is_text": true, "label": "javascript", "mime_type": "application/javascript" }, "output": { "description": "JavaScript source", "extensions": [ "js", "mjs", "cjs" ], "group": "code", "is_text": true, "label": "javascript", "mime_type": "application/javascript" }, "score": 0.9710000157356262 } } } ] ``` This is how to interpret the output: - `path` is simply the file path this prediction is referring to (relevant when scanning multiple files at the same time). - `result.status` indicates whether magika was able to scan the sample. 
`ok` means the scan succeeded, in which case a `value` field is present with the details about the output. - `score` indicates the confidence of the prediction. - the `dl` block returns information about the prediction made by the deep learning model. In this case, the model predicted `javascript`. - the `output` block returns information about the prediction of "Magika the tool", which, as discussed in previous sections, considers a number of aspects such as the prediction of the deep learning model, its confidence score, and the selected prediction mode. In the example above, the model's confidence was high enough to be trustworthy, and thus the output of "Magika the tool" matches the content type inferred by the deep learning model. - the `dl` and `output` blocks contain several metadata fields about the predicted content type, such as a simple textual label suitable for automated processing (`label`), a human-readable description (`description`), MIME Type (`mime_type`), a list of extensions usually associated with the predicted content type (`extensions`), a high-level group (`group`), and a boolean that indicates whether the type is textual or not (`is_text`). Here is how to interpret the output: - `path`: The file path corresponding to this prediction. - `result.status`: `ok` indicates a successful scan. If the status is not `ok`, the `value` field will be absent. The `value` field is present on successful scans and contains the following details: - `score`: The model's confidence in this prediction. - `dl`: Contains the raw prediction from the deep learning model. - `output`: Contains the final prediction from "Magika the tool." This result considers the model's prediction, its confidence score, and the selected prediction mode. In this example, the model's confidence was high, so the final output matches the model's prediction. Within both `dl` and `output`, you will find: - `label`: A simple, machine-readable content type label (e.g., `javascript`).
The possible values for `dl.label` and `output.label` are documented in each model's README. - `description`: A human-readable description. - `mime_type`: The corresponding MIME type. - `group`: A high-level category (e.g., code, document, media). - `is_text`: A boolean indicating if the content is textual. - `extensions`: A list of common file extensions for this content type. As mentioned previously, when the model is not used (e.g., for empty or very small files), `dl.label` is set to `undefined`, and the `output` block will contain a special or generic content type such as `empty`, `txt`, or `unknown`. For most applications, you should use the `output.label` field, which is the default output of the CLI. The raw `dl` block is provided primarily for debugging and advanced use cases. See also the [FAQ](/magika/additional-resources/faq) for why it is best to integrate Magika's results by focusing on `label` rather than other fields like `mime_type`. ================================================ FILE: website-ng/src/content/docs/demo/magika-demo.mdx ================================================ --- title: "Magika Demo" --- import { Card } from '@astrojs/starlight/components'; import MagikaDemo from '@/components/MagikaDemo.svelte'; ================================================ FILE: website-ng/src/content/docs/getting-started/installation.mdx ================================================ --- title: Installation --- import { Tabs, TabItem } from '@astrojs/starlight/components'; ### Command Line Tool If you intend to use Magika only as a command line tool, you can install it in a number of ways: ```shell pipx install magika ``` ```shell curl -LsSf https://securityresearch.google/magika/install.sh | sh ``` ```shell wget -qO- https://securityresearch.google/magika/install.sh | sh ``` ```shell powershell -ExecutionPolicy Bypass -c "irm https://securityresearch.google/magika/install.ps1 | iex" ``` ```shell brew install magika ``` ### Python package The Magika Python package is available as `magika` on PyPI:
```shell pip install magika ``` If you want to test out the latest release candidate from pypi, you can install it with ```shell pip install --pre magika ``` ### JavaScript Package JavaScript / TypeScript bindings can be installed with npm: ```shell npm install magika ``` ### Docker If you want to test Magika's latest version (from git) within a Docker container, you can run: ```shell git clone https://github.com/google/magika cd magika/ docker build -t magika . docker run -it --rm -v $(pwd):/magika magika -r /magika/tests_data/basic ``` ================================================ FILE: website-ng/src/content/docs/getting-started/quick-start.md ================================================ --- title: Quick Start --- Here you can find a number of quick examples just to get you started. To learn about Magika's inner workings, see the [Core Concepts](/magika/core-concepts/how-magika-works) section. ### Command Line Tool Examples ```shell % cd tests_data/basic && magika -r * | head asm/code.asm: Assembly (code) batch/simple.bat: DOS batch file (code) c/code.c: C source (code) css/code.css: CSS source (code) csv/magika_test.csv: CSV document (code) dockerfile/Dockerfile: Dockerfile (code) docx/doc.docx: Microsoft Word 2007+ document (document) docx/magika_test.docx: Microsoft Word 2007+ document (document) eml/sample.eml: RFC 822 mail (text) empty/empty_file: Empty file (inode) ``` ```shell % magika ./tests_data/basic/python/code.py --json [ { "path": "./tests_data/basic/python/code.py", "result": { "status": "ok", "value": { "dl": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "output": { "description": "Python source", "extensions": [ "py", "pyi" ], "group": "code", "is_text": true, "label": "python", "mime_type": "text/x-python" }, "score": 0.996999979019165 } } } ] ``` ```shell % cat tests_data/basic/ini/doc.ini | magika - -: INI configuration file (text) 
``` ```shell % magika --help Determines file content types using AI Usage: magika [OPTIONS] [PATH]... Arguments: [PATH]... List of paths to the files to analyze. Use a dash (-) to read from standard input (can only be used once). Options: -r, --recursive Identifies files within directories instead of identifying the directory itself --no-dereference Identifies symbolic links as is instead of identifying their content by following them --colors Prints with colors regardless of terminal support --no-colors Prints without colors regardless of terminal support -s, --output-score Prints the prediction score in addition to the content type -i, --mime-type Prints the MIME type instead of the content type description -l, --label Prints a simple label instead of the content type description --json Prints in JSON format --jsonl Prints in JSONL format --format Prints using a custom format (use --help for details). The following placeholders are supported: %p The file path %l The unique label identifying the content type %d The description of the content type %g The group of the content type %m The MIME type of the content type %e Possible file extensions for the content type %s The score of the content type for the file %S The score of the content type for the file in percent %b The model output if overruled (empty otherwise) %% A literal % -h, --help Print help (see a summary with '-h') -V, --version Print version ``` For more examples and documentation about the CLI, see https://crates.io/crates/magika-cli. 
### Python Examples ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_bytes(b'function log(msg) {console.log(msg);}') >>> print(res.output.label) javascript ``` ```python >>> from magika import Magika >>> m = Magika() >>> res = m.identify_path('./tests_data/basic/ini/doc.ini') >>> print(res.output.label) ini ``` ```python >>> from magika import Magika >>> m = Magika() >>> with open('./tests_data/basic/ini/doc.ini', 'rb') as f: >>> res = m.identify_stream(f) >>> print(res.output.label) ini ``` For more examples and documentation about the Python module, see the [Python `Magika` module](/magika/cli-and-bindings/python) section. ================================================ FILE: website-ng/src/content/docs/index.mdx ================================================ --- title: Magika description: Magika landing page template: splash --- ================================================ FILE: website-ng/src/content/docs/introduction/overview.md ================================================ --- title: Magika --- Magika is a novel AI-powered file type detection tool that relies on recent advances in deep learning to provide accurate detection. Under the hood, Magika employs a custom, highly optimized model that weighs only a few MBs and enables precise file identification within milliseconds, even when running on a single CPU. Magika has been trained and evaluated on a dataset of ~100M samples across 200+ content types (covering both binary and textual file formats), and it achieves an average ~99% accuracy on our test set. Here is an example of what Magika's command line output looks like:

Magika is used at scale to help improve Google users' safety by routing Gmail, Drive, and Safe Browsing files to the proper security and content policy scanners, processing hundreds of billions of samples on a weekly basis. Magika has also been integrated with [VirusTotal](https://www.virustotal.com/) ([example](/magika/magika-vt.png)) and [abuse.ch](https://bazaar.abuse.ch/) ([example](/magika/magika-abusech.png)). For more context you can read our initial [announcement post on Google's OSS blog](https://opensource.googleblog.com/2024/02/magika-ai-powered-fast-and-efficient-file-type-identification.html), and you can read more in our [research paper](/magika/additional-resources/research-papers-and-citation), published at the IEEE/ACM International Conference on Software Engineering (ICSE) 2025. You can try Magika without installing anything by using our [web demo](/magika/demo/magika-demo), which runs locally in your browser! ### Highlights - Available as a command line tool written in Rust, a Python API, and additional bindings for Rust, JavaScript/TypeScript (with an experimental npm package, which powers the [web demo](/magika/demo/magika-demo)), and GoLang (WIP). - Trained and evaluated on a dataset of ~100M files across [200+ content types](/magika/models/standard_v3_3). - On our test set, Magika achieves ~99% average precision and recall, outperforming existing approaches -- especially on textual content types. - After the model is loaded (which is a one-off overhead), the inference time is about 5ms per file, even when run on a single CPU. - You can invoke Magika on thousands of files at the same time. You can also use `-r` for recursively scanning a directory. - Near-constant inference time, independent of the file size; Magika only uses a limited subset of the file's content.
- Magika uses a pre-tuned, per-content-type threshold system that determines whether to "trust" the prediction from the model, or whether to return a generic label, such as "Generic text document" or "Unknown binary data". - The tolerance to errors can be controlled via different [prediction modes](/magika/core-concepts/prediction-modes). - Support for all major operating systems. - The client and the bindings are already open source, and more is coming soon! ================================================ FILE: website-ng/src/content/docs/models/standard_v3_3.md ================================================ --- title: standard_v3_3 model --- This page documents the `standard_v3_3` model. ## List of possible outputs This is the full list of all of Magika's possible outputs (which differs from the list of possible outputs of the underlying model; see the following section for that list, and consult the [Core Concepts](/magika/core-concepts/how-magika-works/) section for more context). For example, these are all the possible values of the Magika Python module's `MagikaResult.prediction.output.label`.
| Content Type Label | Description | |:-------------:|------| | 3gp | 3GPP multimedia file | | ace | ACE archive | | ai | Adobe Illustrator Artwork | | aidl | Android Interface Definition Language | | apk | Android package | | applebplist | Apple binary property list | | appleplist | Apple property list | | asm | Assembly | | asp | ASP source | | autohotkey | AutoHotKey script | | autoit | AutoIt script | | awk | Awk | | batch | DOS batch file | | bazel | Bazel build file | | bib | BibTeX | | bmp | BMP image data | | bzip | bzip2 compressed data | | c | C source | | cab | Microsoft Cabinet archive data | | cat | Windows Catalog file | | chm | MS Windows HtmlHelp Data | | clojure | Clojure | | cmake | CMake build file | | cobol | Cobol | | coff | Intel 80386 COFF | | coffeescript | CoffeeScript | | cpp | C++ source | | crt | Certificates (binary format) | | crx | Google Chrome extension | | cs | C# source | | csproj | .NET project config | | css | CSS source | | csv | CSV document | | dart | Dart source | | deb | Debian binary package | | dex | Dalvik dex file | | dicom | DICOM | | diff | Diff file | | directory | A directory | | dm | Dream Maker | | dmg | Apple disk image | | doc | Microsoft Word CDF document | | dockerfile | Dockerfile | | docx | Microsoft Word 2007+ document | | dsstore | Application Desktop Services Store | | dwg | Autocad Drawing | | dxf | Audocad Drawing Exchange Format | | elf | ELF executable | | elixir | Elixir script | | emf | Windows Enhanced Metafile image data | | eml | RFC 822 mail | | empty | Empty file | | epub | EPUB document | | erb | Embedded Ruby source | | erlang | Erlang source | | flac | FLAC audio bitstream data | | flv | Flash Video | | fortran | Fortran | | gemfile | Gemfile file | | gemspec | Gemspec file | | gif | GIF image data | | gitattributes | Gitattributes file | | gitmodules | Gitmodules file | | go | Golang source | | gradle | Gradle source | | groovy | Groovy source | | gzip | gzip compressed data | | h5 | 
Hierarchical Data Format v5 | | handlebars | Handlebars source | | haskell | Haskell source | | hcl | HashiCorp configuration language | | hlp | MS Windows help | | htaccess | Apache access configuration | | html | HTML document | | icns | Mac OS X icon | | ico | MS Windows icon resource | | ics | Internet Calendaring and Scheduling | | ignorefile | Ignorefile | | ini | INI configuration file | | internetshortcut | MS Windows Internet shortcut | | ipynb | Jupyter notebook | | iso | ISO 9660 CD-ROM filesystem data | | jar | Java archive data (JAR) | | java | Java source | | javabytecode | Java compiled bytecode | | javascript | JavaScript source | | jinja | Jinja template | | jp2 | jpeg2000 | | jpeg | JPEG image data | | json | JSON document | | jsonl | JSONL document | | julia | Julia source | | kotlin | Kotlin source | | latex | LaTeX document | | lha | LHarc archive | | lisp | Lisp source | | lnk | MS Windows shortcut | | lua | Lua | | m3u | M3U playlist | | m4 | GNU Macro | | macho | Mach-O executable | | makefile | Makefile source | | markdown | Markdown document | | matlab | Matlab Source | | mht | MHTML document | | midi | Midi | | mkv | Matroska | | mp3 | MP3 media file | | mp4 | MP4 media file | | mscompress | MS Compress archive data | | msi | Microsoft Installer file | | mum | Windows Update Package file | | npy | Numpy Array | | npz | Numpy Arrays Archive | | nupkg | NuGet Package | | objectivec | ObjectiveC source | | ocaml | OCaml | | odp | OpenDocument Presentation | | ods | OpenDocument Spreadsheet | | odt | OpenDocument Text | | ogg | Ogg data | | one | One Note | | onnx | Open Neural Network Exchange | | otf | OpenType font | | outlook | MS Outlook Message | | parquet | Apache Parquet | | pascal | Pascal source | | pcap | pcap capture file | | pdb | Windows Program Database | | pdf | PDF document | | pebin | PE Windows executable | | pem | PEM certificate | | perl | Perl source | | php | PHP source | | pickle | Python pickle | | png | PNG image | | 
po | Portable Object (PO) for i18n | | postscript | PostScript document | | powershell | Powershell source | | ppt | Microsoft PowerPoint CDF document | | pptx | Microsoft PowerPoint 2007+ document | | prolog | Prolog source | | proteindb | Protein DB | | proto | Protocol buffer definition | | psd | Adobe Photoshop | | python | Python source | | pythonbytecode | Python compiled bytecode | | pytorch | Pytorch storage file | | qt | QuickTime | | r | R (language) | | rar | RAR archive data | | rdf | Resource Description Framework document (RDF) | | rpm | RedHat Package Manager archive (RPM) | | rst | ReStructuredText document | | rtf | Rich Text Format document | | ruby | Ruby source | | rust | Rust source | | scala | Scala source | | scss | SCSS source | | sevenzip | 7-zip archive data | | sgml | sgml | | shell | Shell script | | smali | Smali source | | snap | Snap archive | | solidity | Solidity source | | sql | SQL source | | sqlite | SQLITE database | | squashfs | Squash filesystem | | srt | SubRip Text Format | | stlbinary | Stereolithography CAD (binary) | | stltext | Stereolithography CAD (text) | | sum | Checksum file | | svg | SVG Scalable Vector Graphics image data | | swf | Small Web File | | swift | Swift | | symlink | Symbolic link | | tar | POSIX tar archive | | tcl | Tickle | | textproto | Text protocol buffer | | tga | Targa image data | | thumbsdb | Windows thumbnail cache | | tiff | TIFF image data | | toml | Tom's obvious, minimal language | | torrent | BitTorrent file | | tsv | TSV document | | ttf | TrueType Font data | | twig | Twig template | | txt | Generic text document | | typescript | TypeScript source | | unknown | Unknown binary data | | vba | MS Visual Basic source (VBA) | | vcxproj | Visual Studio MSBuild project | | verilog | Verilog source | | vhdl | VHDL source | | vtt | Web Video Text Tracks | | vue | Vue source | | wasm | Web Assembly | | wav | Waveform Audio file (WAV) | | webm | WebM media file | | webp | WebP media file | | 
winregistry | Windows Registry text | | wmf | Windows metafile | | woff | Web Open Font Format | | woff2 | Web Open Font Format v2 | | xar | XAR archive compressed data | | xls | Microsoft Excel CDF document | | xlsb | Microsoft Excel 2007+ document (binary format) | | xlsx | Microsoft Excel 2007+ document | | xml | XML document | | xpi | Compressed installation archive (XPI) | | xz | XZ compressed data | | yaml | YAML source | | yara | YARA rule | | zig | Zig source | | zip | Zip archive data | | zlibstream | zlib compressed data | ## List of possible model's outputs This is the full list of all of the model's possible outputs. For example, these are all the possible values of the Magika Python module's `MagikaResult.prediction.dl.label`. Note that, in general, the list of "model outputs" differs from the list of "tool outputs," since in some cases the model is not even used. Consult the [Core Concepts](/magika/core-concepts/how-magika-works/) section for more context. This list is useful mostly for debugging purposes; the vast majority of clients should just consult the table in the section above.
| Content Type Label | Description | |:-------------:|------| | 3gp | 3GPP multimedia file | | ace | ACE archive | | ai | Adobe Illustrator Artwork | | aidl | Android Interface Definition Language | | apk | Android package | | applebplist | Apple binary property list | | appleplist | Apple property list | | asm | Assembly | | asp | ASP source | | autohotkey | AutoHotKey script | | autoit | AutoIt script | | awk | Awk | | batch | DOS batch file | | bazel | Bazel build file | | bib | BibTeX | | bmp | BMP image data | | bzip | bzip2 compressed data | | c | C source | | cab | Microsoft Cabinet archive data | | cat | Windows Catalog file | | chm | MS Windows HtmlHelp Data | | clojure | Clojure | | cmake | CMake build file | | cobol | Cobol | | coff | Intel 80386 COFF | | coffeescript | CoffeeScript | | cpp | C++ source | | crt | Certificates (binary format) | | crx | Google Chrome extension | | cs | C# source | | csproj | .NET project config | | css | CSS source | | csv | CSV document | | dart | Dart source | | deb | Debian binary package | | dex | Dalvik dex file | | dicom | DICOM | | diff | Diff file | | dm | Dream Maker | | dmg | Apple disk image | | doc | Microsoft Word CDF document | | dockerfile | Dockerfile | | docx | Microsoft Word 2007+ document | | dsstore | Application Desktop Services Store | | dwg | Autocad Drawing | | dxf | Audocad Drawing Exchange Format | | elf | ELF executable | | elixir | Elixir script | | emf | Windows Enhanced Metafile image data | | eml | RFC 822 mail | | epub | EPUB document | | erb | Embedded Ruby source | | erlang | Erlang source | | flac | FLAC audio bitstream data | | flv | Flash Video | | fortran | Fortran | | gemfile | Gemfile file | | gemspec | Gemspec file | | gif | GIF image data | | gitattributes | Gitattributes file | | gitmodules | Gitmodules file | | go | Golang source | | gradle | Gradle source | | groovy | Groovy source | | gzip | gzip compressed data | | h5 | Hierarchical Data Format v5 | | handlebars | Handlebars 
source | | haskell | Haskell source | | hcl | HashiCorp configuration language | | hlp | MS Windows help | | htaccess | Apache access configuration | | html | HTML document | | icns | Mac OS X icon | | ico | MS Windows icon resource | | ics | Internet Calendaring and Scheduling | | ignorefile | Ignorefile | | ini | INI configuration file | | internetshortcut | MS Windows Internet shortcut | | ipynb | Jupyter notebook | | iso | ISO 9660 CD-ROM filesystem data | | jar | Java archive data (JAR) | | java | Java source | | javabytecode | Java compiled bytecode | | javascript | JavaScript source | | jinja | Jinja template | | jp2 | jpeg2000 | | jpeg | JPEG image data | | json | JSON document | | jsonl | JSONL document | | julia | Julia source | | kotlin | Kotlin source | | latex | LaTeX document | | lha | LHarc archive | | lisp | Lisp source | | lnk | MS Windows shortcut | | lua | Lua | | m3u | M3U playlist | | m4 | GNU Macro | | macho | Mach-O executable | | makefile | Makefile source | | markdown | Markdown document | | matlab | Matlab Source | | mht | MHTML document | | midi | Midi | | mkv | Matroska | | mp3 | MP3 media file | | mp4 | MP4 media file | | mscompress | MS Compress archive data | | msi | Microsoft Installer file | | mum | Windows Update Package file | | npy | Numpy Array | | npz | Numpy Arrays Archive | | nupkg | NuGet Package | | objectivec | ObjectiveC source | | ocaml | OCaml | | odp | OpenDocument Presentation | | ods | OpenDocument Spreadsheet | | odt | OpenDocument Text | | ogg | Ogg data | | one | One Note | | onnx | Open Neural Network Exchange | | otf | OpenType font | | outlook | MS Outlook Message | | parquet | Apache Parquet | | pascal | Pascal source | | pcap | pcap capture file | | pdb | Windows Program Database | | pdf | PDF document | | pebin | PE Windows executable | | pem | PEM certificate | | perl | Perl source | | php | PHP source | | pickle | Python pickle | | png | PNG image | | po | Portable Object (PO) for i18n | | postscript | 
PostScript document | | powershell | Powershell source | | ppt | Microsoft PowerPoint CDF document | | pptx | Microsoft PowerPoint 2007+ document | | prolog | Prolog source | | proteindb | Protein DB | | proto | Protocol buffer definition | | psd | Adobe Photoshop | | python | Python source | | pythonbytecode | Python compiled bytecode | | pytorch | Pytorch storage file | | qt | QuickTime | | r | R (language) | | randombytes | Random bytes | | randomtxt | Random text | | rar | RAR archive data | | rdf | Resource Description Framework document (RDF) | | rpm | RedHat Package Manager archive (RPM) | | rst | ReStructuredText document | | rtf | Rich Text Format document | | ruby | Ruby source | | rust | Rust source | | scala | Scala source | | scss | SCSS source | | sevenzip | 7-zip archive data | | sgml | sgml | | shell | Shell script | | smali | Smali source | | snap | Snap archive | | solidity | Solidity source | | sql | SQL source | | sqlite | SQLITE database | | squashfs | Squash filesystem | | srt | SubRip Text Format | | stlbinary | Stereolithography CAD (binary) | | stltext | Stereolithography CAD (text) | | sum | Checksum file | | svg | SVG Scalable Vector Graphics image data | | swf | Small Web File | | swift | Swift | | tar | POSIX tar archive | | tcl | Tickle | | textproto | Text protocol buffer | | tga | Targa image data | | thumbsdb | Windows thumbnail cache | | tiff | TIFF image data | | toml | Tom's obvious, minimal language | | torrent | BitTorrent file | | tsv | TSV document | | ttf | TrueType Font data | | twig | Twig template | | txt | Generic text document | | typescript | TypeScript source | | undefined | Undefined | | vba | MS Visual Basic source (VBA) | | vcxproj | Visual Studio MSBuild project | | verilog | Verilog source | | vhdl | VHDL source | | vtt | Web Video Text Tracks | | vue | Vue source | | wasm | Web Assembly | | wav | Waveform Audio file (WAV) | | webm | WebM media file | | webp | WebP media file | | winregistry | Windows Registry 
text | | wmf | Windows metafile | | woff | Web Open Font Format | | woff2 | Web Open Font Format v2 | | xar | XAR archive compressed data | | xls | Microsoft Excel CDF document | | xlsb | Microsoft Excel 2007+ document (binary format) | | xlsx | Microsoft Excel 2007+ document | | xml | XML document | | xpi | Compressed installation archive (XPI) | | xz | XZ compressed data | | yaml | YAML source | | yara | YARA rule | | zig | Zig source | | zip | Zip archive data | | zlibstream | zlib compressed data | ================================================ FILE: website-ng/src/content.config.ts ================================================ import { defineCollection } from 'astro:content'; import { docsLoader } from '@astrojs/starlight/loaders'; import { docsSchema } from '@astrojs/starlight/schema'; export const collections = { docs: defineCollection({ loader: docsLoader(), schema: docsSchema() }), }; ================================================ FILE: website-ng/src/lib/components/ui/button/button.svelte ================================================ {#if href} {@render children?.()} {:else} {/if} ================================================ FILE: website-ng/src/lib/components/ui/button/index.ts ================================================ import Root, { type ButtonProps, type ButtonSize, type ButtonVariant, buttonVariants, } from "./button.svelte"; export { Root, type ButtonProps as Props, // Root as Button, buttonVariants, type ButtonProps, type ButtonSize, type ButtonVariant, }; ================================================ FILE: website-ng/src/lib/components/ui/card/card-action.svelte ================================================
{@render children?.()}
================================================ FILE: website-ng/src/lib/components/ui/card/card-content.svelte ================================================
{@render children?.()}
================================================ FILE: website-ng/src/lib/components/ui/card/card-description.svelte ================================================

{@render children?.()}

================================================ FILE: website-ng/src/lib/components/ui/card/card-footer.svelte ================================================
{@render children?.()}
================================================ FILE: website-ng/src/lib/components/ui/card/card-header.svelte ================================================
{@render children?.()}
================================================ FILE: website-ng/src/lib/components/ui/card/card-title.svelte ================================================
{@render children?.()}
================================================ FILE: website-ng/src/lib/components/ui/card/card.svelte ================================================
{@render children?.()}
/**
 * Formats a byte count as a short human-readable string using binary
 * (1024-based) units: `B`, `KB`, `MB`, or `GB`.
 *
 * The value is rounded to a whole number of the selected unit. If that
 * rounding would produce `1024` of a unit (for example 1,048,575 bytes, which
 * previously rendered as "1024 KB"), the value is promoted to the next unit
 * instead ("1 MB"). `GB` is the largest unit, so anything bigger is reported
 * as a (possibly large) number of GB.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `"512 B"`, `"2 KB"`, `"10 MB"`.
 */
export const displaySize = (bytes: number): string => {
  const base = 1024;
  const units = ["B", "KB", "MB", "GB"];
  const lastUnit = units.length - 1;

  let value = bytes;
  let unit = 0;

  // Negated `<` (rather than `>=`) keeps the historical behavior for NaN,
  // which fails every `<` comparison and therefore ends up in the last unit.
  while (unit < lastUnit && !(value < base)) {
    value /= base;
    unit += 1;
  }

  // `toFixed(0)` rounds, so e.g. 1023.6 would print as "1024"; step up one
  // unit in that case so the output never shows 1024 of a non-final unit.
  if (unit < lastUnit && Number(value.toFixed(0)) >= base) {
    value /= base;
    unit += 1;
  }

  return `${value.toFixed(0)} ${units[unit]}`;
};
/** Reason reported through `onFileRejected` when a file is not accepted. */
export type FileRejectedReason =
  | "Maximum file size exceeded"
  | "File type not allowed"
  | "Maximum files uploaded";

/**
 * Props of the file drop zone that are specific to the component, i.e. before
 * they are combined with the underlying HTML input attributes.
 */
export type FileDropZonePropsWithoutHTML = WithChildren<{
  // NOTE(review): presumably a bindable reference to the underlying file input — confirm in the component.
  ref?: HTMLInputElement | null;
  /** Called with the uploaded files when the user drops or clicks and selects their files.
   *
   * @param files
   */
  // NOTE(review): the type argument was missing in this copy; `void` is assumed
  // because nothing here documents a resolved value — confirm against upstream.
  onUpload: (files: File[]) => Promise<void>;
  /** The maximum amount files allowed to be uploaded */
  maxFiles?: number;
  // NOTE(review): presumably the number of files already uploaded, checked against `maxFiles` — confirm.
  fileCount?: number;
  /** The maximum size of a file in bytes */
  maxFileSize?: number;
  /** Called when a file does not meet the upload criteria (size, or type) */
  onFileRejected?: (opts: { reason: FileRejectedReason; file: File }) => void;

  // just for extra documentation
  /** Takes a comma separated list of one or more file types.
   *
   * [MDN Reference](https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/accept)
   *
   * ### Usage
   * ```svelte
   * <FileDropZone accept="image/*" />
   * ```
   *
   * ### Common Values
   * ```svelte
   * <FileDropZone accept="image/*" />
   * <FileDropZone accept="video/*" />
   * <FileDropZone accept="audio/*" />
   * ```
   */
  accept?: string;
}>;
{#if showPercentage}
{percentage}%
{/if}
================================================ FILE: website-ng/src/lib/components/ui/tabs/index.ts ================================================
import Root from "./tabs.svelte";
import Content from "./tabs-content.svelte";
import List from "./tabs-list.svelte";
import Trigger from "./tabs-trigger.svelte";

export {
  Root,
  Content,
  List,
  Trigger,
  //
  Root as Tabs,
  Content as TabsContent,
  List as TabsList,
  Trigger as TabsTrigger,
};

================================================ FILE: website-ng/src/lib/components/ui/tabs/tabs-content.svelte ================================================

================================================ FILE: website-ng/src/lib/components/ui/tabs/tabs-list.svelte ================================================

================================================ FILE: website-ng/src/lib/components/ui/tabs/tabs-trigger.svelte ================================================

================================================ FILE: website-ng/src/lib/components/ui/tabs/tabs.svelte ================================================

================================================ FILE: website-ng/src/lib/components/ui/textarea/index.ts ================================================
import Root from "./textarea.svelte";

export {
  Root,
  //
  Root as Textarea,
};

================================================ FILE: website-ng/src/lib/components/ui/textarea/textarea.svelte ================================================

================================================ FILE: website-ng/src/lib/utils/utils.ts ================================================
/*
  Installed from @ieedan/shadcn-svelte-extras
*/

import { type ClassValue, clsx } from "clsx";
import { twMerge } from "tailwind-merge";

/**
 * Builds a class string by passing all inputs through `clsx` and then
 * `twMerge`.
 *
 * @param inputs Class values accepted by `clsx`.
 * @returns The result of `twMerge(clsx(inputs))`.
 */
export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}

/** `T` without its `child` property, when `T` declares one. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type WithoutChild<T> = T extends { child?: any } ? Omit<T, "child"> : T;
/** `T` without its `children` property, when `T` declares one. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type WithoutChildren<T> = T extends { children?: any }
  ? Omit<T, "children">
  : T;
/** `T` with both `child` and `children` removed. */
export type WithoutChildrenOrChild<T> = WithoutChildren<WithoutChild<T>>;
/** `T` extended with an optional, nullable `ref` to an element of type `U`. */
export type WithElementRef<T, U extends HTMLElement = HTMLElement> = T & {
  ref?: U | null;
};

================================================ FILE: website-ng/src/lib/utils.ts ================================================
import { clsx, type ClassValue } from "clsx";
import { twMerge } from "tailwind-merge";

/**
 * Builds a class string by passing all inputs through `clsx` and then
 * `twMerge`.
 *
 * @param inputs Class values accepted by `clsx`.
 * @returns The result of `twMerge(clsx(inputs))`.
 */
export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
}

/** `T` without its `child` property, when `T` declares one. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type WithoutChild<T> = T extends { child?: any } ? Omit<T, "child"> : T;
/** `T` without its `children` property, when `T` declares one. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type WithoutChildren<T> = T extends { children?: any }
  ? Omit<T, "children">
  : T;
/** `T` with both `child` and `children` removed. */
export type WithoutChildrenOrChild<T> = WithoutChildren<WithoutChild<T>>;
/** `T` extended with an optional, nullable `ref` to an element of type `U`. */
export type WithElementRef<T, U extends HTMLElement = HTMLElement> = T & {
  ref?: U | null;
};

================================================ FILE: website-ng/src/pages/install.ps1.ts ================================================
import type { APIContext } from "astro";

// We don't want a redirection page, so we disable prerendering.
export const prerender = false;

/**
 * Handles GET requests for this route by logging a message and answering with
 * a 302 redirect to the `cli-latest` PowerShell installer on GitHub releases.
 *
 * @param context Astro API context; only its `redirect` helper is used.
 * @returns The value returned by `redirect`.
 */
export async function GET({ redirect }: APIContext) {
  console.log("Redirecting to latest installer script");
  return redirect(
    "https://github.com/google/magika/releases/download/cli-latest/magika-installer.ps1",
    302
  );
}

================================================ FILE: website-ng/src/pages/install.sh.ts ================================================
import type { APIContext } from "astro";

// We don't want a redirection page, so we disable prerendering.
export const prerender = false;

/**
 * Handles GET requests for this route by logging a message and answering with
 * a 302 redirect to the `cli-latest` shell installer on GitHub releases.
 *
 * @param context Astro API context; only its `redirect` helper is used.
 * @returns The value returned by `redirect`.
 */
export async function GET({ redirect }: APIContext) {
  console.log("Redirecting to latest installer script");
  return redirect(
    "https://github.com/google/magika/releases/download/cli-latest/magika-installer.sh",
    302
  );
}

================================================ FILE: website-ng/src/styles/global.css ================================================
/* Custom fonts */
@import url("https://fonts.googleapis.com/css?family=Google+Sans+Text:300,400,700&display=swap");
@import url("https://fonts.googleapis.com/css?family=Google+Sans&display=swap");

/* Starlight Global Styles */
/* NOTE(review): "tailwindcss" is imported in full and its theme/utilities
   parts are imported again into named layers below; confirm that both are
   intended. */
@import "@astrojs/starlight-tailwind";
@import "tailwindcss";
@import "tailwindcss/theme.css" layer(theme);
@import "tailwindcss/utilities.css" layer(utilities);

/* Shadcn UI + Tailwind CSS + Animate.css */
@import "tw-animate-css";

/* Shadcn UI Theming */
/* The `dark` variant matches elements inside a `.dark` ancestor. */
@custom-variant dark (&:is(.dark *));

/* Default values of the theme custom properties. */
:root {
  --radius: 0.625rem;
  --background: oklch(1 0 0);
  --foreground: oklch(0.145 0 0);
  --card: oklch(1 0 0);
  --card-foreground: oklch(0.145 0 0);
  --popover: oklch(1 0 0);
  --popover-foreground: oklch(0.145 0 0);
  --primary: oklch(0.205 0 0);
  --primary-foreground: oklch(0.985 0 0);
  --secondary: oklch(0.97 0 0);
  --secondary-foreground: oklch(0.205 0 0);
  --muted: oklch(0.97 0 0);
  --muted-foreground: oklch(0.556 0 0);
  --accent: oklch(0.97 0 0);
  --accent-foreground: oklch(0.205 0 0);
  --destructive: oklch(0.577 0.245 27.325);
  --border: oklch(0.922 0 0);
  --input: oklch(0.922 0 0);
  --ring: oklch(0.708 0 0);
  --chart-1: oklch(0.646 0.222 41.116);
  --chart-2: oklch(0.6 0.118 184.704);
  --chart-3: oklch(0.398 0.07 227.392);
  --chart-4: oklch(0.828 0.189 84.429);
  --chart-5: oklch(0.769 0.188 70.08);
  --sidebar: oklch(0.985 0 0);
  --sidebar-foreground: oklch(0.145 0 0);
  --sidebar-primary: oklch(0.205 0 0);
  --sidebar-primary-foreground: oklch(0.985 0 0);
  --sidebar-accent: oklch(0.97 0 0);
  --sidebar-accent-foreground: oklch(0.205 0 0);
  --sidebar-border: oklch(0.922 0 0);
  --sidebar-ring: oklch(0.708 0 0);
}

/* Values of the same custom properties under the `.dark` class. */
.dark {
  --background: oklch(0.145 0 0);
  --foreground: oklch(0.985 0 0);
  --card: oklch(0.205 0 0);
  --card-foreground: oklch(0.985 0 0);
  --popover: oklch(0.205 0 0);
  --popover-foreground: oklch(0.985 0 0);
  --primary: oklch(0.922 0 0);
  --primary-foreground: oklch(0.205 0 0);
  --secondary: oklch(0.269 0 0);
  --secondary-foreground: oklch(0.985 0 0);
  --muted: oklch(0.269 0 0);
  --muted-foreground: oklch(0.708 0 0);
  --accent: oklch(0.269 0 0);
  --accent-foreground: oklch(0.985 0 0);
  --destructive: oklch(0.704 0.191 22.216);
  --border: oklch(1 0 0 / 10%);
  --input: oklch(1 0 0 / 15%);
  --ring: oklch(0.556 0 0);
  --chart-1: oklch(0.488 0.243 264.376);
  --chart-2: oklch(0.696 0.17 162.48);
  --chart-3: oklch(0.769 0.188 70.08);
  --chart-4: oklch(0.627 0.265 303.9);
  --chart-5: oklch(0.645 0.246 16.439);
  --sidebar: oklch(0.205 0 0);
  --sidebar-foreground: oklch(0.985 0 0);
  --sidebar-primary: oklch(0.488 0.243 264.376);
  --sidebar-primary-foreground: oklch(0.985 0 0);
  --sidebar-accent: oklch(0.269 0 0);
  --sidebar-accent-foreground: oklch(0.985 0 0);
  --sidebar-border: oklch(1 0 0 / 10%);
  --sidebar-ring: oklch(0.556 0 0);
}

/* Maps the custom properties above onto `--radius-*` and `--color-*` theme
   variables. */
@theme inline {
  --radius-sm: calc(var(--radius) - 4px);
  --radius-md: calc(var(--radius) - 2px);
  --radius-lg: var(--radius);
  --radius-xl: calc(var(--radius) + 4px);
  --color-background: var(--background);
  --color-foreground: var(--foreground);
  --color-card: var(--card);
  --color-card-foreground: var(--card-foreground);
  --color-popover: var(--popover);
  --color-popover-foreground: var(--popover-foreground);
  --color-primary: var(--primary);
  --color-primary-foreground: var(--primary-foreground);
  --color-secondary: var(--secondary);
  --color-secondary-foreground: var(--secondary-foreground);
  --color-muted: var(--muted);
  --color-muted-foreground: var(--muted-foreground);
  --color-accent: var(--accent);
  --color-accent-foreground: var(--accent-foreground);
  --color-destructive: var(--destructive);
  --color-border: var(--border);
  --color-input: var(--input);
  --color-ring: var(--ring);
  --color-chart-1: var(--chart-1);
  --color-chart-2: var(--chart-2);
  --color-chart-3: var(--chart-3);
  --color-chart-4: var(--chart-4);
  --color-chart-5: var(--chart-5);
  --color-sidebar: var(--sidebar);
  --color-sidebar-foreground: var(--sidebar-foreground);
  --color-sidebar-primary: var(--sidebar-primary);
  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
  --color-sidebar-accent: var(--sidebar-accent);
  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
  --color-sidebar-border: var(--sidebar-border);
  --color-sidebar-ring: var(--sidebar-ring);
}

/* Base defaults: border and outline colors on every element, page background
   and text color on the body. */
@layer base {
  * {
    @apply border-border outline-ring/50;
  }
  body {
    @apply bg-background text-foreground;
  }
}

/* NOTE(review): this layer-order statement comes after the layers have
   already been introduced above; confirm the resulting order is the intended
   one. */
@layer base, starlight, theme, components, utilities;

/* Show lists correctly */
.sl-container ul {
  list-style: disc;
}
.sl-container ol {
  list-style: decimal;
}
.sl-container starlight-toc nav ul {
  list-style: none;
}

================================================ FILE: website-ng/svelte.config.js ================================================
// Turns on the Svelte compiler's `experimental.async` option.
export default {
  compilerOptions: {
    experimental: {
      async: true
    }
  }
};

================================================ FILE: website-ng/tsconfig.json ================================================
{
  "extends": "astro/tsconfigs/strict",
  "include": [
    ".astro/types.d.ts",
    "**/*"
  ],
  "exclude": [
    "dist"
  ],
  "compilerOptions": {
    "jsx": "preserve",
    "baseUrl": "./",
    "paths": {
      "@/*": ["src/*"]
    }
  }
}