Showing preview only (1,958K chars total). Download the full file or copy to clipboard to get everything.
Repository: google/magika
Branch: main
Commit: fece437bf08d
Files: 432
Total size: 1.8 MB
Directory structure:
gitextract_9puqy48w/
├── .cargo/
│ └── config.toml
├── .dockerignore
├── .gemini/
│ └── config.yaml
├── .gitattributes
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── misdetection.md
│ │ └── new_content_type_request.md
│ ├── dependabot.yml
│ ├── labeler.yml
│ ├── scorecard.yml
│ └── workflows/
│ ├── cli-latest.yml
│ ├── cli-release.yml
│ ├── codeql.yml
│ ├── docs-check.yml
│ ├── github-issue-labeler.yml
│ ├── github-pages.yml
│ ├── go-test.yml
│ ├── js-check-import-scenarios.yml
│ ├── js-docs-builder.yml
│ ├── js-publish.yml
│ ├── js-test.yml
│ ├── python-build-and-release-package.yml
│ ├── python-test-published-package.yml
│ ├── python-test-published-rc-package.yml
│ ├── python-test-suite.yml
│ ├── rust-test.yml
│ ├── scorecard.yml
│ └── website-test.yml
├── .gitignore
├── CITATION.cff
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── assets/
│ ├── content_types_kb.min.json
│ └── models/
│ ├── CHANGELOG.md
│ ├── begonly_v2_1/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── fast_v2_1/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v1/
│ │ ├── README.md
│ │ ├── content_types_config.json
│ │ ├── magika_config.json
│ │ ├── model.h5
│ │ ├── model_config.json
│ │ └── thresholds.json
│ ├── standard_v2_0/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v2_1/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v3_0/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ ├── standard_v3_1/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ ├── standard_v3_2/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ └── standard_v3_3/
│ ├── README.md
│ ├── config.min.json
│ ├── metadata.json
│ └── model.onnx
├── dist-workspace.toml
├── docs/
│ ├── concepts.md
│ └── js.md
├── go/
│ ├── README.md
│ ├── cli/
│ │ ├── cli.go
│ │ ├── cli_test.go
│ │ ├── main.go
│ │ └── tests_data/
│ │ └── magika_test_pptx.txt
│ ├── docker/
│ │ └── Dockerfile
│ ├── example/
│ │ └── main.go
│ ├── go.mod
│ ├── go.sum
│ ├── magika/
│ │ ├── config.go
│ │ ├── content.go
│ │ ├── features.go
│ │ ├── features_test.go
│ │ ├── scanner.go
│ │ └── scanner_test.go
│ └── onnx/
│ ├── onnx.go
│ ├── onnx_runtime.go
│ ├── onnx_runtime.h
│ ├── onnx_runtime_test.go
│ └── onnx_zero.go
├── js/
│ ├── .gitignore
│ ├── CHANGELOG.md
│ ├── README.md
│ ├── magika-cli.ts
│ ├── magika-node.ts
│ ├── magika.ts
│ ├── package.json
│ ├── postBuild.js
│ ├── simple_examples/
│ │ ├── browser-esmodule-example/
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── package.json
│ │ │ ├── playwright.config.ts
│ │ │ └── test/
│ │ │ └── simple.spec.ts
│ │ ├── node-commonjs-example/
│ │ │ ├── index.js
│ │ │ └── package.json
│ │ ├── node-esmodule-example/
│ │ │ ├── index.js
│ │ │ └── package.json
│ │ ├── run_examples.sh
│ │ └── typescript-esmodule-example/
│ │ ├── index.ts
│ │ └── package.json
│ ├── src/
│ │ ├── .npmignore
│ │ ├── content-type-info.ts
│ │ ├── content-type-label.ts
│ │ ├── content-types-infos.ts
│ │ ├── magika-options.ts
│ │ ├── magika-prediction.ts
│ │ ├── magika-result.ts
│ │ ├── model-config-node.ts
│ │ ├── model-config.ts
│ │ ├── model-features.ts
│ │ ├── model-node.ts
│ │ ├── model-prediction.ts
│ │ ├── model.ts
│ │ ├── overwrite-reason.ts
│ │ ├── prediction-mode.ts
│ │ └── status.ts
│ ├── test/
│ │ ├── features-extraction-vs-reference.test.ts
│ │ ├── inference-vs-reference.test.ts
│ │ ├── magika-cli.test.ts
│ │ ├── magika.test.ts
│ │ ├── tfnHook.ts
│ │ └── utils.ts
│ ├── tsconfig.cjs.json
│ ├── tsconfig.esm.json
│ └── tsconfig.json
├── python/
│ ├── .gitignore
│ ├── .python-version
│ ├── CHANGELOG.md
│ ├── README.md
│ ├── mypy.ini
│ ├── pyproject.toml
│ ├── pytest.ini
│ ├── scripts/
│ │ ├── check_changelog.sh
│ │ ├── check_copyright.py
│ │ ├── check_documentation.py
│ │ ├── check_source.sh
│ │ ├── generate_reference.py
│ │ ├── pre_release_check.py
│ │ ├── prepare_pyproject_for_pure_python_wheel.py
│ │ ├── run_quick_test_magika_cli.py
│ │ ├── run_quick_test_magika_module.py
│ │ ├── sync.py
│ │ └── test_magika_model.py
│ ├── src/
│ │ └── magika/
│ │ ├── __init__.py
│ │ ├── cli/
│ │ │ ├── magika_client.py
│ │ │ └── magika_rust_client_not_found_warning.py
│ │ ├── colors.py
│ │ ├── config/
│ │ │ └── content_types_kb.min.json
│ │ ├── logger.py
│ │ ├── magika.py
│ │ ├── models/
│ │ │ └── standard_v3_3/
│ │ │ ├── README.md
│ │ │ ├── config.min.json
│ │ │ ├── metadata.json
│ │ │ └── model.onnx
│ │ ├── py.typed
│ │ └── types/
│ │ ├── __init__.py
│ │ ├── content_type_info.py
│ │ ├── content_type_label.py
│ │ ├── magika_error.py
│ │ ├── magika_prediction.py
│ │ ├── magika_result.py
│ │ ├── model.py
│ │ ├── overwrite_reason.py
│ │ ├── prediction_mode.py
│ │ ├── seekable.py
│ │ ├── status.py
│ │ └── strenum.py
│ └── tests/
│ ├── __init__.py
│ ├── test_features_extraction_vs_reference.py
│ ├── test_inference_vs_reference.py
│ ├── test_magika_python_module.py
│ ├── test_python_magika_client.py
│ └── utils.py
├── rust/
│ ├── .gitignore
│ ├── README.md
│ ├── changelog.sh
│ ├── cli/
│ │ ├── CHANGELOG.md
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── output
│ │ ├── publish.sh
│ │ ├── src/
│ │ │ └── main.rs
│ │ └── test.sh
│ ├── color.sh
│ ├── gen/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── content_types
│ │ ├── src/
│ │ │ └── main.rs
│ │ └── test.sh
│ ├── latest.sh
│ ├── lib/
│ │ ├── CHANGELOG.md
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── src/
│ │ │ ├── builder.rs
│ │ │ ├── config.rs
│ │ │ ├── content.rs
│ │ │ ├── error.rs
│ │ │ ├── file.rs
│ │ │ ├── future.rs
│ │ │ ├── input.rs
│ │ │ ├── lib.rs
│ │ │ ├── model.rs
│ │ │ └── session.rs
│ │ └── test.sh
│ ├── onnx/
│ │ ├── build.sh
│ │ └── maturin.sh
│ ├── publish.sh
│ ├── rustfmt.toml
│ ├── sync.sh
│ ├── taplo.toml
│ └── test.sh
├── tests_data/
│ ├── README.md
│ ├── basic/
│ │ ├── asm/
│ │ │ └── code.asm
│ │ ├── batch/
│ │ │ └── simple.bat
│ │ ├── c/
│ │ │ └── code.c
│ │ ├── css/
│ │ │ └── code.css
│ │ ├── csv/
│ │ │ └── magika_test.csv
│ │ ├── dockerfile/
│ │ │ └── Dockerfile
│ │ ├── docx/
│ │ │ ├── doc.docx
│ │ │ └── magika_test.docx
│ │ ├── eml/
│ │ │ └── sample.eml
│ │ ├── empty/
│ │ │ └── empty_file
│ │ ├── epub/
│ │ │ ├── doc.epub
│ │ │ └── magika_test.epub
│ │ ├── flac/
│ │ │ └── test.flac
│ │ ├── handlebars/
│ │ │ └── example.handlebars
│ │ ├── html/
│ │ │ └── doc.html
│ │ ├── ignorefile/
│ │ │ ├── example.ignorefile
│ │ │ └── other.ignorefile
│ │ ├── ini/
│ │ │ └── doc.ini
│ │ ├── javascript/
│ │ │ └── code.js
│ │ ├── jinja/
│ │ │ └── example.j2
│ │ ├── json/
│ │ │ └── doc.json
│ │ ├── latex/
│ │ │ └── sample.tex
│ │ ├── makefile/
│ │ │ └── simple.Makefile
│ │ ├── markdown/
│ │ │ ├── README.md
│ │ │ ├── magika_test.md
│ │ │ └── simple.md
│ │ ├── mht/
│ │ │ └── sample.mht
│ │ ├── odp/
│ │ │ └── magika_test.odp
│ │ ├── ods/
│ │ │ └── magika_test.ods
│ │ ├── odt/
│ │ │ ├── doc.odt
│ │ │ └── magika_test.odt
│ │ ├── ogg/
│ │ │ └── test.ogg
│ │ ├── outlook/
│ │ │ └── sample.msg
│ │ ├── pem/
│ │ │ ├── doc.pem
│ │ │ └── doc.pub
│ │ ├── pptx/
│ │ │ └── magika_test.pptx
│ │ ├── psd/
│ │ │ └── MagikaTest.psd
│ │ ├── python/
│ │ │ └── code.py
│ │ ├── pytorch/
│ │ │ └── example.pth
│ │ ├── rtf/
│ │ │ ├── doc.rtf
│ │ │ └── magika_test.rtf
│ │ ├── ruby/
│ │ │ └── code.rb
│ │ ├── rust/
│ │ │ ├── asm.rs
│ │ │ ├── code.rs
│ │ │ ├── test_case1.rs
│ │ │ └── test_case2.rs
│ │ ├── smali/
│ │ │ └── code.smali
│ │ ├── srt/
│ │ │ └── code.srt
│ │ ├── swift/
│ │ │ └── code.swift
│ │ ├── toml/
│ │ │ └── doc.toml
│ │ ├── tsv/
│ │ │ └── magika_test.tsv
│ │ ├── twig/
│ │ │ └── example.twig
│ │ ├── txt/
│ │ │ ├── complex-sentence.txt
│ │ │ ├── few-words.txt
│ │ │ ├── lorem-big.txt
│ │ │ ├── lorem-small.txt
│ │ │ ├── magika_test_pptx.txt
│ │ │ ├── many-words.txt
│ │ │ ├── one-sentence-with-newline.txt
│ │ │ ├── one-sentence.txt
│ │ │ └── random-ascii.txt
│ │ ├── typescript/
│ │ │ └── code.ts
│ │ ├── xlsx/
│ │ │ └── magika_test.xlsx
│ │ ├── yaml/
│ │ │ ├── dependabot.yml
│ │ │ └── python-test.yml
│ │ ├── yara/
│ │ │ └── rule.yar
│ │ └── zig/
│ │ └── code.zig
│ ├── current_missdetections/
│ │ ├── html/
│ │ │ └── malformed-html-gh-521.html
│ │ └── xls/
│ │ └── password-protected-example.xls
│ ├── mitra/
│ │ ├── bzip/
│ │ │ └── bzip2.bz2
│ │ ├── cab/
│ │ │ └── cab.cab
│ │ ├── elf/
│ │ │ ├── elf.elf
│ │ │ └── elf64.elf
│ │ ├── flac/
│ │ │ ├── flac.flac
│ │ │ └── tiny.flac
│ │ ├── iso/
│ │ │ └── iso.iso
│ │ ├── ogg/
│ │ │ └── vorbis.ogg
│ │ ├── pcap/
│ │ │ └── pcap.pcap
│ │ ├── php/
│ │ │ └── php.php
│ │ ├── rtf/
│ │ │ └── rich.rtf
│ │ ├── tga/
│ │ │ └── footer.tga
│ │ ├── tiff/
│ │ │ ├── tiff-be.tif
│ │ │ └── tiff-le.tif
│ │ ├── webm/
│ │ │ └── webm.webm
│ │ ├── xar/
│ │ │ ├── hello-world.xar
│ │ │ └── mini.xar
│ │ └── xz/
│ │ └── xz.xz
│ └── mitra_candidates/
│ ├── DS_Store
│ ├── ace.ace
│ ├── dicom.dcm
│ ├── hdf5.h5
│ ├── html.htm
│ ├── jp2-stream.jp2
│ ├── jp2.jp2
│ ├── lha.lzh
│ ├── lzip.lz
│ ├── mini.bplist
│ ├── mini.plist
│ ├── mini.protobuf
│ ├── pcapng.pcapng
│ ├── photoshop.psd
│ ├── qoi.qoi
│ ├── raw.tga
│ ├── tiny.avro
│ ├── wad.wad
│ └── wasm.wasm
├── website/
│ ├── .gitignore
│ ├── README.md
│ ├── index.html
│ ├── jsconfig.json
│ ├── package.json
│ ├── public/
│ │ ├── model/
│ │ │ ├── config.json
│ │ │ └── model.json
│ │ └── models/
│ │ ├── standard_v3_2/
│ │ │ ├── config.min.json
│ │ │ ├── metadata.json
│ │ │ └── model.json
│ │ └── standard_v3_3/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.json
│ ├── src/
│ │ ├── App.vue
│ │ └── main.js
│ └── vite.config.js
└── website-ng/
├── .gcloudignore
├── .gitignore
├── README.md
├── app.yaml
├── astro.config.mjs
├── components.json
├── content.config.ts
├── jsrepo.json
├── package.json
├── public/
│ └── models/
│ ├── standard_v3_2/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.json
│ └── standard_v3_3/
│ ├── README.md
│ ├── config.min.json
│ ├── metadata.json
│ └── model.json
├── src/
│ ├── components/
│ │ └── MagikaDemo.svelte
│ ├── content/
│ │ └── docs/
│ │ ├── additional-resources/
│ │ │ ├── changelog.md
│ │ │ ├── disclaimer.md
│ │ │ ├── faq.md
│ │ │ ├── license.md
│ │ │ ├── related-blog-posts.md
│ │ │ └── research-papers-and-citation.md
│ │ ├── cli-and-bindings/
│ │ │ ├── cli.md
│ │ │ ├── js-api.md
│ │ │ ├── js.md
│ │ │ ├── other-bindings.md
│ │ │ ├── overview.md
│ │ │ ├── python.md
│ │ │ └── rust.md
│ │ ├── contributing/
│ │ │ ├── creating-new-bindings.md
│ │ │ ├── how-to-contribute.md
│ │ │ ├── known-limitations.md
│ │ │ └── reporting-security-vulnerabilities.md
│ │ ├── core-concepts/
│ │ │ ├── how-magika-works.md
│ │ │ ├── models-and-content-types.md
│ │ │ ├── prediction-modes.md
│ │ │ └── understanding-the-output.md
│ │ ├── demo/
│ │ │ └── magika-demo.mdx
│ │ ├── getting-started/
│ │ │ ├── installation.mdx
│ │ │ └── quick-start.md
│ │ ├── index.mdx
│ │ ├── introduction/
│ │ │ └── overview.md
│ │ └── models/
│ │ └── standard_v3_3.md
│ ├── content.config.ts
│ ├── lib/
│ │ ├── components/
│ │ │ └── ui/
│ │ │ ├── button/
│ │ │ │ ├── button.svelte
│ │ │ │ └── index.ts
│ │ │ ├── card/
│ │ │ │ ├── card-action.svelte
│ │ │ │ ├── card-content.svelte
│ │ │ │ ├── card-description.svelte
│ │ │ │ ├── card-footer.svelte
│ │ │ │ ├── card-header.svelte
│ │ │ │ ├── card-title.svelte
│ │ │ │ ├── card.svelte
│ │ │ │ └── index.ts
│ │ │ ├── file-drop-zone/
│ │ │ │ ├── file-drop-zone.svelte
│ │ │ │ ├── index.ts
│ │ │ │ └── types.ts
│ │ │ ├── input/
│ │ │ │ ├── index.ts
│ │ │ │ └── input.svelte
│ │ │ ├── label/
│ │ │ │ ├── index.ts
│ │ │ │ └── label.svelte
│ │ │ ├── progress/
│ │ │ │ ├── index.ts
│ │ │ │ └── progress.svelte
│ │ │ ├── tabs/
│ │ │ │ ├── index.ts
│ │ │ │ ├── tabs-content.svelte
│ │ │ │ ├── tabs-list.svelte
│ │ │ │ ├── tabs-trigger.svelte
│ │ │ │ └── tabs.svelte
│ │ │ └── textarea/
│ │ │ ├── index.ts
│ │ │ └── textarea.svelte
│ │ ├── utils/
│ │ │ └── utils.ts
│ │ └── utils.ts
│ ├── pages/
│ │ ├── install.ps1.ts
│ │ └── install.sh.ts
│ └── styles/
│ └── global.css
├── svelte.config.js
└── tsconfig.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .cargo/config.toml
================================================
[build]
target-dir = "rust/target"
================================================
FILE: .dockerignore
================================================
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/go/build-context-dockerignore/
**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
# Keep this if the container wants it, this is a test file.
!tests_data/basic/dockerfile/Dockerfile
================================================
FILE: .gemini/config.yaml
================================================
code_review:
pull_request_opened:
summary: false
================================================
FILE: .gitattributes
================================================
/tests_data/** -text
================================================
FILE: .github/CODEOWNERS
================================================
# Default owners (lowest precedence).
* @reyammer @invernizzi
# Julien owns the Rust code
/rust/ @ia0
# Yanick owns the Python code, all docs, and test data
/python/ @reyammer
*.md @reyammer
/tests_data/ @reyammer
# Julien still owns the Rust changelogs
/rust/**/CHANGELOG.md @ia0
# Luca owns the JS code, docs, and website
/js/ @invernizzi
/docs/js.md @invernizzi
/website/ @invernizzi
================================================
FILE: .github/ISSUE_TEMPLATE/misdetection.md
================================================
---
name: Misdetection
about: Report a file, or files, that have been misdetected as something that they
aren't.
title: "[Misdetection] <file> file misdetected as <file type>"
labels: misdetection, needs triage
assignees: ''
---
**What should the file have been detected as? What has the file been misdetected as?**
Ex. "HTML pages are being mistaken for generic XML files.", "C# code misdetected as Java.", or "Can't tell the difference between exe and dll files."
**Please link or attach the misdetected file below** (Do NOT upload PII!)
Placeholder.zip
**Additional context**
Add any other context or screenshots about the misdetection here.
================================================
FILE: .github/ISSUE_TEMPLATE/new_content_type_request.md
================================================
---
name: New content type request
about: Suggest a new content type for Magika to detect
title: "[NEW CONTENT TYPE REQUEST]"
labels: missing content type, needs triage
assignees: ''
---
**What type of file would you like magika to detect?**
Write the full name of the file format, followed by the file extension in parentheses.
Examples:
- "Nintendo Binary Revolution RESource (.brres)"
- "Valve Map Format file (.vmf)"
- "Blender save file (.blend)"
- "RPG Maker 2000/2003 Lcf DataBase (.ldb)"
- "COLLADA file (.dae)"
- "Unreal Engine Asset (.uasset)"
**What software can create/open these files?**
Examples:
- Simply state the name of the software, and where it can be obtained:
- "Valve Hammer Editor, included with any Source Engine game on Steam."
- If the file is common enough, write a general description:
- "Many 3D modeling software."
- "Any text editor."
- Link to the GitHub page:
- "[BrawlCrate](https://github.com/soopercool101/BrawlCrate)"
- Link to the software's main website or download page:
- "[Blender](https://www.blender.org/download/)"
- "[Unreal Engine](https://www.unrealengine.com/en-US)"
- If more than one program can open the file type, list them:
- "[RPG Maker 2003](https://www.rpgmakerweb.com/products/rpg-maker-2003), [easyRPG](https://easyrpg.org/), Wolf RPG Editor"
**Where can these files be found?**
Examples:
- Simply state where the files can be obtained:
- "Any RPGMaker 2000/2003 game."
- Provide instructions on how to obtain the files:
- "Dump the ISO of any of [these Wii games](https://wiki.vg-resource.com/BRRES#List_of_games_using_the_format)"
- "Use [bspsrc](https://github.com/ata4/bspsrc) to decompile the BSP files of any Source Engine game. Use [GCFScape](https://nemstools.github.io/pages/GCFScape-Download.html) to extract even more BSPs from 'dir.vpk' files. Make your own with Hammer."
- Link to a source of the files:
- "[Unreal Marketplace](https://www.unrealengine.com/marketplace/en-US/store)"
- "https://blendermarket.com/categories/models, https://www.turbosquid.com/Search/3D-Models/marketplace/blend, https://sketchfab.com/store/3d-models/blend?ref=store-home"
- Provide some of your own:
- "placeholder.zip" as attachment.
**If possible, please provide a specification for this file type.**
Link to a resource that explains how the file works.
Examples:
- A wiki page:
- "https://developer.valvesoftware.com/wiki/VMF_(Valve_Map_Format)"
- GitHub documentation:
- "https://gota7.github.io/NitroStudio2/specs/sequenceArchive.html"
- If you find more than one source, list them:
- "https://wiki.tockdom.com/wiki/BRRES_(File_Format), https://horizon.miraheze.org/wiki/.brres"
- "http://www.amnoid.de/gc/Rarc.txt, https://kuribo64.net/wiki/?page=RARC, https://wiki.tockdom.com/wiki/RARC_(File_Format)"
- "https://www.3dbrew.org/wiki/CGFX, https://mk3ds.com/index.php?title=CGFX_(File_Format)"
- "https://mk8.tockdom.com/wiki/BFRES_(File_Format), https://wiki.vg-resource.com/BFRES, https://wiki.oatmealdome.me/BFRES_(File_Format)"
- A PDF:
- "https://www.collada.org/2008/03/COLLADASchema"
**Additional context**
Add any other context or screenshots about the requested content type here.
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "pip"
directory: "/python"
schedule:
interval: "daily"
- package-ecosystem: "npm"
directory: "/js"
schedule:
interval: "weekly"
- package-ecosystem: "cargo"
directory: "/rust"
schedule:
interval: "weekly"
================================================
FILE: .github/labeler.yml
================================================
needs triage:
- "/.*/"
================================================
FILE: .github/scorecard.yml
================================================
# Scorecard maintainer annotations.
# See https://github.com/ossf/scorecard/blob/main/config/README.md
annotations:
# Binary files in tests_data/ are only used for testing.
- checks:
- binary-artifacts
reasons:
- reason: test-data
- checks:
- pinned-dependencies
reasons:
# Test data with unpinned dependencies:
# - tests_data/basic/dockerfile/Dockerfile
- reason: test-data
# CI/CD containers meant to run the latest version:
# - .github/workflows/python-e2e-test.yml
- reason: remediated
================================================
FILE: .github/workflows/cli-latest.yml
================================================
name: Update the trampoline release
on:
# This is called by cli-release.yml but we don't use the input.
workflow_call:
inputs:
plan:
required: true
type: string
# In case we want to run it manually (the workflow is idempotent).
workflow_dispatch:
permissions:
contents: write
jobs:
update:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5
- run: ./latest.sh
working-directory: rust
env:
GH_TOKEN: ${{ github.token }}
================================================
FILE: .github/workflows/cli-release.yml
================================================
# This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist
#
# Copyright 2022-2024, axodotdev
# SPDX-License-Identifier: MIT or Apache-2.0
#
# CI that:
#
# * checks for a Git Tag that looks like a release
# * builds artifacts with dist (archives, installers, hashes)
# * uploads those artifacts to temporary workflow zip
# * on success, uploads the artifacts to a GitHub Release
#
# Note that the GitHub Release will be created with a generated
# title/body based on your changelogs.
name: Release
permissions:
"contents": "write"
# This task will run whenever you push a git tag that looks like a version
# like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
# Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where
# PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION
# must be a Cargo-style SemVer Version (must have at least major.minor.patch).
#
# If PACKAGE_NAME is specified, then the announcement will be for that
# package (erroring out if it doesn't have the given version or isn't dist-able).
#
# If PACKAGE_NAME isn't specified, then the announcement will be for all
# (dist-able) packages in the workspace with that version (this mode is
# intended for workspaces with only one dist-able package, or with all dist-able
# packages versioned/released in lockstep).
#
# If you push multiple tags at once, separate instances of this workflow will
# spin up, creating an independent announcement for each one. However, GitHub
# will hard limit this to 3 tags per commit, as it will assume more tags is a
# mistake.
#
# If there's a prerelease-style suffix to the version, then the release(s)
# will be marked as a prerelease.
on:
pull_request:
push:
tags:
- 'cli**[0-9]+.[0-9]+.[0-9]+*'
jobs:
# Run 'dist plan' (or host) to determine what tasks we need to do
plan:
runs-on: "ubuntu-latest"
outputs:
val: ${{ steps.plan.outputs.manifest }}
tag: ${{ !github.event.pull_request && github.ref_name || '' }}
tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }}
publishing: ${{ !github.event.pull_request }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install dist
# we specify bash to get pipefail; it guards against the `curl` command
# failing. otherwise `sh` won't catch that `curl` returned non-0
shell: bash
run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh"
- name: Cache dist
uses: actions/upload-artifact@v6
with:
name: cargo-dist-cache
path: ~/.cargo/bin/dist
# sure would be cool if github gave us proper conditionals...
# so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
# functionality based on whether this is a pull_request, and whether it's from a fork.
# (PRs run on the *source* but secrets are usually on the *target* -- that's *good*
# but also really annoying to build CI around when it needs secrets to work right.)
- id: plan
run: |
dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
echo "dist ran successfully"
cat plan-dist-manifest.json
echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v6
with:
name: artifacts-plan-dist-manifest
path: plan-dist-manifest.json
# Build and packages all the platform-specific things
build-local-artifacts:
name: build-local-artifacts (${{ join(matrix.targets, ', ') }})
# Let the initial task tell us to not run (currently very blunt)
needs:
- plan
if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
strategy:
fail-fast: false
# Target platforms/runners are computed by dist in create-release.
# Each member of the matrix has the following arguments:
#
# - runner: the github runner
# - dist-args: cli flags to pass to dist
# - install-dist: expression to run to install dist on the runner
#
# Typically there will be:
# - 1 "global" task that builds universal installers
# - N "local" tasks that build each platform's binaries and platform-specific installers
matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
runs-on: ${{ matrix.runner }}
container: ${{ matrix.container && matrix.container.image || null }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
permissions:
"attestations": "write"
"contents": "read"
"id-token": "write"
steps:
- name: enable windows longpaths
run: |
git config --global core.longpaths true
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install Rust non-interactively if not already installed
if: ${{ matrix.container }}
run: |
if ! command -v cargo > /dev/null 2>&1; then
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install dist
run: ${{ matrix.install_dist.run }}
# Get the dist-manifest
- name: Fetch local artifacts
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- name: Install dependencies
run: |
${{ matrix.packages_install }}
- name: Build artifacts
run: |
# Actually do builds and make zips and whatnot
dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
echo "dist ran successfully"
- name: Attest
uses: actions/attest-build-provenance@v3
with:
subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*"
- id: cargo-dist
name: Post-build
# We force bash here just because github makes it really hard to get values up
# to "real" actions without writing to env-vars, and writing to env-vars has
# inconsistent syntax between shell and powershell.
shell: bash
run: |
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
- name: "Upload artifacts"
uses: actions/upload-artifact@v6
with:
name: artifacts-build-local-${{ join(matrix.targets, '_') }}
path: |
${{ steps.cargo-dist.outputs.paths }}
${{ env.BUILD_MANIFEST_NAME }}
# Build and package all the platform-agnostic(ish) things
build-global-artifacts:
needs:
- plan
- build-local-artifacts
runs-on: "ubuntu-latest"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v7
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Get all the local artifacts for the global tasks to use (for e.g. checksums)
- name: Fetch local artifacts
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: cargo-dist
shell: bash
run: |
dist build ${NEEDS_PLAN_OUTPUTS_TAG_FLAG} --output-format=json "--artifacts=global" > dist-manifest.json
echo "dist ran successfully"
# Parse out what we just built and upload it to scratch storage
echo "paths<<EOF" >> "$GITHUB_OUTPUT"
jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
echo "EOF" >> "$GITHUB_OUTPUT"
cp dist-manifest.json "$BUILD_MANIFEST_NAME"
env:
NEEDS_PLAN_OUTPUTS_TAG_FLAG: ${{ needs.plan.outputs.tag-flag }}
- name: "Upload artifacts"
uses: actions/upload-artifact@v6
with:
name: artifacts-build-global
path: |
${{ steps.cargo-dist.outputs.paths }}
${{ env.BUILD_MANIFEST_NAME }}
# Determines if we should publish/announce
host:
needs:
- plan
- build-local-artifacts
- build-global-artifacts
# Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine)
if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: "ubuntu-latest"
outputs:
val: ${{ steps.host.outputs.manifest }}
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
- name: Install cached dist
uses: actions/download-artifact@v7
with:
name: cargo-dist-cache
path: ~/.cargo/bin/
- run: chmod +x ~/.cargo/bin/dist
# Fetch artifacts from scratch-storage
- name: Fetch artifacts
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: target/distrib/
merge-multiple: true
- id: host
shell: bash
run: |
dist host ${NEEDS_PLAN_OUTPUTS_TAG_FLAG} --steps=upload --steps=release --output-format=json > dist-manifest.json
echo "artifacts uploaded and released successfully"
cat dist-manifest.json
echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
env:
NEEDS_PLAN_OUTPUTS_TAG_FLAG: ${{ needs.plan.outputs.tag-flag }}
- name: "Upload dist-manifest.json"
uses: actions/upload-artifact@v6
with:
# Overwrite the previous copy
name: artifacts-dist-manifest
path: dist-manifest.json
# Create a GitHub Release while uploading all files to it
- name: "Download GitHub Artifacts"
uses: actions/download-artifact@v7
with:
pattern: artifacts-*
path: artifacts
merge-multiple: true
- name: Cleanup
run: |
# Remove the granular manifests
rm -f artifacts/*-dist-manifest.json
- name: Create GitHub Release
env:
PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}"
ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
RELEASE_COMMIT: "${{ github.sha }}"
NEEDS_PLAN_OUTPUTS_TAG: ${{ needs.plan.outputs.tag }}
run: |
# Write and read notes from a file to avoid quoting breaking things
echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
gh release create "${NEEDS_PLAN_OUTPUTS_TAG}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
announce:
needs:
- plan
- host
# use "always() && ..." to allow us to wait for all publish jobs while
# still allowing individual publish jobs to skip themselves (for prereleases).
# "host" however must run to completion, no skipping allowed!
if: ${{ always() && needs.host.result == 'success' }}
runs-on: "ubuntu-latest"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false
submodules: recursive
custom-cli-latest:
needs:
- plan
- announce
uses: ./.github/workflows/cli-latest.yml
with:
plan: ${{ needs.plan.outputs.val }}
secrets: inherit
================================================
FILE: .github/workflows/codeql.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
schedule:
- cron: "42 13 * * 2"
permissions:
contents: read
jobs:
analyze:
name: Analyze
# Runner size impacts CodeQL analysis time. To learn more, please see:
# - https://gh.io/recommended-hardware-resources-for-running-codeql
# - https://gh.io/supported-runners-and-hardware-resources
# - https://gh.io/using-larger-runners
# Consider using larger runners for possible analysis time improvements.
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
permissions:
# required for all workflows
security-events: write
# only required for workflows in private repositories
actions: read
contents: read
strategy:
fail-fast: false
matrix:
language: ["javascript-typescript", "python"]
# CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
# Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
# Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
steps:
- name: Checkout repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
# If the Autobuild fails above, remove it and uncomment the following three lines.
# Modify them (or add more) to build your code; please refer to the EXAMPLE below for guidance.
# - run: |
# echo "Run, Build Application using script"
# ./location_of_script_within_repo/buildscript.sh
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
with:
category: "/language:${{matrix.language}}"
================================================
FILE: .github/workflows/docs-check.yml
================================================
# Runs python/scripts/check_documentation.py on manual dispatch, on pushes to
# main, on pull requests that touch Markdown files, and on a weekly schedule.
name: Docs - Check documentation
on:
workflow_dispatch:
push:
branches:
- "main"
pull_request:
# Only Markdown files in the listed locations trigger the check on PRs.
paths:
- "*.md"
- "assets/**/*.md"
- "docs/**/*.md"
- "js/**/*.md"
- "python/**/*.md"
- "rust/**/*.md"
- "website-ng/**/*.md"
schedule:
- cron: "42 7 * * 4" # Run weekly (Thursdays, 07:42 UTC)
permissions:
contents: read
jobs:
run-check-docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
# NOTE(review): uv is hard-coded to 0.5.22 here, while the Python workflows
# use a UV_VERSION env var set to 0.9.5 -- confirm whether this is intended.
- name: Install uv
run: curl -LsSf https://astral.sh/uv/0.5.22/install.sh | sh
- name: "Run check_documentation.py script"
working-directory: python
run: uv run ./scripts/check_documentation.py
================================================
FILE: .github/workflows/github-issue-labeler.yml
================================================
# Applies labels to newly opened issues using the rules in .github/labeler.yml.
name: New issue labeler
on:
# Runs on newly opened issues
issues:
types: [opened]
# Sets permissions of the GITHUB_TOKEN
# (write access to issues is what lets the job add labels)
permissions:
issues: write
contents: read
jobs:
triage:
runs-on: ubuntu-latest
steps:
- uses: github/issue-labeler@c1b0f9f52a63158c4adc09425e858e87b32e9685 # pin@v3.4
with:
# Label rules live in the repository, next to the workflows.
configuration-path: .github/labeler.yml
enable-versioned-regex: 0
repo-token: "${{secrets.GITHUB_TOKEN}}"
================================================
FILE: .github/workflows/github-pages.yml
================================================
# Builds the site under ./website with yarn and deploys ./website/dist to
# GitHub Pages.
name: Pages - deploy
on:
# Runs on pushes targeting the default branch
push:
branches: ["main"]
paths:
- "website/**"
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
contents: read
pages: write
id-token: write
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
deploy-pages:
environment:
name: github-pages
# Filled in from the output of the final "deployment" step.
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
# `run` steps below execute from ./website.
defaults:
run:
working-directory: ./website
steps:
- name: Checkout
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
with:
ref: main
- name: Set up Node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
with:
node-version: 18.x
- name: Install dependencies
run: yarn install --frozen-lockfile
- name: Build
run: yarn run build-github
- name: Setup Pages
uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # pin@v4
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # pin@v3
with:
# `defaults.run` applies to `run` steps only, so this path is spelled out
# in full rather than relative to ./website.
path: "./website/dist"
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # pin@v4
================================================
FILE: .github/workflows/go-test.yml
================================================
# Builds go/docker/Dockerfile from the repository root; per the step name, the
# image build itself runs the Go tests, so a failing test fails the build.
name: Go - build and run tests
on:
workflow_dispatch:
push:
branches:
- "main"
pull_request:
# On PRs, run only when Go code, shared test data, or this workflow change.
paths:
- "go/**"
- "tests_data/**"
- ".github/workflows/go-test.yml"
permissions:
contents: read
jobs:
unit-testing:
runs-on: "ubuntu-latest"
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- name: Build the Go Docker image (which also runs tests)
run: docker build -f go/docker/Dockerfile .
================================================
FILE: .github/workflows/js-check-import-scenarios.yml
================================================
# Tests that the Magika library can be used in various scenarios (e.g., commonjs/node, esmodule/node, esmodule/browser)
name: JS - check import scenarios
on:
workflow_dispatch:
push:
branches:
- "main"
pull_request:
paths:
- 'js/simple_examples/**'
# Re-run when this workflow file itself changes.
- '.github/workflows/js-check-import-scenarios.yml'
# Least-privilege token: the job only needs to read the repository.
permissions:
contents: read
jobs:
run-examples:
runs-on: ubuntu-latest
defaults:
run:
working-directory: js/simple_examples
steps:
# Check out the ref that triggered the run (for pull requests, the PR merge
# commit) so that the changes under review are what actually gets tested.
- name: Checkout
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # pin@v4
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
name: Set up Node.js
with:
node-version: "20.x"
registry-url: "https://registry.npmjs.org"
# Build the library once; each example below then installs its own
# dependencies, runs, and removes its node_modules afterwards.
- name: Install Magika dependencies
run: yarn install --frozen-lockfile
working-directory: js
- name: Build Magika
run: yarn build && rm -Rf node_modules
working-directory: js
- name: Install dependencies for the node-commonjs-example
run: yarn install
working-directory: js/simple_examples/node-commonjs-example
- name: Test node-commonjs-example
run: yarn --silent start && rm -Rf node_modules
working-directory: js/simple_examples/node-commonjs-example
- name: Install dependencies for the node-esmodule-example
run: yarn install
working-directory: js/simple_examples/node-esmodule-example
- name: Test node-esmodule-example
run: yarn --silent start && rm -Rf node_modules
working-directory: js/simple_examples/node-esmodule-example
- name: Install dependencies for the browser-esmodule-example
run: yarn install && yarn playwright install chromium
working-directory: js/simple_examples/browser-esmodule-example
- name: Test browser-esmodule-example
run: yarn --silent start && rm -Rf node_modules
working-directory: js/simple_examples/browser-esmodule-example
- name: Install dependencies for the typescript-esmodule-example
run: yarn install
working-directory: js/simple_examples/typescript-esmodule-example
- name: Test typescript-esmodule-example
run: yarn --silent start && rm -Rf node_modules
working-directory: js/simple_examples/typescript-esmodule-example
================================================
FILE: .github/workflows/js-docs-builder.yml
================================================
# Regenerates the JS documentation (yarn make-docs) and commits the result.
name: JS - generate docs
on:
# Runs on pushes targeting the default branch
push:
branches: ["main"]
paths:
- "js/**"
- ".github/workflows/**"
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
permissions:
contents: read
jobs:
makeDocs:
# Job-level override of the read-only default declared above.
permissions:
contents: write
id-token: write
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
with:
ref: main
- name: Generate docs
working-directory: js
run: |
yarn install --frozen-lockfile
yarn run build
yarn run make-docs
# NOTE(review): no step in this workflow pushes the commit created below --
# confirm whether a `git push` is expected to follow.
- name: Commit
run: |
git config --local user.email "invernizzi.l@gmail.com"
git config --local user.name "Luca Invernizzi"
# `git commit` exits non-zero when there is nothing to commit, which would
# fail the run whenever the regenerated docs are unchanged; only commit
# when tracked files actually differ from HEAD.
if git diff --quiet HEAD; then
echo "Docs are up to date; nothing to commit."
else
git commit -m "Update docs" -a
fi
================================================
FILE: .github/workflows/js-publish.yml
================================================
# Manually triggered: builds and tests the JS package, then publishes it to
# the npm registry using the NPM_TOKEN secret.
name: JS - publish
on:
workflow_dispatch:
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- name: Enable Corepack
run: corepack enable
# NOTE(review): this action is referenced by tag, while the checkout above
# is pinned to a commit SHA -- confirm whether it should be pinned too.
- uses: actions/setup-node@v6
with:
node-version: "20.x"
registry-url: "https://registry.npmjs.org"
# Publishing only happens if install, build and tests all succeed.
- name: Build
working-directory: js
run: |
yarn install --frozen-lockfile
yarn run build
yarn run test
- name: Publish
working-directory: js
run: yarn publish
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
================================================
FILE: .github/workflows/js-test.yml
================================================
# Installs, builds and tests the JS package on every combination of the Node
# versions and operating systems listed in the matrix.
name: JS - tests
on:
workflow_dispatch:
push:
branches:
- "main"
pull_request:
# On PRs, run only when JS code, shared test data, or a js-* workflow change.
paths:
- "js/**"
- "tests_data/**"
- ".github/workflows/js-*"
permissions:
contents: read
jobs:
unit-testing:
strategy:
matrix:
node-version: ["18", "20"]
os: ["ubuntu-latest", "macos-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
with:
node-version: ${{ matrix.node-version }}
- name: Install dependencies
working-directory: js
run: yarn install --frozen-lockfile
- name: Build
working-directory: js
run: yarn run build
- name: Run tests
working-directory: js
run: yarn test
================================================
FILE: .github/workflows/python-build-and-release-package.yml
================================================
name: Python - build and release package
on:
workflow_dispatch:
push:
branches:
- "main"
tags:
# Note: These need to match what specified in env.[TEST_]RELEASE_TAG_PREFIX below.
- "python-v*"
- "python-test-v*"
pull_request:
paths:
- "python/**"
- "rust/**"
- "tests_data/**"
- ".github/workflows/python-build-and-release-package.yml"
schedule:
- cron: "12 3 * * 4" # Run everything once per week.
- cron: "12 3 * * 1" # Refresh the cache an additional time.
release:
types: [created]
permissions:
contents: read
env:
# Trigger for publishing to pypi and testpypi (and for pre-release checks
# enforcement).
RELEASE_TAG_PREFIX: "python-v"
TEST_RELEASE_TAG_PREFIX: "python-test-v"
UV_VERSION: "0.9.5"
jobs:
# This job acts as a gatekeeper for releases, which are triggered by a tag
# push. It performs critical pre-release checks. These checks are skipped for
# non-release pushes.
# Both build jobs (`build-wheels`, `build-pure-python-wheel-and-sdist`) list
# this job in `needs`.
pre-release-checks:
name: Pre-release checks
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Setup python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "3.12"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# IS_RELEASE_TAG is 'true' only for tag pushes whose name starts with one of
# the two release prefixes. In that case the script strips
# "refs/tags/<prefix>" from GITHUB_REF and passes the remainder to the
# checker as --expected-version; otherwise the checker gets --report-only.
- name: Check package for release
env:
IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }}
run: |
if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then
FULL_TAG_REF="${GITHUB_REF}"
TAG_NAME="${GITHUB_REF_NAME}"
if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then
PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}"
else
PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}"
fi
TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}"
CHECKER_OPTIONS="--expected-version ${TAG_VERSION}"
else
CHECKER_OPTIONS="--report-only"
fi
# Note: this uses the magika python package installed via uv. Also,
# pip is not available here, so we skip pip show check.
uv run ./scripts/pre_release_check.py $CHECKER_OPTIONS --no-check-pip-show-package-version
working-directory: python
# Builds one wheel per platform entry in the matrix with maturin and uploads
# it as the artifact "wheel-<runner>-<target>".
# Unlike the test jobs, this job has no `if:` guard on the schedule, so it also
# runs on the additional weekly cache-refresh cron.
build-wheels:
needs: [pre-release-checks]
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-latest
target: x86_64
- runner: windows-latest
target: x64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v5
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "3.12"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Linux only: cache rust/onnx/runtime/build/Linux, keyed on the target and on
# the contents of rust/onnx/build.sh.
- if: matrix.platform.runner == 'ubuntu-latest'
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: rust/onnx/runtime/build/Linux
key: maturin-${{ matrix.platform.target }}-${{ hashFiles('rust/onnx/build.sh') }}
# Linux build: runs rust/onnx/maturin.sh before building and targets
# manylinux 2_28.
- if: matrix.platform.runner == 'ubuntu-latest'
name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out=../dist
before-script-linux: "${{ github.workspace }}/rust/onnx/maturin.sh"
manylinux: 2_28
working-directory: python
# Windows / macOS build: same arguments, without the Linux-only options.
- if: matrix.platform.runner != 'ubuntu-latest'
name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out=../dist
working-directory: python
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheel-${{ matrix.platform.runner }}-${{ matrix.platform.target }}
path: dist
# Download, install, and test the wheels with different versions of python
test-wheels:
needs: [build-wheels]
runs-on: ${{ matrix.platform.runner }}
# Skipped on the "12 3 * * 1" cron, which the `on.schedule` section describes
# as the additional cache refresh.
if: github.event.schedule != '12 3 * * 1'
strategy:
# We want to know in which exact situation the tests fail
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
platform:
- runner: ubuntu-latest
target: x86_64
- runner: windows-latest
target: x64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v5
# Fetch the wheel that `build-wheels` uploaded for this platform.
- uses: actions/download-artifact@v4
with:
name: wheel-${{ matrix.platform.runner }}-${{ matrix.platform.target }}
path: dist
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Attempt "uv add magika.whl", in a temporary directory
- name: Check that `uv add magika.whl` works
shell: bash
run: |
mkdir /tmp/test-uv
cp -vR dist/*.whl /tmp/test-uv
cd /tmp/test-uv
uv init
uv add ./$(\ls -1 *.whl | head -n 1)
# From now on, magika will be available in the global environment
- name: Install the wheel via pip
run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.whl')[0])")
# Smoke tests: CLI version, module import, a recursive scan of the basic
# test data, then the two quick-test scripts.
- run: magika --version
- run: "python3 -c 'import magika; m = magika.Magika(); print(m)'"
- run: magika -r tests_data/basic
- run: python3 ./python/scripts/run_quick_test_magika_cli.py
- run: python3 ./python/scripts/run_quick_test_magika_module.py
# Same tag-to-version logic as the `pre-release-checks` job, but run with
# the interpreter that has the freshly built wheel installed.
- name: Check package for release readiness
env:
IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }}
shell: bash
run: |
if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then
FULL_TAG_REF="${GITHUB_REF}"
TAG_NAME="${GITHUB_REF_NAME}"
if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then
PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}"
else
PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}"
fi
TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}"
CHECKER_OPTIONS="--expected-version ${TAG_VERSION}"
else
CHECKER_OPTIONS="--report-only"
fi
# Note: this uses the magika python package that was just built.
python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS
working-directory: python
# Builds the pure-python wheel and the source distribution with `uv build`
# and uploads them as two separate artifacts ("wheel-pure-python", "sdist").
build-pure-python-wheel-and-sdist:
needs: [pre-release-checks]
runs-on: ubuntu-latest
# Skipped on the "12 3 * * 1" cron (the additional cache refresh).
if: github.event.schedule != '12 3 * * 1'
steps:
- uses: actions/checkout@v5
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Runs the pyproject preparation script before the build.
- run: uv run ./scripts/prepare_pyproject_for_pure_python_wheel.py
working-directory: python
- name: Build pure python wheel and source distribution
run: uv build --out-dir ../dist
working-directory: python
- name: Upload pure python wheel
uses: actions/upload-artifact@v4
with:
name: wheel-pure-python
path: dist/*.whl
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: sdist
path: dist/*.tar.gz
# Download, install, and test the pure python wheel on multiple platforms
test-pure-python-wheel:
needs: [build-pure-python-wheel-and-sdist]
runs-on: ${{ matrix.platform.runner }}
# Skipped on the "12 3 * * 1" cron (the additional cache refresh).
if: github.event.schedule != '12 3 * * 1'
strategy:
# We want to know in which exact situation the tests fail
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
platform:
- runner: ubuntu-latest
target: x86_64
- runner: windows-latest
target: x64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v5
# The same "wheel-pure-python" artifact is used on every platform.
- uses: actions/download-artifact@v4
with:
name: wheel-pure-python
path: dist
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Attempt "uv add magika.whl", in a temporary directory
- name: Check that `uv add magika.whl` works
shell: bash
run: |
mkdir /tmp/test-uv
cp -vR dist/*.whl /tmp/test-uv
cd /tmp/test-uv
uv init
uv add ./$(\ls -1 *.whl | head -n 1)
# From now on, magika will be available in the global environment
- name: Install the wheel
run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.whl')[0])")
# Check that the magika script points to the placeholder raising a warning
- run: magika --version | grep -C10 WARNING | grep -C10 magika-python-client
# Check that the fallback magika's python client can be run
- run: magika-python-client -r tests_data/basic
# Check that the results of the python's client are correct
- run: python3 ./python/scripts/run_quick_test_magika_cli.py --client-path magika-python-client
# Test the python module
- run: "python3 -c 'import magika; m = magika.Magika(); print(m)'"
- run: python3 ./python/scripts/run_quick_test_magika_module.py
# Same tag-to-version logic as the `pre-release-checks` job; the checker is
# additionally told to use the python client.
- name: Check package for release readiness
env:
IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }}
shell: bash
run: |
if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then
FULL_TAG_REF="${GITHUB_REF}"
TAG_NAME="${GITHUB_REF_NAME}"
if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then
PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}"
else
PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}"
fi
TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}"
CHECKER_OPTIONS="--expected-version ${TAG_VERSION}"
else
CHECKER_OPTIONS="--report-only"
fi
# Note: this uses the magika python package that was just built.
python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS --use-python-client
working-directory: python
# Download, install, and test the source distribution on multiple platforms
test-sdist:
needs: [build-pure-python-wheel-and-sdist]
runs-on: ${{ matrix.platform.runner }}
# Skipped on the "12 3 * * 1" cron (the additional cache refresh).
if: github.event.schedule != '12 3 * * 1'
strategy:
# We want to know in which exact situation the tests fail
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
platform:
- runner: ubuntu-latest
target: x86_64
- runner: windows-latest
target: x64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v5
- uses: actions/download-artifact@v4
with:
name: sdist
path: dist
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Attempt "uv add magika.tar.gz", in a temporary directory
- name: Check that `uv add magika.tar.gz` works
shell: bash
run: |
mkdir /tmp/test-uv
cp -vR dist/*.tar.gz /tmp/test-uv
cd /tmp/test-uv
uv init
uv add ./$(\ls -1 *.tar.gz | head -n 1)
# From now on, magika will be available in the global environment
- name: Install the sdist
run: python3 -m pip install $(python -c "import glob; print(glob.glob('dist/*.tar.gz')[0])")
# Check that the magika script points to the placeholder raising a warning
- run: magika --version | grep -C10 WARNING | grep -C10 magika-python-client
# Check that the fallback magika's python client can be run
- run: magika-python-client -r tests_data/basic
# Check that the results of the python's client are correct
- run: python3 ./python/scripts/run_quick_test_magika_cli.py --client-path magika-python-client
# Test the python module
- run: "python3 -c 'import magika; m = magika.Magika(); print(m)'"
- run: python3 ./python/scripts/run_quick_test_magika_module.py
# Same tag-to-version logic as the `pre-release-checks` job; the checker is
# additionally told to use the python client.
- name: Check package for release readiness
env:
IS_RELEASE_TAG: ${{ github.event_name == 'push' && github.ref_type == 'tag' && (startsWith(github.ref_name, env.RELEASE_TAG_PREFIX) || startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)) }}
shell: bash
run: |
if [[ "${IS_RELEASE_TAG}" == 'true' ]]; then
FULL_TAG_REF="${GITHUB_REF}"
TAG_NAME="${GITHUB_REF_NAME}"
if [[ "${TAG_NAME}" == "${{ env.RELEASE_TAG_PREFIX }}"* ]]; then
PREFIX_TO_REMOVE="refs/tags/${{ env.RELEASE_TAG_PREFIX }}"
else
PREFIX_TO_REMOVE="refs/tags/${{ env.TEST_RELEASE_TAG_PREFIX }}"
fi
TAG_VERSION="${FULL_TAG_REF#${PREFIX_TO_REMOVE}}"
CHECKER_OPTIONS="--expected-version ${TAG_VERSION}"
else
CHECKER_OPTIONS="--report-only"
fi
# Note: this uses the magika python package that was just built.
python3 ./scripts/pre_release_check.py $CHECKER_OPTIONS --use-python-client
working-directory: python
# Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
# Note: The publishing is only done with pushes of release tags.
publish-to-pypi:
name: Publish to PyPI
# The job starts for any pushed tag whose name contains "python"; the publish
# step below is further restricted to tags starting with RELEASE_TAG_PREFIX.
if: github.event_name == 'push' && github.ref_type == 'tag' && contains(github.ref_name, 'python')
needs: [test-wheels, test-pure-python-wheel, test-sdist]
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/magika
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
# No artifact name is given, so every artifact of the run is downloaded,
# each into its own subdirectory of artifacts/.
- name: Download all the artifacts (binary wheels, pure python wheel, sdist)
uses: actions/download-artifact@v4
with:
path: artifacts/
- name: Flatten artifacts structure
run: |
# List all files for debugging
ls -alR artifacts/
# Find all files inside the subdirectories and move them up
find artifacts/ -mindepth 2 -type f -exec mv -t artifacts/ {} +
# Remove the now-empty subdirectories
find artifacts/ -mindepth 1 -type d -empty -delete
# Check structure after flattening
ls -alR artifacts/
- name: Publish distribution to PyPI
if: github.event_name == 'push' && github.ref_type == 'tag' && startsWith(github.ref_name, env.RELEASE_TAG_PREFIX)
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: artifacts/
# Note: The publishing is only done with pushes of test release tags.
publish-to-testpypi:
name: Publish to TestPyPI
# The job starts for any pushed tag whose name contains "python"; the publish
# step below is further restricted to tags starting with
# TEST_RELEASE_TAG_PREFIX.
if: github.event_name == 'push' && github.ref_type == 'tag' && contains(github.ref_name, 'python')
needs: [test-wheels, test-pure-python-wheel, test-sdist]
runs-on: ubuntu-latest
environment:
name: testpypi
url: https://test.pypi.org/p/magika
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing
steps:
# No artifact name is given, so every artifact of the run is downloaded,
# each into its own subdirectory of artifacts/.
- name: Download all the artifacts (binary wheels, pure python wheel, sdist)
uses: actions/download-artifact@v4
with:
path: artifacts/
- name: Flatten artifacts structure
run: |
# List all files for debugging
ls -alR artifacts/
# Find all files inside the subdirectories and move them up
find artifacts/ -mindepth 2 -type f -exec mv -t artifacts/ {} +
# Remove the now-empty subdirectories
find artifacts/ -mindepth 1 -type d -empty -delete
# Check structure after flattening
ls -alR artifacts/
- name: Publish distribution to TestPyPI
if: github.event_name == 'push' && github.ref_type == 'tag' && startsWith(github.ref_name, env.TEST_RELEASE_TAG_PREFIX)
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: artifacts/
# Upload to the TestPyPI endpoint.
repository-url: https://test.pypi.org/legacy/
================================================
FILE: .github/workflows/python-test-published-package.yml
================================================
# This routinely checks that published packages are installable and work
# properly. This makes sure that a new version of one of our dependencies is not
# breaking our releases.
# TODO: test more magika package versions
# TODO: check the actual predicted content types
name: Python - test published packages
on:
schedule:
- cron: "42 4 * * *" # Run daily
workflow_dispatch:
# On PRs, run only when this workflow file itself changes.
pull_request:
paths:
- ".github/workflows/python-test-published-package.yml"
permissions:
contents: read
env:
UV_VERSION: "0.9.5"
jobs:
unit-testing:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Done in a throw-away project under /tmp, which is removed afterwards.
- name: Check that `uv add magika` works
shell: bash
run: mkdir /tmp/test-uv && cd /tmp/test-uv && uv init && uv add magika && cd - && rm -rf /tmp/test-uv
- name: Install magika with pip
run: python3 -m pip install magika
- run: python3 -c 'import magika; m = magika.Magika(); print(m)'
- run: magika --version
# The latest published model does not necessarily support detection for
# all types in our tests data; thus, for now we just check that the magika
# CLI does not crash when scanning the files, without checking the actual
# predictions.
- run: magika -r tests_data/basic
================================================
FILE: .github/workflows/python-test-published-rc-package.yml
================================================
# This routinely checks that the latest published -rc packages are installable
# and work properly. This makes sure that a new version of one of our
# dependencies is not breaking our releases.
# TODO: test more magika package versions
# TODO: check the actual predicted content types
name: Python - test published -rc packages
on:
schedule:
- cron: "42 3 * * *" # Run daily
workflow_dispatch:
# On PRs, run only when this workflow file itself changes.
pull_request:
paths:
- ".github/workflows/python-test-published-rc-package.yml"
permissions:
contents: read
env:
UV_VERSION: "0.9.5"
jobs:
unit-testing:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# Done in a throw-away project under /tmp, which is removed afterwards.
# Pre-releases are enabled with `--prerelease allow` for uv and `--pre` for pip.
- name: Check that `uv add --prerelease magika` works
shell: bash
run: mkdir /tmp/test-uv && cd /tmp/test-uv && uv init && uv add --prerelease allow magika && cd - && rm -rf /tmp/test-uv
- name: Install magika with pip (considering prereleases)
run: python3 -m pip install --pre magika
- run: python3 -c 'import magika; m = magika.Magika(); print(m)'
- run: magika --version
# The latest published model does not necessarily support detection for
# all types in our tests data; thus, for now we just check that the magika
# CLI does not crash when scanning the files, without checking the actual
# predictions.
- run: magika -r tests_data/basic
================================================
FILE: .github/workflows/python-test-suite.yml
================================================
# Runs linters, type checks, the (non-slow) pytest suite and the quick-test
# scripts for the Python package on every Python version / OS in the matrix,
# plus a separate changelog check.
name: Python - run test suite
on:
workflow_dispatch:
push:
branches:
- "main"
pull_request:
paths:
- "python/**"
- "rust/**"
- "tests_data/**"
- ".github/workflows/python-test-suite.yml"
permissions:
contents: read
env:
UV_VERSION: "0.9.5"
jobs:
unit-testing:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
os: ["ubuntu-latest", "macos-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v5.0.0
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # pin@v5
with:
python-version: "${{ matrix.python-version }}"
- name: Install uv
run: curl -LsSf https://astral.sh/uv/${{ env.UV_VERSION }}/install.sh | sh
# The uv commands below pass `--python <matrix version>` explicitly, except
# for the copyright check.
- name: Install all projects dependencies (with the requested python version)
working-directory: python
run: uv sync --python ${{ matrix.python-version }} --all-extras --dev
- name: Print python version
working-directory: python
run: uv run --python ${{ matrix.python-version }} python --version
- name: Run ruff check
working-directory: python
run: uv run --python ${{ matrix.python-version }} ruff check --verbose
- name: Run ruff format --check
working-directory: python
run: uv run --python ${{ matrix.python-version }} ruff format --check --verbose
- name: Run mypy
working-directory: python
run: uv run --python ${{ matrix.python-version }} mypy src/magika tests
- name: Run copyright checks
working-directory: python
run: uv run ./scripts/check_copyright.py
# Tests marked "slow" are excluded here.
- name: Run the python tests suite
working-directory: python
run: uv run --python ${{ matrix.python-version }} pytest tests -m "not slow"
- name: Run magika --version
working-directory: python
run: uv run --python ${{ matrix.python-version }} magika --version
shell: bash # Allows for cross-platform
- name: Run magika with tests_data
working-directory: python
run: uv run --python ${{ matrix.python-version }} magika -r ../tests_data/basic
- name: Run "magika cli" quick tests
working-directory: python
run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_cli.py
- name: Run "magika module" quick tests
working-directory: python
run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_module.py
- name: Run "magika module" quick tests + perf measurement
working-directory: python
run: uv run --python ${{ matrix.python-version }} scripts/run_quick_test_magika_module.py --print-inference-stats --repeat 10
changelog-check:
runs-on: ubuntu-latest
steps:
# Full history (fetch-depth: 0) is fetched for the changelog script.
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # pin@v4
with:
fetch-depth: 0
# NOTE(review): GITHUB_BASE_REF is only populated for pull request events; on
# push / workflow_dispatch runs the unquoted expansion passes no argument at
# all -- confirm check_changelog.sh handles that case.
- name: Check for undocumented changes
run: ./scripts/check_changelog.sh ${GITHUB_BASE_REF}
working-directory: python
================================================
FILE: .github/workflows/rust-test.yml
================================================
# CI for the Rust code: a changelog check, the test script on the stable and
# nightly toolchains, and a release build of the CLI that is run on three OSes.
name: Rust - test
on:
workflow_dispatch:
push:
branches:
- "main"
# Pull requests trigger this workflow only when they touch the paths below.
pull_request:
paths:
- ".github/workflows/rust-*"
- "assets/**"
- "rust/**"
- "tests_data/**"
permissions:
contents: read
jobs:
# Runs rust/changelog.sh against a full-history checkout.
changelog:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
with:
fetch-depth: 0
- run: ./changelog.sh
working-directory: rust
# Runs rust/test.sh once per toolchain in the matrix.
test:
runs-on: ubuntu-latest
# Failures of the nightly matrix entry are tolerated; stable must pass.
continue-on-error: ${{ matrix.toolchain == 'nightly' }}
strategy:
matrix:
toolchain: [stable, nightly]
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
- run: rustup default ${{ matrix.toolchain }}
- run: rustup component add rustfmt clippy
- run: ./test.sh
working-directory: rust
# Builds the CLI in release mode on each OS and runs the resulting binary
# recursively (-r) over tests_data/basic.
run:
runs-on: ${{ matrix.os }}-latest
strategy:
matrix:
os: [ubuntu, macos, windows]
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
- run: cargo build --release
working-directory: rust/cli
# This step has no working-directory, so both paths are relative to the
# repository root.
- run: rust/target/release/magika -r tests_data/basic
================================================
FILE: .github/workflows/scorecard.yml
================================================
# This workflow uses actions that are not certified by GitHub. They are provided
# by a third-party and are governed by separate terms of service, privacy
# policy, and support documentation.
#
# Runs the OpenSSF Scorecard analysis, stores the SARIF report as a build
# artifact, and uploads it to the code-scanning dashboard.
name: Scorecard supply-chain security
on:
workflow_dispatch:
# For Branch-Protection check. Only the default branch is supported. See
# https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
branch_protection_rule:
# To guarantee Maintained check is occasionally updated. See
# https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
schedule:
# Weekly: every Tuesday at 21:45 (GitHub evaluates schedules in UTC).
- cron: "45 21 * * 2"
push:
branches: ["main"]
# Declare default permissions as read only.
permissions: read-all
jobs:
analysis:
name: Scorecard analysis
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Needed to publish results and get a badge (see publish_results below).
id-token: write
steps:
- name: "Checkout code"
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
with:
# Do not leave the checkout token in the local git configuration.
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3
with:
# The two upload steps below read this same file.
results_file: results.sarif
results_format: sarif
# (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
# - you want to enable the Branch-Protection check on a *public* repository, or
# - you are installing Scorecard on a *private* repository
# To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat.
# repo_token: ${{ secrets.SCORECARD_TOKEN }}
# Public repositories:
# - Publish results to OpenSSF REST API for easy access by consumers
# - Allows the repository to include the Scorecard badge.
# - See https://github.com/ossf/scorecard-action#publishing-results.
# For private repositories:
# - `publish_results` will always be set to `false`, regardless
# of the value entered here.
publish_results: true
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: SARIF file
path: results.sarif
# Keep the artifact for five days.
retention-days: 5
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5
with:
sarif_file: results.sarif
================================================
FILE: .github/workflows/website-test.yml
================================================
# Builds the JS package and then the website against that local build.
# There are no test steps yet (see the TODO at the end); a successful build is
# the only check.
name: Website - tests
on:
workflow_dispatch:
push:
branches:
- "main"
# Pull requests trigger this workflow only when they touch the paths below.
pull_request:
paths:
- "website/**"
- "js/**"
- "tests_data/**"
- ".github/workflows/website-*"
permissions:
contents: read
jobs:
build-and-test:
strategy:
# Single-entry matrix: Node 20 on ubuntu-latest.
matrix:
node-version: ["20"]
os: ["ubuntu-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # pin@v4
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # pin@v4
with:
node-version: ${{ matrix.node-version }}
- name: Install js dependencies
working-directory: js
run: yarn install --frozen-lockfile
- name: Build js
working-directory: js
run: yarn run build
# Register the freshly built js/ package with yarn and link it into website/,
# so the website is built against the in-repo `magika` package.
- name: Create magika link
working-directory: js
run: yarn link
- name: Link to local magika
working-directory: website
run: yarn link magika
- name: Install website dependencies
working-directory: website
run: yarn install --frozen-lockfile
- name: Build website
working-directory: website
run: yarn build
# TODO: add some actual testing
================================================
FILE: .gitignore
================================================
*.pyc
__pycache__/
.ipynb_checkpoints
venv/
tmp/
.env
*.swp
*.egg-info
dist/*
*.pickle
.s.yml
*/models-data/*
.vscode
scratchpad/
*/node_modules/*
docs/dist
js/dist
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Fratantonio"
given-names: "Yanick"
- family-names: "Invernizzi"
given-names: "Luca"
- family-names: "Farah"
given-names: "Loua"
- family-names: "Kurt"
given-names: "Thomas"
- family-names: "Zhang"
given-names: "Marina"
- family-names: "Albertini"
given-names: "Ange"
- family-names: "Galilee"
given-names: "Francois"
- family-names: "Metitieri"
given-names: "Giancarlo"
- family-names: "Cretin"
given-names: "Julien"
- family-names: "Petit-Bianco"
given-names: "Alexandre"
- family-names: "Tao"
given-names: "David"
- family-names: "Bursztein"
given-names: "Elie"
title: "Magika: AI-Powered Content-Type Detection"
url: "https://arxiv.org/abs/2409.13768"
================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute
We would love to accept your patches and contributions to this project!
Check [open issues labeled as "help wanted"](https://github.com/google/magika/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) as a starting point.
## Before you begin
### Sign our Contributor License Agreement
Contributions to this project must be accompanied by a
[Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
You (or your employer) retain the copyright to your contribution; this simply
gives us permission to use and redistribute your contributions as part of the
project.
If you or your current employer have already signed the Google CLA (even if it
was for a different project), you probably don't need to do it again.
Visit <https://cla.developers.google.com/> to see your current agreements or to
sign a new one.
### Review our Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google/conduct/).
## Contribution process
### Code Reviews
All submissions, including submissions by project members, require review. We
use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests)
for this purpose.
================================================
FILE: Dockerfile
================================================
# syntax=docker/dockerfile:1
# Minimal image that installs the `magika` package with pip and exposes its
# CLI as the container entrypoint.
# Python version of the base image; override with
# `--build-arg PYTHON_VERSION=<version>`.
ARG PYTHON_VERSION=3.11
# Keyword casing is kept consistent (FROM ... AS) to satisfy Docker's
# FromAsCasing build check.
FROM python:${PYTHON_VERSION}-slim AS base
WORKDIR /magika
# This requires buildx
# RUN --mount=type=cache,target=/root/.cache/pip \
# pip install magika
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir magika
ENTRYPOINT ["magika"]
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# Magika
[](https://pypi.python.org/pypi/magika)
[](https://npmjs.com/package/magika)
[](https://pypi.python.org/pypi/magika)
[](https://pypi.python.org/pypi/magika)
[](https://pkg.go.dev/github.com/google/magika/go)
<!-- [](https://scorecard.dev/viewer/?uri=github.com/google/magika) -->
[](https://www.bestpractices.dev/en/projects/8706)

[](https://github.com/google/magika/actions)
[](https://pepy.tech/projects/magika)
[](https://pepy.tech/projects/magika)
Magika is a novel AI-powered file type detection tool that relies on recent advances in deep learning to provide accurate detection. Under the hood, Magika employs a custom, highly optimized model that weighs only a few MBs and enables precise file identification within milliseconds, even when running on a single CPU. Magika has been trained and evaluated on a dataset of ~100M samples across 200+ content types (covering both binary and textual file formats), and it achieves an average ~99% accuracy on our test set.
Here is an example of what Magika command line output looks like:
<p align="center">
<img src="./assets/magika-screenshot.png" width="600">
</p>
Magika is used at scale to help improve Google users' safety by routing Gmail, Drive, and Safe Browsing files to the proper security and content policy scanners, processing hundreds of billions of samples on a weekly basis. Magika has also been integrated with [VirusTotal](https://www.virustotal.com/) ([example](./assets/magika-vt.png)) and [abuse.ch](https://bazaar.abuse.ch/) ([example](./assets/magika-abusech.png)).
For more context you can read our initial [announcement post on Google's OSS blog](https://opensource.googleblog.com/2024/02/magika-ai-powered-fast-and-efficient-file-type-identification.html), you can consult [Magika's website](https://securityresearch.google/magika/), and you can read more in our [research paper](https://securityresearch.google/magika/additional-resources/research-papers-and-citation/), published at the IEEE/ACM International Conference on Software Engineering (ICSE) 2025.
You can try Magika without installing anything by using our [web demo](https://securityresearch.google/magika/demo/magika-demo/), which runs locally in your browser!
# Highlights
- Available as a command line tool written in Rust, a Python API, and additional bindings for Rust, JavaScript/TypeScript (with an experimental npm package, which powers our [web demo](https://securityresearch.google/magika/demo/magika-demo/)), and GoLang (WIP).
- Trained and evaluated on a dataset of ~100M files across [200+ content types](./assets/models/standard_v3_3/README.md).
- On our test set, Magika achieves ~99% average precision and recall, outperforming existing approaches -- especially on textual content types.
- After the model is loaded (which is a one-off overhead), the inference time is about 5ms per file, even when run on a single CPU.
- You can invoke Magika with even thousands of files at the same time. You can also use `-r` for recursively scanning a directory.
- Near-constant inference time, independent of the file size; Magika only uses a limited subset of the file's content.
- Magika uses a per-content-type threshold system that determines whether to "trust" the model's prediction, or whether to return a generic label, such as "Generic text document" or "Unknown binary data".
- The tolerance to errors can be controlled via different prediction modes, such as `high-confidence`, `medium-confidence`, and `best-guess`.
- The client and the bindings are already open source, and more is coming soon!
# Table of Contents
1. [Getting Started](#getting-started)
1. [Installation](#installation)
1. [Quick Start](#quick-start)
1. [Documentation](#documentation)
1. [Security Vulnerabilities](#security-vulnerabilities)
1. [License](#license)
1. [Disclaimer](#disclaimer)
# Getting Started
## Installation
### Command Line Tool
Magika ships a CLI written in Rust, and can be installed in several ways.
Via `magika` python package:
```shell
pipx install magika
```
Via brew (macOS / Linux)
```shell
brew install magika
```
Via installer script:
```shell
curl -LsSf https://securityresearch.google/magika/install.sh | sh
```
or:
```shell
powershell -ExecutionPolicy Bypass -c "irm https://securityresearch.google/magika/install.ps1 | iex"
```
Via `magika-cli` Rust package:
```shell
cargo install --locked magika-cli
```
### Python package
```shell
pip install magika
```
### JavaScript package
```shell
npm install magika
```
## Quick Start
Here you can find a number of quick examples just to get you started.
To learn about Magika's inner workings, see the [Core Concepts](https://securityresearch.google/magika/core-concepts/) section of Magika's website.
### Command Line Tool Examples
```shell
% cd tests_data/basic && magika -r * | head
asm/code.asm: Assembly (code)
batch/simple.bat: DOS batch file (code)
c/code.c: C source (code)
css/code.css: CSS source (code)
csv/magika_test.csv: CSV document (code)
dockerfile/Dockerfile: Dockerfile (code)
docx/doc.docx: Microsoft Word 2007+ document (document)
docx/magika_test.docx: Microsoft Word 2007+ document (document)
eml/sample.eml: RFC 822 mail (text)
empty/empty_file: Empty file (inode)
```
```shell
% magika ./tests_data/basic/python/code.py --json
[
{
"path": "./tests_data/basic/python/code.py",
"result": {
"status": "ok",
"value": {
"dl": {
"description": "Python source",
"extensions": [
"py",
"pyi"
],
"group": "code",
"is_text": true,
"label": "python",
"mime_type": "text/x-python"
},
"output": {
"description": "Python source",
"extensions": [
"py",
"pyi"
],
"group": "code",
"is_text": true,
"label": "python",
"mime_type": "text/x-python"
},
"score": 0.996999979019165
}
}
}
]
```
```shell
% cat tests_data/basic/ini/doc.ini | magika -
-: INI configuration file (text)
```
```shell
% magika --help
Determines file content types using AI
Usage: magika [OPTIONS] [PATH]...
Arguments:
[PATH]...
List of paths to the files to analyze.
Use a dash (-) to read from standard input (can only be used once).
Options:
-r, --recursive
Identifies files within directories instead of identifying the directory itself
--no-dereference
Identifies symbolic links as is instead of identifying their content by following them
--colors
Prints with colors regardless of terminal support
--no-colors
Prints without colors regardless of terminal support
-s, --output-score
Prints the prediction score in addition to the content type
-i, --mime-type
Prints the MIME type instead of the content type description
-l, --label
Prints a simple label instead of the content type description
--json
Prints in JSON format
--jsonl
Prints in JSONL format
--format <CUSTOM>
Prints using a custom format (use --help for details).
The following placeholders are supported:
%p The file path
%l The unique label identifying the content type
%d The description of the content type
%g The group of the content type
%m The MIME type of the content type
%e Possible file extensions for the content type
%s The score of the content type for the file
%S The score of the content type for the file in percent
%b The model output if overruled (empty otherwise)
%% A literal %
-h, --help
Print help (see a summary with '-h')
-V, --version
Print version
```
For more examples and documentation about the CLI, see https://crates.io/crates/magika-cli.
### Python Examples
```python
>>> from magika import Magika
>>> m = Magika()
>>> res = m.identify_bytes(b'function log(msg) {console.log(msg);}')
>>> print(res.output.label)
javascript
```
```python
>>> from magika import Magika
>>> m = Magika()
>>> res = m.identify_path('./tests_data/basic/ini/doc.ini')
>>> print(res.output.label)
ini
```
```python
>>> from magika import Magika
>>> m = Magika()
>>> with open('./tests_data/basic/ini/doc.ini', 'rb') as f:
...     res = m.identify_stream(f)
>>> print(res.output.label)
ini
```
For more examples and documentation about the Python module, see the [Python `Magika` module](https://securityresearch.google/magika/cli-and-bindings/python/) section.
# Documentation
Please consult [Magika's website](https://securityresearch.google/magika) for detailed documentation about:
- Core Concepts
- How Magika works
- Models & content types
- Prediction modes
- Understanding the output
- CLI & Bindings (Python module, JavaScript module, ...)
- Contributing
- FAQ
- ...
# Security Vulnerabilities
Please contact us directly at magika-dev@google.com.
# License
Apache 2.0; see [`LICENSE`](LICENSE) for details.
# Disclaimer
This project is not an official Google project. It is not supported by
Google and Google specifically disclaims all warranties as to its quality,
merchantability, or fitness for a particular purpose.
================================================
FILE: assets/content_types_kb.min.json
================================================
{"3gp":{"mime_type":"video/3gpp","group":"video","description":"3GPP multimedia file","extensions":["3gp"],"is_text":false},"3ds":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS roms","extensions":["3ds"],"is_text":false},"3dsx":{"mime_type":"application/octet-stream","group":"unknown","description":"Nintendo 3DS homebrew","extensions":["3dsx"],"is_text":false},"3dsm":{"mime_type":"application/x-3ds","group":"image","description":"3D studio Max","extensions":["3ds"],"is_text":false},"3mf":{"mime_type":"application/vnd.ms-package.3dmanufacturing-3dmodel+xml","group":"image","description":"3D Manufacturing Format","extensions":["3mf"],"is_text":false},"abnf":{"mime_type":"text/plain","group":null,"description":"augmented Backus\u2013Naur form","extensions":["abnf"],"is_text":false},"ace":{"mime_type":"application/x-ace-compressed","group":"archive","description":"ACE archive","extensions":["ace"],"is_text":false},"ada":{"mime_type":"text/x-ada","group":"code","description":"ADA source","extensions":[],"is_text":false},"aff":{"mime_type":"text/plain","group":null,"description":"Hunspell Affix","extensions":["aff"],"is_text":true},"ai":{"mime_type":"application/pdf","group":"document","description":"Adobe Illustrator Artwork","extensions":["ai"],"is_text":false},"aidl":{"mime_type":"text/plain","group":null,"description":"Android Interface Definition Language","extensions":["aidl"],"is_text":true},"algol68":{"mime_type":null,"group":null,"description":null,"extensions":["a68"],"is_text":false},"ani":{"mime_type":"application/x-navi-animation","group":null,"description":"Animated cursor","extensions":["ani"],"is_text":false},"apk":{"mime_type":"application/vnd.android.package-archive","group":"executable","description":"Android package","extensions":["apk"],"is_text":false},"applebplist":{"mime_type":"application/x-bplist","group":"application","description":"Apple binary property 
list","extensions":["bplist","plist"],"is_text":false},"appledouble":{"mime_type":"multipart/appledouble","group":"unknown","description":"AppleDouble","extensions":[],"is_text":false},"appleplist":{"mime_type":"application/x-plist","group":"application","description":"Apple property list","extensions":["plist"],"is_text":true},"applesingle":{"mime_type":"application/applefile","group":"unknown","description":"AppleSingle","extensions":[],"is_text":false},"ar":{"mime_type":"application/x-archive","group":"archive","description":"AR Archive","extensions":[],"is_text":false},"arc":{"mime_type":"application/x-arc","group":"archive","description":"Arc","extensions":["arc"],"is_text":false},"arj":{"mime_type":"application/arj","group":"archive","description":"Arj","extensions":[],"is_text":false},"arrow":{"mime_type":"vnd.apache.arrow.file","group":null,"description":null,"extensions":[],"is_text":false},"asc":{"mime_type":"application/pgp-signature","group":"text","description":"PGP","extensions":["asc"],"is_text":true},"asd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"au":{"mime_type":"audio/basic","group":"audio","description":"NeXT/Sun AU","extensions":["au"],"is_text":false},"asf":{"mime_type":"video/x-ms-wma","group":"application","description":"Microsoft Advanced Systems Format","extensions":["asf"],"is_text":false},"asm":{"mime_type":"text/x-asm","group":"code","description":"Assembly","extensions":["s","S","asm"],"is_text":true},"asp":{"mime_type":"text/html","group":"code","description":"ASP source","extensions":["aspx","asp"],"is_text":true},"autohotkey":{"mime_type":"text/plain","group":"code","description":"AutoHotKey script","extensions":[],"is_text":true},"autoit":{"mime_type":"text/plain","group":"code","description":"AutoIt script","extensions":["au3"],"is_text":true},"avi":{"mime_type":"video/x-msvideo","group":"video","description":"Audio Video 
Interleave","extensions":["avi"],"is_text":false},"avif":{"mime_type":"image/avif","group":"video","description":"AV1 Image File Format","extensions":["avif","avifs"],"is_text":false},"avro":{"mime_type":"application/x-avro-binary","group":null,"description":"Apache Avro binary","extensions":["avro"],"is_text":false},"awk":{"mime_type":"text/plain","group":"code","description":"Awk","extensions":["awk"],"is_text":true},"ax":{"mime_type":"application/x-dosexec","group":"executable","description":"Directshow filter","extensions":["ax"],"is_text":false},"batch":{"mime_type":"text/x-msdos-batch","group":"code","description":"DOS batch file","extensions":["bat"],"is_text":true},"bazel":{"mime_type":"text/plain","group":"code","description":"Bazel build file","extensions":["bzl"],"is_text":true},"bcad":{"mime_type":"application/octet-stream","group":"document","description":"bCAD Drawing","extensions":["bdf"],"is_text":false},"bib":{"mime_type":"text/x-bibtex","group":"text","description":"BibTeX","extensions":["bib"],"is_text":true},"bmp":{"mime_type":"image/bmp","group":"image","description":"BMP image data","extensions":["bmp"],"is_text":false},"bpg":{"mime_type":"image/bpg","group":"image","description":"BPG","extensions":["bpg"],"is_text":false},"bpl":{"mime_type":null,"group":"unknown","description":null,"extensions":["bpl"],"is_text":false},"brainfuck":{"mime_type":"text/x-brainfuck","group":"code","description":"Brainfuck source","extensions":["b","bf"],"is_text":true},"brf":{"mime_type":"text/plain","group":"text","description":"Braille Ready Format","extensions":["brf","bfm"],"is_text":false},"bzip":{"mime_type":"application/x-bzip2","group":"archive","description":"bzip2 compressed data","extensions":["bz2","tbz2","tar.bz2"],"is_text":false},"bzip3":{"mime_type":"application/x-bzip3","group":"archive","description":"bzip3 compressed data","extensions":["bz3"],"is_text":false},"c":{"mime_type":"text/x-c","group":"code","description":"C 
source","extensions":["c"],"is_text":true},"cab":{"mime_type":"application/vnd.ms-cab-compressed","group":"archive","description":"Microsoft Cabinet archive data","extensions":["cab"],"is_text":false},"cad":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"cat":{"mime_type":"application/octet-stream","group":"application","description":"Windows Catalog file","extensions":["cat"],"is_text":false},"cdf":{"mime_type":null,"group":"archive","description":null,"extensions":[],"is_text":false},"chm":{"mime_type":"application/chm","group":"application","description":"MS Windows HtmlHelp Data","extensions":["chm"],"is_text":false},"clojure":{"mime_type":"text/x-clojure","group":"code","description":"Clojure","extensions":["clj","cljs","cljc","cljr"],"is_text":true},"cmake":{"mime_type":"text/x-cmake","group":"code","description":"CMake build file","extensions":["cmake"],"is_text":true},"cobol":{"mime_type":"text/x-cobol","group":"code","description":"Cobol","extensions":["cbl","cob","cpy","CBL","COB","CPY"],"is_text":true},"coff":{"mime_type":"application/x-coff","group":"executable","description":"Intel 80386 COFF","extensions":["obj","o"],"is_text":false},"coffeescript":{"mime_type":"text/coffeescript","group":"code","description":"CoffeeScript","extensions":["coffee"],"is_text":true},"com":{"mime_type":"application/x-dosexec","group":"executable","description":null,"extensions":[],"is_text":false},"cpl":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["cpl"],"is_text":false},"cpp":{"mime_type":"text/x-c","group":"code","description":"C++ source","extensions":["cc","cpp","cxx","c++","cppm","ixx"],"is_text":true},"crt":{"mime_type":"application/x-x509-ca-cert","group":"text","description":"Certificates (binary format)","extensions":["der","cer","crt"],"is_text":false},"crx":{"mime_type":"application/x-chrome-extension","group":"executable","description":"Google Chrome 
extension","extensions":["crx"],"is_text":false},"cs":{"mime_type":"text/plain","group":"code","description":"C# source","extensions":["cs","csx"],"is_text":true},"csproj":{"mime_type":"text/plain","group":"code","description":".NET project config","extensions":["csproj"],"is_text":true},"css":{"mime_type":"text/css","group":"code","description":"CSS source","extensions":["css"],"is_text":true},"csv":{"mime_type":"text/csv","group":"code","description":"CSV document","extensions":["csv"],"is_text":true},"ctl":{"mime_type":"application/octet-stream","group":null,"description":null,"extensions":[],"is_text":false},"dart":{"mime_type":"text/plain","group":"code","description":"Dart source","extensions":["dart"],"is_text":true},"deb":{"mime_type":"application/vnd.debian.binary-package","group":"archive","description":"Debian binary package","extensions":["deb"],"is_text":false},"dex":{"mime_type":"application/x-android-dex","group":"executable","description":"Dalvik dex file","extensions":["dex"],"is_text":false},"dey":{"mime_type":"application/x-android-dey","group":"executable","description":"Dalvik dex file","extensions":[],"is_text":false},"dicom":{"mime_type":"application/dicom","group":"image","description":"DICOM","extensions":["dcm"],"is_text":false},"diff":{"mime_type":"text/plain","group":"text","description":"Diff file","extensions":["diff","patch"],"is_text":true},"directory":{"mime_type":"inode/directory","group":"inode","description":"A directory","extensions":[],"is_text":false},"django":{"mime_type":"text/x-django","group":"code","description":"Django source","extensions":[],"is_text":false},"dll":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["dll"],"is_text":false},"dm":{"mime_type":"text/plain","group":"code","description":"Dream Maker","extensions":["dm"],"is_text":true},"dmigd":{"mime_type":"text/plain","group":"text","description":"Dominion 
Mods","extensions":["dm"],"is_text":true},"dmg":{"mime_type":"application/x-apple-diskimage","group":"archive","description":"Apple disk image","extensions":["dmg"],"is_text":false},"dmscript":{"mime_type":"text/plain","group":"code","description":"Digital Micrograph Script","extensions":["s"],"is_text":true},"doc":{"mime_type":"application/msword","group":"document","description":"Microsoft Word CDF document","extensions":["doc"],"is_text":false},"dockerfile":{"mime_type":"text/x-dockerfile","group":"code","description":"Dockerfile","extensions":[],"is_text":true},"docx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","group":"document","description":"Microsoft Word 2007+ document","extensions":["docx","docm"],"is_text":false},"dosmbr":{"mime_type":"application/octet-stream","group":null,"description":"Master boot record","extensions":[],"is_text":false},"dotx":{"mime_type":"application/vnd.openxmlformats-officedocument.wordprocessingml.template","group":"document","description":"Office Word 2007 template","extensions":["dotx"],"is_text":false},"dsstore":{"mime_type":"application/octet-stream","group":"unknown","description":"Application Desktop Services Store","extensions":[],"is_text":false},"dwg":{"mime_type":"image/x-dwg","group":"image","description":"Autocad Drawing","extensions":["dwg"],"is_text":false},"dxf":{"mime_type":"image/vnd.dxf","group":"image","description":"Audocad Drawing Exchange Format","extensions":["dxf"],"is_text":true},"dylib":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":["dylib"],"is_text":false},"ebml":{"mime_type":"application/octet-stream","group":"unknown","description":"Extensible Binary Meta Language","extensions":[],"is_text":false},"elf":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF 
executable","extensions":["elf"],"is_text":false},"elixir":{"mime_type":"text/plain","group":"code","description":"Elixir script","extensions":["exs"],"is_text":true},"emf":{"mime_type":"application/octet-stream","group":"application","description":"Windows Enhanced Metafile image data","extensions":["emf"],"is_text":false},"eml":{"mime_type":"message/rfc822","group":"text","description":"RFC 822 mail","extensions":["eml"],"is_text":true},"empty":{"mime_type":"inode/x-empty","group":"inode","description":"Empty file","extensions":[],"is_text":false},"epub":{"mime_type":"application/epub+zip","group":"document","description":"EPUB document","extensions":["epub"],"is_text":false},"erb":{"mime_type":"text/x-ruby","group":"code","description":"Embedded Ruby source","extensions":["erb"],"is_text":true},"erlang":{"mime_type":"text/x-erlang","group":"code","description":"Erlang source","extensions":["erl","hrl"],"is_text":true},"ese":{"mime_type":"application/x-ms-ese","group":null,"description":"ESE Db","extensions":["dat"],"is_text":false},"exe":{"mime_type":"application/x-dosexec","group":"executable","description":"PE executable","extensions":["exe"],"is_text":false},"exp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flac":{"mime_type":"audio/flac","group":"audio","description":"FLAC audio bitstream data","extensions":["flac"],"is_text":false},"flutter":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"flv":{"mime_type":"video/x-flv","group":"video","description":"Flash Video","extensions":["flv"],"is_text":false},"fortran":{"mime_type":"text/x-fortran","group":"document","description":"Fortran","extensions":["f90","f95","f03","F90"],"is_text":true},"fpx":{"mime_type":null,"group":"image","description":"Flashpix","extensions":["fpx"],"is_text":false},"gemfile":{"mime_type":"text/plain","group":"code","description":"Gemfile 
file","extensions":[],"is_text":true},"gemspec":{"mime_type":"text/plain","group":"code","description":"Gemspec file","extensions":["gemspec"],"is_text":true},"gif":{"mime_type":"image/gif","group":"image","description":"GIF image data","extensions":["gif"],"is_text":false},"gitattributes":{"mime_type":"text/plain","group":"code","description":"Gitattributes file","extensions":[],"is_text":true},"gitmodules":{"mime_type":"text/plain","group":"code","description":"Gitmodules file","extensions":[],"is_text":true},"gleam":{"mime_type":null,"group":"code","description":"Gleam source","extensions":["gleam"],"is_text":true},"go":{"mime_type":"text/x-golang","group":"code","description":"Golang source","extensions":["go"],"is_text":true},"gpx":{"mime_type":null,"group":null,"description":"XML document","extensions":["gpx"],"is_text":false},"gradle":{"mime_type":"text/x-groovy","group":"code","description":"Gradle source","extensions":["gradle"],"is_text":true},"groovy":{"mime_type":"text/x-groovy","group":"code","description":"Groovy source","extensions":["groovy"],"is_text":true},"gzip":{"mime_type":"application/gzip","group":"archive","description":"gzip compressed data","extensions":["gz","gzip","tgz","tar.gz"],"is_text":false},"h":{"mime_type":"text/x-c","group":"code","description":"C header source","extensions":["h"],"is_text":true},"h5":{"mime_type":"application/x-hdf5","group":"archive","description":"Hierarchical Data Format v5","extensions":["h5","hdf5"],"is_text":false},"handlebars":{"mime_type":"text/x-handlebars-template","group":"code","description":"Handlebars source","extensions":["hbs","handlebars"],"is_text":true},"haskell":{"mime_type":"text/plain","group":"code","description":"Haskell source","extensions":["hs","lhs"],"is_text":true},"hcl":{"mime_type":"text/x-hcl","group":"code","description":"HashiCorp configuration language","extensions":["hcl"],"is_text":true},"heif":{"mime_type":"image/heic","group":"image","description":"High Efficiency Image 
File","extensions":["heif","heifs","heic","heics"],"is_text":false},"hfs":{"mime_type":"application/x-hfs","group":null,"description":null,"extensions":["hfs"],"is_text":false},"hlp":{"mime_type":"application/winhlp","group":"application","description":"MS Windows help","extensions":["hlp"],"is_text":false},"hpp":{"mime_type":"text/x-h","group":"code","description":null,"extensions":["hh","hpp","hxx","h++"],"is_text":true},"hta":{"mime_type":"application/hta","group":"code","description":"HTML Application","extensions":["hta"],"is_text":false},"htaccess":{"mime_type":"text/x-apache-conf","group":"code","description":"Apache access configuration","extensions":[],"is_text":true},"html":{"mime_type":"text/html","group":"code","description":"HTML document","extensions":["html","htm","xhtml","xht"],"is_text":true},"hve":{"mime_type":null,"group":"unknown","description":null,"extensions":[],"is_text":false},"hwp":{"mime_type":"application/x-hwp","group":"document","description":"Hangul Word Processor","extensions":["hwp"],"is_text":false},"icc":{"mime_type":"application/vnd.iccprofile","group":null,"description":"ICC profile","extensions":["icc"],"is_text":false},"icns":{"mime_type":"image/x-icns","group":"image","description":"Mac OS X icon","extensions":["icns"],"is_text":false},"ico":{"mime_type":"image/vnd.microsoft.icon","group":"image","description":"MS Windows icon resource","extensions":["ico"],"is_text":false},"ics":{"mime_type":"text/calendar","group":"application","description":"Internet Calendaring and Scheduling","extensions":["ics"],"is_text":true},"ignorefile":{"mime_type":"text/plain","group":"code","description":"Ignorefile","extensions":[],"is_text":true},"img":{"mime_type":null,"group":null,"description":null,"extensions":["img"],"is_text":false},"ini":{"mime_type":"text/plain","group":"text","description":"INI configuration 
file","extensions":["ini"],"is_text":true},"internetshortcut":{"mime_type":"application/x-mswinurl","group":"application","description":"MS Windows Internet shortcut","extensions":["url"],"is_text":true},"iosapp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"ipynb":{"mime_type":"application/json","group":"code","description":"Jupyter notebook","extensions":["ipynb"],"is_text":true},"iso":{"mime_type":"application/x-iso9660-image","group":"archive","description":"ISO 9660 CD-ROM filesystem data","extensions":["iso"],"is_text":false},"jar":{"mime_type":"application/java-archive","group":"archive","description":"Java archive data (JAR)","extensions":["jar","klib"],"is_text":false},"java":{"mime_type":"text/x-java","group":"code","description":"Java source","extensions":["java"],"is_text":true},"javabytecode":{"mime_type":"application/x-java-applet","group":"executable","description":"Java compiled bytecode","extensions":["class"],"is_text":false},"javascript":{"mime_type":"application/javascript","group":"code","description":"JavaScript source","extensions":["js","mjs","cjs"],"is_text":true},"jinja":{"mime_type":"text/x-jinja2-template","group":"code","description":"Jinja template","extensions":["jinja","jinja2","j2"],"is_text":true},"jng":{"mime_type":"image/jng","group":"image","description":"JPEG network graphics","extensions":["jng"],"is_text":false},"jnlp":{"mime_type":"application/x-java-jnlp-file","group":"code","description":"Java Network Launch Protocol","extensions":["jnlp"],"is_text":true},"jp2":{"mime_type":"image/jpeg2000","group":"image","description":"jpeg2000","extensions":["jp2"],"is_text":false},"jpeg":{"mime_type":"image/jpeg","group":"image","description":"JPEG image data","extensions":["jpg","jpeg"],"is_text":false},"json":{"mime_type":"application/json","group":"code","description":"JSON 
document","extensions":["json"],"is_text":true},"jsonc":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"jsonl":{"mime_type":"application/json","group":"code","description":"JSONL document","extensions":["jsonl","jsonld"],"is_text":true},"jsx":{"mime_type":"application/javascript","group":"code","description":"JSX source","extensions":["jsx","mjsx","cjsx"],"is_text":true},"julia":{"mime_type":"text/x-julia","group":"code","description":"Julia source","extensions":["jl"],"is_text":true},"jxl":{"mime_type":"image/jxl","group":"image","description":"JPEG XL","extensions":["jxl"],"is_text":false},"ko":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, kernel object","extensions":["ko"],"is_text":false},"kotlin":{"mime_type":"text/plain","group":"code","description":"Kotlin source","extensions":["kt","kts"],"is_text":true},"ks":{"mime_type":null,"group":null,"description":"Tyrano","extensions":["ks"],"is_text":true},"latex":{"mime_type":"text/x-tex","group":"text","description":"LaTeX document","extensions":["tex","sty"],"is_text":true},"latexaux":{"mime_type":null,"group":null,"description":null,"extensions":["aux"],"is_text":false},"less":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"lha":{"mime_type":"application/x-lha","group":"archive","description":"LHarc archive","extensions":["lha","lzh"],"is_text":false},"license":{"mime_type":"text/plain","group":"text","description":"License file","extensions":[],"is_text":true},"lisp":{"mime_type":"text/x-lisp","group":"code","description":"Lisp source","extensions":["lisp","lsp","l","cl"],"is_text":true},"litcs":{"mime_type":null,"group":null,"description":"Literate CS","extensions":["litcoffee"],"is_text":false},"lnk":{"mime_type":"application/x-ms-shortcut","group":"application","description":"MS Windows 
shortcut","extensions":["lnk"],"is_text":false},"lock":{"mime_type":"text/plain","group":"application","description":"Lock file","extensions":["lock"],"is_text":true},"lrz":{"mime_type":"application/x-lrzip","group":null,"description":"LRZip","extensions":["lrz"],"is_text":false},"lua":{"mime_type":"text/plain","group":"code","description":"Lua","extensions":["lua"],"is_text":true},"lz":{"mime_type":"application/x-lzip","group":"archive","description":"LZip","extensions":["lz"],"is_text":false},"lz4":{"mime_type":"application/x-lz4","group":"archive","description":"LZ4","extensions":["lz4"],"is_text":false},"lzx":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"m3u":{"mime_type":"text/plain","group":"application","description":"M3U playlist","extensions":["m3u8","m3u"],"is_text":true},"m4":{"mime_type":"text/plain","group":"code","description":"GNU Macro","extensions":["m4"],"is_text":true},"macho":{"mime_type":"application/x-mach-o","group":"executable","description":"Mach-O executable","extensions":[],"is_text":false},"maff":{"mime_type":"application/x-maff","group":null,"description":null,"extensions":["maff"],"is_text":false},"makefile":{"mime_type":"text/x-makefile","group":"code","description":"Makefile source","extensions":[],"is_text":true},"markdown":{"mime_type":"text/markdown","group":"text","description":"Markdown document","extensions":["md","markdown"],"is_text":true},"matlab":{"mime_type":"text/x-matlab","group":"code","description":"Matlab Source","extensions":["m","matlab"],"is_text":true},"mht":{"mime_type":"application/x-mimearchive","group":"code","description":"MHTML document","extensions":["mht"],"is_text":true},"midi":{"mime_type":"audio/midi","group":"audio","description":"Midi","extensions":["mid"],"is_text":false},"mkv":{"mime_type":"video/x-matroska","group":"video","description":"Matroska","extensions":["mkv"],"is_text":false},"mp2":{"mime_type":null,"group":null,"description":"MP2 
stream","extensions":["mp2"],"is_text":false},"mp3":{"mime_type":"audio/mpeg","group":"audio","description":"MP3 media file","extensions":["mp3"],"is_text":false},"mp4":{"mime_type":"video/mp4","group":"video","description":"MP4 media file","extensions":["mp4"],"is_text":false},"mpegts":{"mime_type":"video/MP2T","group":"video","description":"MPEG Transport stream","extensions":["ts","tsv","tsa","m2t"],"is_text":false},"mscompress":{"mime_type":"application/x-ms-compress-szdd","group":"archive","description":"MS Compress archive data","extensions":[],"is_text":false},"msi":{"mime_type":"application/x-msi","group":"archive","description":"Microsoft Installer file","extensions":["msi"],"is_text":false},"msix":{"mime_type":"application/msix","group":"application","description":"Windows app package","extensions":["msix"],"is_text":false},"mst":{"mime_type":null,"group":null,"description":null,"extensions":["mst"],"is_text":false},"mui":{"mime_type":"application/x-dosexec","group":"application","description":"PE Windows executable","extensions":["mui"],"is_text":false},"mum":{"mime_type":"text/xml","group":"application","description":"Windows Update Package file","extensions":["mum"],"is_text":true},"mun":{"mime_type":null,"group":null,"description":null,"extensions":["mun"],"is_text":false},"nim":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"npy":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Array","extensions":["npy"],"is_text":false},"npz":{"mime_type":"application/octet-stream","group":"archive","description":"Numpy Arrays Archive","extensions":["npz"],"is_text":false},"null":{"mime_type":null,"group":null,"description":null,"extensions":["null"],"is_text":false},"nupkg":{"mime_type":"application/octet-stream","group":null,"description":"NuGet 
Package","extensions":["nupkg"],"is_text":false},"object":{"mime_type":null,"group":null,"description":null,"extensions":["o"],"is_text":false},"objectivec":{"mime_type":"text/x-objcsrc","group":"code","description":"ObjectiveC source","extensions":["m","mm"],"is_text":true},"ocaml":{"mime_type":"text-ocaml","group":"code","description":"OCaml","extensions":["ml","mli"],"is_text":true},"ocx":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["ocx"],"is_text":false},"odex":{"mime_type":"application/x-executable-elf","group":"executable","description":"ODEX ELF executable","extensions":["odex"],"is_text":false},"odin":{"mime_type":null,"group":"code","description":"Odin","extensions":["odin"],"is_text":true},"odp":{"mime_type":"application/vnd.oasis.opendocument.presentation","group":"document","description":"OpenDocument Presentation","extensions":["odp"],"is_text":false},"ods":{"mime_type":"application/vnd.oasis.opendocument.spreadsheet","group":"document","description":"OpenDocument Spreadsheet","extensions":["ods"],"is_text":false},"odt":{"mime_type":"application/vnd.oasis.opendocument.text","group":"document","description":"OpenDocument Text","extensions":["odt"],"is_text":false},"ogg":{"mime_type":"audio/ogg","group":"audio","description":"Ogg data","extensions":["ogg"],"is_text":false},"ole":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"one":{"mime_type":"application/msonenote","group":"document","description":"One Note","extensions":["one"],"is_text":false},"onnx":{"mime_type":"application/octet-stream","group":"archive","description":"Open Neural Network Exchange","extensions":["onnx"],"is_text":false},"ooxml":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"otf":{"mime_type":"font/otf","group":"font","description":"OpenType 
font","extensions":["otf"],"is_text":false},"outlook":{"mime_type":"application/vnd.ms-outlook","group":"application","description":"MS Outlook Message","extensions":[],"is_text":false},"palmos":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"parquet":{"mime_type":"application/vnd.apache.parquet","group":"unknown","description":"Apache Parquet","extensions":["pqt","parquet"],"is_text":false},"pascal":{"mime_type":"text/x-pascal","group":"code","description":"Pascal source","extensions":["pas","pp"],"is_text":true},"pbm":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"pcap":{"mime_type":"application/vnd.tcpdump.pcap","group":"application","description":"pcap capture file","extensions":["pcap","pcapng"],"is_text":false},"pdb":{"mime_type":"application/octet-stream","group":"application","description":"Windows Program Database","extensions":["pdb"],"is_text":false},"pdf":{"mime_type":"application/pdf","group":"document","description":"PDF document","extensions":["pdf"],"is_text":false},"pebin":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["exe","dll"],"is_text":false},"pem":{"mime_type":"application/x-pem-file","group":"application","description":"PEM certificate","extensions":["pem","pub","gpg"],"is_text":true},"perl":{"mime_type":"text/x-perl","group":"code","description":"Perl source","extensions":["pl"],"is_text":true},"pgp":{"mime_type":"application/pgp-keys","group":null,"description":"PGP","extensions":["gpg","pgp"],"is_text":false},"php":{"mime_type":"text/x-php","group":"code","description":"PHP source","extensions":["php"],"is_text":true},"pickle":{"mime_type":"application/octet-stream","group":"application","description":"Python pickle","extensions":["pickle","pkl"],"is_text":false},"png":{"mime_type":"image/png","group":"image","description":"PNG 
image","extensions":["png"],"is_text":false},"po":{"mime_type":"text/gettext-translation","group":"application","description":"Portable Object (PO) for i18n","extensions":["po"],"is_text":true},"postscript":{"mime_type":"application/postscript","group":"document","description":"PostScript document","extensions":["ps"],"is_text":false},"powershell":{"mime_type":"application/x-powershell","group":"code","description":"Powershell source","extensions":["ps1"],"is_text":true},"ppt":{"mime_type":"application/vnd.ms-powerpoint","group":"document","description":"Microsoft PowerPoint CDF document","extensions":["ppt"],"is_text":false},"pptx":{"mime_type":"application/vnd.openxmlformats-officedocument.presentationml.presentation","group":"document","description":"Microsoft PowerPoint 2007+ document","extensions":["pptx","pptm"],"is_text":false},"printfox":{"mime_type":null,"group":null,"description":"c64","extensions":[],"is_text":false},"prolog":{"mime_type":"text/x-prolog","group":"code","description":"Prolog source","extensions":["pl","pro","P"],"is_text":true},"proteindb":{"mime_type":"application/octet-stream","group":"application","description":"Protein DB","extensions":["pdb"],"is_text":true},"proto":{"mime_type":"text/x-proto","group":"code","description":"Protocol buffer definition","extensions":["proto"],"is_text":true},"protobuf":{"mime_type":"application/protobuf","group":"unknown","description":"Protocol buffers","extensions":["protobuf","pb"],"is_text":false},"psd":{"mime_type":"image/vnd.adobe.photoshop","group":"image","description":"Adobe Photoshop","extensions":["psd"],"is_text":false},"pytorch":{"mime_type":"application/octet-stream","group":"application","description":"Pytorch storage file","extensions":["pt","pth"],"is_text":false},"pub":{"mime_type":"application/x-mspublisher","group":null,"description":null,"extensions":["pub"],"is_text":false},"python":{"mime_type":"text/x-python","group":"code","description":"Python 
source","extensions":["py","pyi"],"is_text":true},"pythonbytecode":{"mime_type":"application/x-bytecode.python","group":"executable","description":"Python compiled bytecode","extensions":["pyc","pyo"],"is_text":false},"pythonpar":{"mime_type":null,"group":null,"description":null,"extensions":["par"],"is_text":false},"qoi":{"mime_type":"image/x-qoi","group":"image","description":"Quite Ok Image","extensions":["qoi"],"is_text":false},"qt":{"mime_type":"video/quicktime","group":"video","description":"QuickTime","extensions":["mov"],"is_text":false},"r":{"mime_type":"text/x-R","group":"code","description":"R (language)","extensions":["R"],"is_text":true},"randomascii":{"mime_type":"text/plain","group":"text","description":"Random ASCII characters","extensions":[],"is_text":true},"randombytes":{"mime_type":"application/octet-stream","group":"unknown","description":"Random bytes","extensions":[],"is_text":false},"randomtxt":{"mime_type":"text/plain","group":"text","description":"Random text","extensions":[],"is_text":true},"rar":{"mime_type":"application/x-rar","group":"archive","description":"RAR archive data","extensions":["rar"],"is_text":false},"rdf":{"mime_type":"application/rdf+xml","group":"text","description":"Resource Description Framework document (RDF)","extensions":["rdf"],"is_text":true},"rdp":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"riff":{"mime_type":"application/x-riff","group":null,"description":null,"extensions":[],"is_text":false},"rlib":{"mime_type":"application/x-archive","group":"archive","description":"rust library","extensions":["rlib"],"is_text":false},"rll":{"mime_type":null,"group":"executable","description":"Resource Library","extensions":["rll"],"is_text":false},"rpm":{"mime_type":"application/x-rpm","group":"archive","description":"RedHat Package Manager archive (RPM)","extensions":["rpm"],"is_text":false},"rst":{"mime_type":"text/x-rst","group":"text","description":"ReStructuredText 
document","extensions":["rst"],"is_text":true},"rtf":{"mime_type":"text/rtf","group":"text","description":"Rich Text Format document","extensions":["rtf"],"is_text":true},"ruby":{"mime_type":"application/x-ruby","group":"code","description":"Ruby source","extensions":["rb"],"is_text":true},"rust":{"mime_type":"application/x-rust","group":"code","description":"Rust source","extensions":["rs"],"is_text":true},"rzip":{"mime_type":null,"group":null,"description":"Rzip","extensions":["rz"],"is_text":false},"scala":{"mime_type":"application/x-scala","group":"code","description":"Scala source","extensions":["scala"],"is_text":true},"scheme":{"mime_type":"text/x-scheme","group":"code","description":null,"extensions":["scm","ss"],"is_text":false},"scr":{"mime_type":"application/x-dosexec","group":"executable","description":"PE Windows executable","extensions":["scr"],"is_text":false},"scriptwsf":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"scss":{"mime_type":"text/x-scss","group":"code","description":"SCSS source","extensions":["scss"],"is_text":true},"sevenzip":{"mime_type":"application/x-7z-compressed","group":"archive","description":"7-zip archive data","extensions":["7z"],"is_text":false},"sgml":{"mime_type":"application/sgml","group":"text","description":"sgml","extensions":["sgml"],"is_text":true},"sh3d":{"mime_type":null,"group":null,"description":null,"extensions":["sh3d"],"is_text":false},"shell":{"mime_type":"text/x-shellscript","group":"code","description":"Shell script","extensions":["sh"],"is_text":true},"smali":{"mime_type":"application/x-smali","group":"code","description":"Smali source","extensions":["smali"],"is_text":true},"snap":{"mime_type":"application/octet-stream","group":"archive","description":"Snap archive","extensions":["snap"],"is_text":false},"so":{"mime_type":"application/x-executable-elf","group":"executable","description":"ELF executable, shared 
library","extensions":["so"],"is_text":false},"solidity":{"mime_type":null,"group":"code","description":"Solidity source","extensions":["sol"],"is_text":true},"sql":{"mime_type":"application/x-sql","group":"code","description":"SQL source","extensions":["sql"],"is_text":true},"sqlite":{"mime_type":null,"group":"application","description":"SQLITE database","extensions":["sqlite","sqlite3"],"is_text":false},"squashfs":{"mime_type":"application/octet-stream","group":"archive","description":"Squash filesystem","extensions":[],"is_text":false},"srt":{"mime_type":"text/srt","group":"application","description":"SubRip Text Format","extensions":["srt"],"is_text":true},"stlbinary":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (binary)","extensions":["stl"],"is_text":false},"stltext":{"mime_type":"application/sla","group":"image","description":"Stereolithography CAD (text)","extensions":["stl"],"is_text":true},"sum":{"mime_type":null,"group":"application","description":"Checksum file","extensions":["sum"],"is_text":true},"svd":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"svg":{"mime_type":"image/svg+xml","group":"image","description":"SVG Scalable Vector Graphics image data","extensions":["svg"],"is_text":true},"swf":{"mime_type":"application/x-shockwave-flash","group":"executable","description":"Small Web File","extensions":["swf"],"is_text":false},"swift":{"mime_type":"text/x-swift","group":"code","description":"Swift","extensions":["swift"],"is_text":true},"symlink":{"mime_type":"inode/symlink","group":"inode","description":"Symbolic link","extensions":[],"is_text":false},"symlinktext":{"mime_type":"text/plain","group":"application","description":"Symbolic link (textual representation)","extensions":[],"is_text":true},"sys":{"mime_type":"application/x-windows-driver","group":"executable","description":"PE Windows 
executable","extensions":["sys"],"is_text":false},"tar":{"mime_type":"application/x-tar","group":"archive","description":"POSIX tar archive","extensions":["tar"],"is_text":false},"tcl":{"mime_type":"application/x-tcl","group":"code","description":"Tickle","extensions":["tcl"],"is_text":true},"textproto":{"mime_type":"text/plain","group":"code","description":"Text protocol buffer","extensions":["textproto","textpb","pbtxt"],"is_text":true},"tga":{"mime_type":"image/x-tga","group":"image","description":"Targa image data","extensions":["tga"],"is_text":false},"thumbsdb":{"mime_type":"image/vnd.ms-thumb","group":"application","description":"Windows thumbnail cache","extensions":[],"is_text":false},"tiff":{"mime_type":"image/tiff","group":"image","description":"TIFF image data","extensions":["tiff","tif"],"is_text":false},"tmdx":{"mime_type":null,"group":null,"description":null,"extensions":["tmdx","tmvx"],"is_text":false},"toml":{"mime_type":"application/toml","group":"text","description":"Tom's obvious, minimal language","extensions":["toml"],"is_text":true},"torrent":{"mime_type":"application/x-bittorrent","group":"application","description":"BitTorrent file","extensions":["torrent"],"is_text":false},"troff":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"tsv":{"mime_type":"text/tsv","group":"code","description":"TSV document","extensions":["tsv"],"is_text":true},"tsx":{"mime_type":"text/x-typescript","group":"code","description":"TSX source","extensions":["tsx","mtsx","ctsx"],"is_text":true},"ttf":{"mime_type":"font/sfnt","group":"font","description":"TrueType Font data","extensions":["ttf","ttc"],"is_text":false},"twig":{"mime_type":"text/x-twig","group":"code","description":"Twig template","extensions":["twig"],"is_text":true},"txt":{"mime_type":"text/plain","group":"text","description":"Generic text document","extensions":["txt"],"is_text":true},"txtascii":{"mime_type":"text/plain","group":"text","description":"Generic text 
document encoded in ASCII","extensions":["txt"],"is_text":true},"txtutf16":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-16","extensions":["txt"],"is_text":true},"txtutf8":{"mime_type":"text/plain","group":"text","description":"Generic text document encoded in UTF-8","extensions":["txt"],"is_text":true},"typescript":{"mime_type":"application/typescript","group":"code","description":"TypeScript source","extensions":["ts","mts","cts"],"is_text":true},"udf":{"mime_type":"application/x-udf-image","group":null,"description":"Universal Disc Format","extensions":[],"is_text":false},"undefined":{"mime_type":"application/undefined","group":"undefined","description":"Undefined","extensions":[],"is_text":false},"unixcompress":{"mime_type":"application/x-compress","group":null,"description":null,"extensions":["z"],"is_text":false},"unknown":{"mime_type":"application/octet-stream","group":"unknown","description":"Unknown binary data","extensions":[],"is_text":false},"vba":{"mime_type":"text/vbscript","group":"code","description":"MS Visual Basic source (VBA)","extensions":["vbs","vba","vb"],"is_text":true},"vbe":{"mime_type":null,"group":"code","description":"EncryptedVBS","extensions":["vbe"],"is_text":false},"vcard":{"mime_type":"text/vcard","group":null,"description":null,"extensions":["vcard"],"is_text":false},"vcs":{"mime_type":null,"group":null,"description":null,"extensions":[],"is_text":false},"vcxproj":{"mime_type":"application/xml","group":"code","description":"Visual Studio MSBuild project","extensions":["vcxproj"],"is_text":true},"verilog":{"mime_type":"text/x-verilog","group":"code","description":"Verilog source","extensions":["v","verilog","vlg","vh"],"is_text":true},"vhd":{"mime_type":"application/x-vhd","group":null,"description":"Virtual Hard Disk","extensions":[],"is_text":false},"vhdl":{"mime_type":"text/x-vhdl","group":"code","description":"VHDL 
source","extensions":["vhd"],"is_text":true},"visio":{"mime_type":"application/vnd.ms-visio.drawing.main+xml","group":"document","description":"Microsoft Visio","extensions":["vsd","vsdm","vsdx","vdw"],"is_text":false},"vtt":{"mime_type":"text/vtt","group":"text","description":"Web Video Text Tracks","extensions":["vtt","webvtt"],"is_text":true},"vue":{"mime_type":"application/javascript","group":"code","description":"Vue source","extensions":["vue"],"is_text":true},"wad":{"mime_type":"application/wad","group":"archive","description":"WAD","extensions":["wad"],"is_text":false},"wasm":{"mime_type":"application/wasm","group":"executable","description":"Web Assembly","extensions":["wasm"],"is_text":false},"wav":{"mime_type":"audio/x-wav","group":"audio","description":"Waveform Audio file (WAV)","extensions":["wav"],"is_text":false},"webm":{"mime_type":"video/webm","group":"video","description":"WebM media file","extensions":["webm"],"is_text":false},"webp":{"mime_type":"image/webp","group":"image","description":"WebP media file","extensions":["webp"],"is_text":false},"webtemplate":{"mime_type":"text/plain","group":"code","description":"Web templating language","extensions":[],"is_text":true},"wim":{"mime_type":"application/x-ms-wim","group":"unknown","description":"Windows Imaging Format","extensions":["wim","swm","esd"],"is_text":false},"winregistry":{"mime_type":"text/x-ms-regedit","group":"application","description":"Windows Registry text","extensions":["reg"],"is_text":true},"wma":{"mime_type":"audio/x-ms-wma","group":"audio","description":"Windows Media Audio","extensions":["wma"],"is_text":false},"wmf":{"mime_type":"image/wmf","group":"image","description":"Windows metafile","extensions":["wmf"],"is_text":false},"wmv":{"mime_type":"video/x-ms-wmv","group":"video","description":"Windows Media Video","extensions":["wmv"],"is_text":false},"woff":{"mime_type":"font/woff","group":"font","description":"Web Open Font 
Format","extensions":["woff"],"is_text":false},"woff2":{"mime_type":"font/woff2","group":"font","description":"Web Open Font Format v2","extensions":["woff2"],"is_text":false},"xar":{"mime_type":"application/x-xar","group":"archive","description":"XAR archive compressed data","extensions":["pkg","xar"],"is_text":false},"xcf":{"mime_type":"image/x-xcf","group":"image","description":"Gimp image","extensions":["xcf"],"is_text":false},"xls":{"mime_type":"application/vnd.ms-excel","group":"document","description":"Microsoft Excel CDF document","extensions":["xls"],"is_text":false},"xlsb":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document (binary format)","extensions":["xlsb"],"is_text":false},"xlsx":{"mime_type":"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet","group":"document","description":"Microsoft Excel 2007+ document","extensions":["xlsx","xlsm"],"is_text":false},"xml":{"mime_type":"text/xml","group":"code","description":"XML document","extensions":["xml"],"is_text":true},"xpi":{"mime_type":"application/zip","group":"archive","description":"Compressed installation archive (XPI)","extensions":["xpi"],"is_text":false},"xsd":{"mime_type":null,"group":null,"description":null,"extensions":["xsd"],"is_text":false},"xz":{"mime_type":"application/x-xz","group":"archive","description":"XZ compressed data","extensions":["xz"],"is_text":false},"yaml":{"mime_type":"application/x-yaml","group":"code","description":"YAML source","extensions":["yml","yaml"],"is_text":true},"yara":{"mime_type":"text/x-yara","group":"code","description":"YARA rule","extensions":["yar","yara"],"is_text":true},"zig":{"mime_type":"text/zig","group":"code","description":"Zig source","extensions":["zig"],"is_text":true},"zip":{"mime_type":"application/zip","group":"archive","description":"Zip archive 
data","extensions":["zip"],"is_text":false},"zlibstream":{"mime_type":"application/zlib","group":"application","description":"zlib compressed data","extensions":[],"is_text":false},"zst":{"mime_type":"application/zstd","group":"archive","description":"Zstandard","extensions":["zst"],"is_text":false}}
================================================
FILE: assets/models/CHANGELOG.md
================================================
# Changelog
Here we document the main changes of the various models.
The indicated inference speeds are calculated by averaging 100 inferences (within one invocation) on an AMD Ryzen 9 7950X 16-Core Processor CPU.
## `standard_v3_3` - 2025-04-11
- [216 possible tool's outputs](./standard_v3_3/README.md), ~99% average accuracy, ~2ms inference speed.
- Better dataset balance between javascript vs. typescript (leading to an increased accuracy for typescript, 85% => 95%).
- New synthetic datasets with utf8-encoded, non-ascii characters for simple text and JSON.
- More threshold tuning.
## `standard_v3_2` - 2025-03-17
- [216 possible tool's outputs](./standard_v3_2/README.md), ~99% average accuracy, ~2ms inference speed.
- Difference with respect to `standard_v3_1`: trained on a new (synthetic) dataset of CSV files to address a regression with CSV files (https://github.com/google/magika/issues/983); model selection now uses minimal test loss instead of other heuristics.
## `standard_v3_1`
- [216 possible tool's outputs](./standard_v3_1/README.md).
- Overall same average accuracy as `standard_v3_0`, ~99%, but more robust detection of short textual input and improved detection of JavaScript.
- Inference speed: ~2ms (similar to `standard_v3_0`).
- Augmentation techniques used during training: CutMix, which was used for `v1` but not for `v2_1`; and "Random Snippet Selection", with which we train the model with random snippets extracted from samples in our dataset (this is only enabled for key textual content types).
- Tweaked balance among content types in training dataset.
## `standard_v3_0`
- [216 possible tool's outputs](./standard_v3_0/README.md).
- Overall same average accuracy as `standard_v2_1`, ~99%.
- Inference speed: ~2ms (~3x faster than `standard_v2_1`, ~20% faster than `standard_v1`).
## `standard_v2_1`
- [Support for 200+ content types](./standard_v2_1/README.md), almost double what is supported in `standard_v1`.
- Overall average accuracy of ~99%.
- Inference speed: ~6.2ms, which is slower than `standard_v1`; see `fast_v2_1` in case you need something faster (at the price of lower accuracy).
## `fast_v2_1`
- Similar to `standard_v2_1`, but significantly faster (about 4x faster).
- Overall average accuracy of ~98.5%.
## `standard_v1`
- Initial release.
- Support for about 100 content types.
- Average accuracy 99%+.
- Inference speed: ~2.6ms.
================================================
FILE: assets/models/begonly_v2_1/config.min.json
================================================
{"beg_size": 2048, "mid_size": 0, "end_size": 0, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", 
"xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}}
================================================
FILE: assets/models/begonly_v2_1/metadata.json
================================================
{"model_name_hash": "e66844a04ae7a03bd9f228d9b778ec8429d361d0dca09b951b327ffad5beb07a", "namespace_hash": "ce3c9130af6416f40d71c5934f927acbd174f904a550fca2185aa3cd3528ca35"}
================================================
FILE: assets/models/fast_v2_1/config.min.json
================================================
{"beg_size": 512, "mid_size": 0, "end_size": 512, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", 
"xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}}
================================================
FILE: assets/models/fast_v2_1/metadata.json
================================================
{"model_name_hash": "83b9d2bd0c450deffc70624554c99fa63e1830db852cdce860b7e215fa176f9f", "namespace_hash": "ce3c9130af6416f40d71c5934f927acbd174f904a550fca2185aa3cd3528ca35"}
================================================
FILE: assets/models/standard_v1/README.md
================================================
# Model documentation
The list of supported content types is indicated by the `target_labels_space` list in the model config, which you can find [here](https://github.com/google/magika/blob/7f947319f1ebe09626368a3f989a0863fcd7c52a/assets/models/standard_v1/model_config.json#L440-L554).
Note: This model will be deprecated soon, in favor of more recent models (e.g., `standard_v3_x`).
================================================
FILE: assets/models/standard_v1/content_types_config.json
================================================
{
"3gp": {
"name": "3gp",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ace": {
"name": "ace",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"aff": {
"name": "aff",
"extensions": [
"aff"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ai": {
"name": "ai",
"extensions": [
"ai"
],
"mime_type": "application/pdf",
"group": "document",
"magic": "PDF document",
"description": "Adobe Illustrator Artwork",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "ai",
"target_label": "ai",
"correct_labels": [
"ai",
"pdf"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"algol68": {
"name": "algol68",
"extensions": [
"a68"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"apk": {
"name": "apk",
"extensions": [
"apk"
],
"mime_type": "application/vnd.android.package-archive",
"group": "executable",
"magic": "Java archive data",
"description": "Android package",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"zip_archive",
"archive"
],
"model_target_label": "zip",
"target_label": "apk",
"correct_labels": [
"apk"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"appleplist": {
"name": "appleplist",
"extensions": [
"plist"
],
"mime_type": "application/x-plist",
"group": "application",
"magic": "Apple binary property list",
"description": "Apple property list",
"vt_type": "appleplist",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"text"
],
"model_target_label": "appleplist",
"target_label": "appleplist",
"correct_labels": [
"appleplist"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"arj": {
"name": "arj",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"asm": {
"name": "asm",
"extensions": [
"S",
"asm"
],
"mime_type": "text/x-asm",
"group": "code",
"magic": "assembler source",
"description": "Assembly",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "asm",
"target_label": "asm",
"correct_labels": [
"asm"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"asp": {
"name": "asp",
"extensions": [
"aspx",
"asp"
],
"mime_type": "text/html",
"group": "code",
"magic": "HTML document",
"description": "ASP source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "asp",
"target_label": "asp",
"correct_labels": [
"asp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"avi": {
"name": "avi",
"extensions": [
"avi"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ax": {
"name": "ax",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"batch": {
"name": "batch",
"extensions": [
"bat"
],
"mime_type": "text/x-msdos-batch",
"group": "code",
"magic": "DOS batch file",
"description": "DOS batch file",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "batch",
"target_label": "batch",
"correct_labels": [
"batch"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"bcad": {
"name": "bcad",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"bib": {
"name": "bib",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"bmp": {
"name": "bmp",
"extensions": [
"bmp"
],
"mime_type": "image/bmp",
"group": "image",
"magic": "PC bitmap",
"description": "BMP image data",
"vt_type": "bmp",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"media"
],
"model_target_label": "bmp",
"target_label": "bmp",
"correct_labels": [
"bmp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"bpl": {
"name": "bpl",
"extensions": [
"bpl"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"brainfuck": {
"name": "brainfuck",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"bzip": {
"name": "bzip",
"extensions": [
"bz2",
"tbz2",
"tar.bz2"
],
"mime_type": "application/x-bzip2",
"group": "archive",
"magic": "bzip2 compressed data",
"description": "bzip2 compressed data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"archive"
],
"model_target_label": "bzip",
"target_label": "bzip",
"correct_labels": [
"bzip"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"c": {
"name": "c",
"extensions": [
"c",
"cpp",
"h",
"hpp",
"cc"
],
"mime_type": "text/x-c",
"group": "code",
"magic": "C source",
"description": "C source",
"vt_type": "c,cpp",
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "c",
"target_label": "c",
"correct_labels": [
"c",
"cpp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"cab": {
"name": "cab",
"extensions": [
"cab"
],
"mime_type": "application/vnd.ms-cab-compressed",
"group": "archive",
"magic": "Microsoft Cabinet archive data",
"description": "Microsoft Cabinet archive data",
"vt_type": "cab",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "cab",
"target_label": "cab",
"correct_labels": [
"cab"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"cad": {
"name": "cad",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"cat": {
"name": "cat",
"extensions": [
"cat"
],
"mime_type": "application/octet-stream",
"group": "application",
"magic": "data",
"description": "Windows Catalog file",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "cat",
"target_label": "cat",
"correct_labels": [
"cat",
"ctl"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"cdf": {
"name": "cdf",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"chm": {
"name": "chm",
"extensions": [
"chm"
],
"mime_type": "application/chm",
"group": "application",
"magic": "MS Windows HtmlHelp Data",
"description": "MS Windows HtmlHelp Data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "chm",
"target_label": "chm",
"correct_labels": [
"chm"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"clojure": {
"name": "clojure",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"cmake": {
"name": "cmake",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"cobol": {
"name": "cobol",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"coff": {
"name": "coff",
"extensions": [],
"mime_type": "application/x-coff",
"group": "executable",
"magic": "Intel 80386 COFF",
"description": "Intel 80386 COFF",
"vt_type": "coff",
"datasets": [
"vt-type"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "coff",
"target_label": "coff",
"correct_labels": [
"coff",
"exp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"coffee": {
"name": "coffee",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"com": {
"name": "com",
"extensions": [],
"mime_type": "application/x-dosexec",
"group": null,
"magic": null,
"description": null,
"vt_type": "com",
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"cpl": {
"name": "cpl",
"extensions": [
"cpl"
],
"mime_type": "application/x-dosexec",
"group": "executable",
"magic": "PE32 executable",
"description": "PE Windows executable",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"pebin"
],
"model_target_label": "pebin",
"target_label": "pebin",
"correct_labels": [
"pebin",
"cpl"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"cpp": {
"name": "cpp",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"crx": {
"name": "crx",
"extensions": [
"crx"
],
"mime_type": "application/x-chrome-extension",
"group": "executable",
"magic": "Google Chrome extension",
"description": "Google Chrome extension",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"zip_archive",
"archive"
],
"model_target_label": "crx",
"target_label": "crx",
"correct_labels": [
"crx"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"cs": {
"name": "cs",
"extensions": [
"cs"
],
"mime_type": "text/plain",
"group": "code",
"magic": "ASCII text",
"description": "C# source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "cs",
"target_label": "cs",
"correct_labels": [
"cs"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"css": {
"name": "css",
"extensions": [
"css"
],
"mime_type": "text/css",
"group": "code",
"magic": "ASCII text",
"description": "CSS source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "css",
"target_label": "css",
"correct_labels": [
"css"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"csv": {
"name": "csv",
"extensions": [
"csv"
],
"mime_type": "text/csv",
"group": "code",
"magic": "CSV text",
"description": "CSV document",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "csv",
"target_label": "csv",
"correct_labels": [
"csv"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"ctl": {
"name": "ctl",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"dart": {
"name": "dart",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"deb": {
"name": "deb",
"extensions": [
"deb"
],
"mime_type": "application/vnd.debian.binary-package",
"group": "archive",
"magic": "Debian binary package",
"description": "Debian binary package",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"archive"
],
"model_target_label": "deb",
"target_label": "deb",
"correct_labels": [
"deb"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"dex": {
"name": "dex",
"extensions": [
"dex"
],
"mime_type": "application/x-android-dex",
"group": "executable",
"magic": "Dalvik dex file",
"description": "Dalvik dex file",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "dex",
"target_label": "dex",
"correct_labels": [
"dex"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"dey": {
"name": "dey",
"extensions": [],
"mime_type": "application/x-android-dey",
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"diff": {
"name": "diff",
"extensions": [
"diff"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"directory": {
"name": "directory",
"extensions": [],
"mime_type": "inode/directory",
"group": "inode",
"magic": "directory",
"description": "A directory",
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": "directory",
"correct_labels": [
"directory"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": false
},
"dll": {
"name": "dll",
"extensions": [
"dll"
],
"mime_type": "application/x-dosexec",
"group": "executable",
"magic": "PE Windows executable",
"description": "PE Windows executable",
"vt_type": "pedll",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"pebin"
],
"model_target_label": "pebin",
"target_label": "pebin",
"correct_labels": [
"pebin",
"dll"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"dm": {
"name": "dm",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"dmg": {
"name": "dmg",
"extensions": [
"dmg"
],
"mime_type": "application/x-apple-diskimage",
"group": "archive",
"magic": "Apple disk image",
"description": "Apple disk image",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "dmg",
"target_label": "dmg",
"correct_labels": [
"dmg"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"doc": {
"name": "doc",
"extensions": [
"doc"
],
"mime_type": "application/msword",
"group": "document",
"magic": "Composite Document File",
"description": "Microsoft Word CDF document",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"cdf"
],
"model_target_label": "cdf",
"target_label": "doc",
"correct_labels": [
"doc"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"dockerfile": {
"name": "dockerfile",
"extensions": [
"=Dockerfile"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"docx": {
"name": "docx",
"extensions": [
"docx",
"docm"
],
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"group": "document",
"magic": "Microsoft Word 2007+",
"description": "Microsoft Word 2007+ document",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"ooxml",
"zip_archive",
"archive"
],
"model_target_label": "zip",
"target_label": "docx",
"correct_labels": [
"docx",
"tmdx"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"dosmbr": {
"name": "dosmbr",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"dylib": {
"name": "dylib",
"extensions": [
"dylib"
],
"mime_type": "application/x-mach-o",
"group": "executable",
"magic": "Mach-O executable",
"description": "Mach-O executable",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"macho"
],
"model_target_label": "macho",
"target_label": "macho",
"correct_labels": [
"macho",
"dylib"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"elf": {
"name": "elf",
"extensions": [
"elf",
"so"
],
"mime_type": "application/x-executable-elf",
"group": "executable",
"magic": "ELF executable",
"description": "ELF executable",
"vt_type": "elf",
"datasets": [
"vt-type"
],
"parent": null,
"tags": [
"binary",
"elf"
],
"model_target_label": "elf",
"target_label": "elf",
"correct_labels": [
"elf",
"so"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"elixir": {
"name": "elixir",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"emf": {
"name": "emf",
"extensions": [
"emf"
],
"mime_type": "application/octet-stream",
"group": "application",
"magic": "Windows Enhanced Metafile",
"description": "Windows Enhanced Metafile image data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "emf",
"target_label": "emf",
"correct_labels": [
"emf"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"eml": {
"name": "eml",
"extensions": [
"eml"
],
"mime_type": "message/rfc822",
"group": "text",
"magic": "RFC 822 mail",
"description": "RFC 822 mail",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"text"
],
"model_target_label": "eml",
"target_label": "eml",
"correct_labels": [
"eml"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"empty": {
"name": "empty",
"extensions": [],
"mime_type": "inode/x-empty",
"group": "inode",
"magic": "empty",
"description": "Empty file",
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": "empty",
"correct_labels": [
"empty"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": false
},
"epub": {
"name": "epub",
"extensions": [
"epub"
],
"mime_type": "application/epub+zip",
"group": "document",
"magic": "EPUB document",
"description": "EPUB document",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"zip_archive",
"archive"
],
"model_target_label": "zip",
"target_label": "epub",
"correct_labels": [
"epub"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"erlang": {
"name": "erlang",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ese": {
"name": "ese",
"extensions": [],
"mime_type": "application/x-ms-ese",
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"exe": {
"name": "exe",
"extensions": [
"exe"
],
"mime_type": "application/x-dosexec",
"group": "executable",
"magic": "PE Windows executable",
"description": "PE Windows executable",
"vt_type": "peexe",
"datasets": [
"vt-ext",
"vt-ext-malicious"
],
"parent": null,
"tags": [
"binary",
"pebin"
],
"model_target_label": "pebin",
"target_label": "pebin",
"correct_labels": [
"pebin",
"exe"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"exp": {
"name": "exp",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"flac": {
"name": "flac",
"extensions": [
"flac"
],
"mime_type": "audio/flac",
"group": "audio",
"magic": "FLAC audio bitstream data",
"description": "FLAC audio bitstream data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "flac",
"target_label": "flac",
"correct_labels": [
"flac"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"fortran": {
"name": "fortran",
"extensions": [
"f90",
"f95",
"f03"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"fpx": {
"name": "fpx",
"extensions": [
"fpx"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": "fpx",
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"gif": {
"name": "gif",
"extensions": [
"gif"
],
"mime_type": "image/gif",
"group": "image",
"magic": "GIF image data",
"description": "GIF image data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"media"
],
"model_target_label": "gif",
"target_label": "gif",
"correct_labels": [
"gif"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"go": {
"name": "go",
"extensions": [
"go"
],
"mime_type": "text/x-golang",
"group": "code",
"magic": "ASCII text",
"description": "Golang source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "go",
"target_label": "go",
"correct_labels": [
"go"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"gpx": {
"name": "gpx",
"extensions": [
"gpx"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"groovy": {
"name": "groovy",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"gzip": {
"name": "gzip",
"extensions": [
"gz",
"gzip",
"tgz",
"tar.gz"
],
"mime_type": "application/gzip",
"group": "archive",
"magic": "gzip compressed data",
"description": "gzip compressed data",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"archive"
],
"model_target_label": "gzip",
"target_label": "gzip",
"correct_labels": [
"gzip"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"haskell": {
"name": "haskell",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"hfs": {
"name": "hfs",
"extensions": [
"hfs"
],
"mime_type": "application/x-hfs",
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"hlp": {
"name": "hlp",
"extensions": [
"hlp"
],
"mime_type": "application/winhlp",
"group": "application",
"magic": "MS Windows help",
"description": "MS Windows help",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "hlp",
"target_label": "hlp",
"correct_labels": [
"hlp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"hta": {
"name": "hta",
"extensions": [
"hta"
],
"mime_type": "application/hta",
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"html": {
"name": "html",
"extensions": [
"html",
"htm",
"xhtml",
"xht"
],
"mime_type": "text/html",
"group": "code",
"magic": "HTML document",
"description": "HTML document",
"vt_type": "html",
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "html",
"target_label": "html",
"correct_labels": [
"html"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"hve": {
"name": "hve",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ico": {
"name": "ico",
"extensions": [
"ico"
],
"mime_type": "image/vnd.microsoft.icon",
"group": "image",
"magic": "MS Windows icon resource",
"description": "MS Windows icon resource",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "ico",
"target_label": "ico",
"correct_labels": [
"ico"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"img": {
"name": "img",
"extensions": [
"img"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ini": {
"name": "ini",
"extensions": [
"ini"
],
"mime_type": "text/plain",
"group": "text",
"magic": "Generic INItialization configuration",
"description": "INI configuration file",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "ini",
"target_label": "ini",
"correct_labels": [
"ini"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"internetshortcut": {
"name": "internetshortcut",
"extensions": [
"url"
],
"mime_type": "application/x-mswinurl",
"group": "application",
"magic": "MS Windows 95 Internet shortcut",
"description": "MS Windows Internet shortcut",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"text"
],
"model_target_label": "internetshortcut",
"target_label": "internetshortcut",
"correct_labels": [
"internetshortcut"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"iosapp": {
"name": "iosapp",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"iso": {
"name": "iso",
"extensions": [
"iso"
],
"mime_type": "application/x-iso9660-image",
"group": "archive",
"magic": "ISO 9660 CD-ROM filesystem data",
"description": "ISO 9660 CD-ROM filesystem data",
"vt_type": "isoimage",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "iso",
"target_label": "iso",
"correct_labels": [
"iso",
"udf"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"jar": {
"name": "jar",
"extensions": [
"jar"
],
"mime_type": "application/java-archive",
"group": "archive",
"magic": "Java archive data (JAR)",
"description": "Java archive data (JAR)",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"zip_archive",
"archive"
],
"model_target_label": "jar",
"target_label": "jar",
"correct_labels": [
"jar"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"java": {
"name": "java",
"extensions": [
"java"
],
"mime_type": "text/x-java",
"group": "code",
"magic": "Java source",
"description": "Java source",
"vt_type": "java",
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "java",
"target_label": "java",
"correct_labels": [
"java"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"javabytecode": {
"name": "javabytecode",
"extensions": [
"class"
],
"mime_type": "application/x-java-applet",
"group": "executable",
"magic": "compiled Java class data",
"description": "Java compiled bytecode",
"vt_type": "class",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "javabytecode",
"target_label": "javabytecode",
"correct_labels": [
"javabytecode"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"javascript": {
"name": "javascript",
"extensions": [
"js"
],
"mime_type": "application/javascript",
"group": "code",
"magic": "JavaScript source",
"description": "JavaScript source",
"vt_type": "javascript",
"datasets": [
"github",
"vt-ext",
"vt-ext-malicious"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "javascript",
"target_label": "javascript",
"correct_labels": [
"javascript",
"typescript"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"jpeg": {
"name": "jpeg",
"extensions": [
"jpg",
"jpeg"
],
"mime_type": "image/jpeg",
"group": "image",
"magic": "JPEG image data",
"description": "JPEG image data",
"vt_type": "jpeg",
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"media"
],
"model_target_label": "jpeg",
"target_label": "jpeg",
"correct_labels": [
"jpeg"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"json": {
"name": "json",
"extensions": [
"json"
],
"mime_type": "application/json",
"group": "code",
"magic": "JSON data",
"description": "JSON document",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "json",
"target_label": "json",
"correct_labels": [
"json"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"julia": {
"name": "julia",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"ko": {
"name": "ko",
"extensions": [
"ko"
],
"mime_type": "application/x-executable-elf",
"group": "executable",
"magic": "ELF executable",
"description": "ELF executable",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary",
"elf"
],
"model_target_label": "elf",
"target_label": "elf",
"correct_labels": [
"elf",
"ko"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"kotlin": {
"name": "kotlin",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"latex": {
"name": "latex",
"extensions": [
"tex"
],
"mime_type": "text/x-tex",
"group": "text",
"magic": "LaTeX document",
"description": "LaTeX document",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "latex",
"target_label": "latex",
"correct_labels": [
"latex"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"lisp": {
"name": "lisp",
"extensions": [
"lisp"
],
"mime_type": "text/x-lisp",
"group": "code",
"magic": "Lisp/Scheme program",
"description": "Lisp source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "lisp",
"target_label": "lisp",
"correct_labels": [
"lisp"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"lnk": {
"name": "lnk",
"extensions": [
"lnk"
],
"mime_type": "application/x-ms-shortcut",
"group": "application",
"magic": "MS Windows shortcut",
"description": "MS Windows shortcut",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "lnk",
"target_label": "lnk",
"correct_labels": [
"lnk"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"lua": {
"name": "lua",
"extensions": [],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"m3u": {
"name": "m3u",
"extensions": [
"m3u8",
"m3u"
],
"mime_type": "text/plain",
"group": "application",
"magic": "M3U playlist",
"description": "M3U playlist",
"vt_type": null,
"datasets": [
"vt-ext"
],
"parent": null,
"tags": [
"binary"
],
"model_target_label": "m3u",
"target_label": "m3u",
"correct_labels": [
"m3u"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"macho": {
"name": "macho",
"extensions": [],
"mime_type": "application/x-mach-o",
"group": "executable",
"magic": "Mach-O executable",
"description": "Mach-O executable",
"vt_type": "macho",
"datasets": [
"vt-type"
],
"parent": null,
"tags": [
"binary",
"macho"
],
"model_target_label": "macho",
"target_label": "macho",
"correct_labels": [
"macho",
"dylib"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"maff": {
"name": "maff",
"extensions": [
"maff"
],
"mime_type": null,
"group": null,
"magic": null,
"description": null,
"vt_type": null,
"datasets": [],
"parent": null,
"tags": [],
"model_target_label": null,
"target_label": null,
"correct_labels": [],
"in_scope_for_output_content_type": false,
"in_scope_for_training": false
},
"makefile": {
"name": "makefile",
"extensions": [
"=Makefile"
],
"mime_type": "text/x-makefile",
"group": "code",
"magic": "makefile script",
"description": "Makefile source",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
"text",
"dl_target"
],
"model_target_label": "makefile",
"target_label": "makefile",
"correct_labels": [
"makefile"
],
"in_scope_for_output_content_type": true,
"in_scope_for_training": true
},
"markdown": {
"name": "markdown",
"extensions": [
"md"
],
"mime_type": "text/markdown",
"group": "text",
"magic": "ASCII text",
"description": "Markdown document",
"vt_type": null,
"datasets": [
"github"
],
"parent": null,
"tags": [
gitextract_9puqy48w/
├── .cargo/
│ └── config.toml
├── .dockerignore
├── .gemini/
│ └── config.yaml
├── .gitattributes
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── misdetection.md
│ │ └── new_content_type_request.md
│ ├── dependabot.yml
│ ├── labeler.yml
│ ├── scorecard.yml
│ └── workflows/
│ ├── cli-latest.yml
│ ├── cli-release.yml
│ ├── codeql.yml
│ ├── docs-check.yml
│ ├── github-issue-labeler.yml
│ ├── github-pages.yml
│ ├── go-test.yml
│ ├── js-check-import-scenarios.yml
│ ├── js-docs-builder.yml
│ ├── js-publish.yml
│ ├── js-test.yml
│ ├── python-build-and-release-package.yml
│ ├── python-test-published-package.yml
│ ├── python-test-published-rc-package.yml
│ ├── python-test-suite.yml
│ ├── rust-test.yml
│ ├── scorecard.yml
│ └── website-test.yml
├── .gitignore
├── CITATION.cff
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── assets/
│ ├── content_types_kb.min.json
│ └── models/
│ ├── CHANGELOG.md
│ ├── begonly_v2_1/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── fast_v2_1/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v1/
│ │ ├── README.md
│ │ ├── content_types_config.json
│ │ ├── magika_config.json
│ │ ├── model.h5
│ │ ├── model_config.json
│ │ └── thresholds.json
│ ├── standard_v2_0/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v2_1/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ ├── model.keras
│ │ └── model.onnx
│ ├── standard_v3_0/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ ├── standard_v3_1/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ ├── standard_v3_2/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.onnx
│ └── standard_v3_3/
│ ├── README.md
│ ├── config.min.json
│ ├── metadata.json
│ └── model.onnx
├── dist-workspace.toml
├── docs/
│ ├── concepts.md
│ └── js.md
├── go/
│ ├── README.md
│ ├── cli/
│ │ ├── cli.go
│ │ ├── cli_test.go
│ │ ├── main.go
│ │ └── tests_data/
│ │ └── magika_test_pptx.txt
│ ├── docker/
│ │ └── Dockerfile
│ ├── example/
│ │ └── main.go
│ ├── go.mod
│ ├── go.sum
│ ├── magika/
│ │ ├── config.go
│ │ ├── content.go
│ │ ├── features.go
│ │ ├── features_test.go
│ │ ├── scanner.go
│ │ └── scanner_test.go
│ └── onnx/
│ ├── onnx.go
│ ├── onnx_runtime.go
│ ├── onnx_runtime.h
│ ├── onnx_runtime_test.go
│ └── onnx_zero.go
├── js/
│ ├── .gitignore
│ ├── CHANGELOG.md
│ ├── README.md
│ ├── magika-cli.ts
│ ├── magika-node.ts
│ ├── magika.ts
│ ├── package.json
│ ├── postBuild.js
│ ├── simple_examples/
│ │ ├── browser-esmodule-example/
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── package.json
│ │ │ ├── playwright.config.ts
│ │ │ └── test/
│ │ │ └── simple.spec.ts
│ │ ├── node-commonjs-example/
│ │ │ ├── index.js
│ │ │ └── package.json
│ │ ├── node-esmodule-example/
│ │ │ ├── index.js
│ │ │ └── package.json
│ │ ├── run_examples.sh
│ │ └── typescript-esmodule-example/
│ │ ├── index.ts
│ │ └── package.json
│ ├── src/
│ │ ├── .npmignore
│ │ ├── content-type-info.ts
│ │ ├── content-type-label.ts
│ │ ├── content-types-infos.ts
│ │ ├── magika-options.ts
│ │ ├── magika-prediction.ts
│ │ ├── magika-result.ts
│ │ ├── model-config-node.ts
│ │ ├── model-config.ts
│ │ ├── model-features.ts
│ │ ├── model-node.ts
│ │ ├── model-prediction.ts
│ │ ├── model.ts
│ │ ├── overwrite-reason.ts
│ │ ├── prediction-mode.ts
│ │ └── status.ts
│ ├── test/
│ │ ├── features-extraction-vs-reference.test.ts
│ │ ├── inference-vs-reference.test.ts
│ │ ├── magika-cli.test.ts
│ │ ├── magika.test.ts
│ │ ├── tfnHook.ts
│ │ └── utils.ts
│ ├── tsconfig.cjs.json
│ ├── tsconfig.esm.json
│ └── tsconfig.json
├── python/
│ ├── .gitignore
│ ├── .python-version
│ ├── CHANGELOG.md
│ ├── README.md
│ ├── mypy.ini
│ ├── pyproject.toml
│ ├── pytest.ini
│ ├── scripts/
│ │ ├── check_changelog.sh
│ │ ├── check_copyright.py
│ │ ├── check_documentation.py
│ │ ├── check_source.sh
│ │ ├── generate_reference.py
│ │ ├── pre_release_check.py
│ │ ├── prepare_pyproject_for_pure_python_wheel.py
│ │ ├── run_quick_test_magika_cli.py
│ │ ├── run_quick_test_magika_module.py
│ │ ├── sync.py
│ │ └── test_magika_model.py
│ ├── src/
│ │ └── magika/
│ │ ├── __init__.py
│ │ ├── cli/
│ │ │ ├── magika_client.py
│ │ │ └── magika_rust_client_not_found_warning.py
│ │ ├── colors.py
│ │ ├── config/
│ │ │ └── content_types_kb.min.json
│ │ ├── logger.py
│ │ ├── magika.py
│ │ ├── models/
│ │ │ └── standard_v3_3/
│ │ │ ├── README.md
│ │ │ ├── config.min.json
│ │ │ ├── metadata.json
│ │ │ └── model.onnx
│ │ ├── py.typed
│ │ └── types/
│ │ ├── __init__.py
│ │ ├── content_type_info.py
│ │ ├── content_type_label.py
│ │ ├── magika_error.py
│ │ ├── magika_prediction.py
│ │ ├── magika_result.py
│ │ ├── model.py
│ │ ├── overwrite_reason.py
│ │ ├── prediction_mode.py
│ │ ├── seekable.py
│ │ ├── status.py
│ │ └── strenum.py
│ └── tests/
│ ├── __init__.py
│ ├── test_features_extraction_vs_reference.py
│ ├── test_inference_vs_reference.py
│ ├── test_magika_python_module.py
│ ├── test_python_magika_client.py
│ └── utils.py
├── rust/
│ ├── .gitignore
│ ├── README.md
│ ├── changelog.sh
│ ├── cli/
│ │ ├── CHANGELOG.md
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── output
│ │ ├── publish.sh
│ │ ├── src/
│ │ │ └── main.rs
│ │ └── test.sh
│ ├── color.sh
│ ├── gen/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── content_types
│ │ ├── src/
│ │ │ └── main.rs
│ │ └── test.sh
│ ├── latest.sh
│ ├── lib/
│ │ ├── CHANGELOG.md
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── src/
│ │ │ ├── builder.rs
│ │ │ ├── config.rs
│ │ │ ├── content.rs
│ │ │ ├── error.rs
│ │ │ ├── file.rs
│ │ │ ├── future.rs
│ │ │ ├── input.rs
│ │ │ ├── lib.rs
│ │ │ ├── model.rs
│ │ │ └── session.rs
│ │ └── test.sh
│ ├── onnx/
│ │ ├── build.sh
│ │ └── maturin.sh
│ ├── publish.sh
│ ├── rustfmt.toml
│ ├── sync.sh
│ ├── taplo.toml
│ └── test.sh
├── tests_data/
│ ├── README.md
│ ├── basic/
│ │ ├── asm/
│ │ │ └── code.asm
│ │ ├── batch/
│ │ │ └── simple.bat
│ │ ├── c/
│ │ │ └── code.c
│ │ ├── css/
│ │ │ └── code.css
│ │ ├── csv/
│ │ │ └── magika_test.csv
│ │ ├── dockerfile/
│ │ │ └── Dockerfile
│ │ ├── docx/
│ │ │ ├── doc.docx
│ │ │ └── magika_test.docx
│ │ ├── eml/
│ │ │ └── sample.eml
│ │ ├── empty/
│ │ │ └── empty_file
│ │ ├── epub/
│ │ │ ├── doc.epub
│ │ │ └── magika_test.epub
│ │ ├── flac/
│ │ │ └── test.flac
│ │ ├── handlebars/
│ │ │ └── example.handlebars
│ │ ├── html/
│ │ │ └── doc.html
│ │ ├── ignorefile/
│ │ │ ├── example.ignorefile
│ │ │ └── other.ignorefile
│ │ ├── ini/
│ │ │ └── doc.ini
│ │ ├── javascript/
│ │ │ └── code.js
│ │ ├── jinja/
│ │ │ └── example.j2
│ │ ├── json/
│ │ │ └── doc.json
│ │ ├── latex/
│ │ │ └── sample.tex
│ │ ├── makefile/
│ │ │ └── simple.Makefile
│ │ ├── markdown/
│ │ │ ├── README.md
│ │ │ ├── magika_test.md
│ │ │ └── simple.md
│ │ ├── mht/
│ │ │ └── sample.mht
│ │ ├── odp/
│ │ │ └── magika_test.odp
│ │ ├── ods/
│ │ │ └── magika_test.ods
│ │ ├── odt/
│ │ │ ├── doc.odt
│ │ │ └── magika_test.odt
│ │ ├── ogg/
│ │ │ └── test.ogg
│ │ ├── outlook/
│ │ │ └── sample.msg
│ │ ├── pem/
│ │ │ ├── doc.pem
│ │ │ └── doc.pub
│ │ ├── pptx/
│ │ │ └── magika_test.pptx
│ │ ├── psd/
│ │ │ └── MagikaTest.psd
│ │ ├── python/
│ │ │ └── code.py
│ │ ├── pytorch/
│ │ │ └── example.pth
│ │ ├── rtf/
│ │ │ ├── doc.rtf
│ │ │ └── magika_test.rtf
│ │ ├── ruby/
│ │ │ └── code.rb
│ │ ├── rust/
│ │ │ ├── asm.rs
│ │ │ ├── code.rs
│ │ │ ├── test_case1.rs
│ │ │ └── test_case2.rs
│ │ ├── smali/
│ │ │ └── code.smali
│ │ ├── srt/
│ │ │ └── code.srt
│ │ ├── swift/
│ │ │ └── code.swift
│ │ ├── toml/
│ │ │ └── doc.toml
│ │ ├── tsv/
│ │ │ └── magika_test.tsv
│ │ ├── twig/
│ │ │ └── example.twig
│ │ ├── txt/
│ │ │ ├── complex-sentence.txt
│ │ │ ├── few-words.txt
│ │ │ ├── lorem-big.txt
│ │ │ ├── lorem-small.txt
│ │ │ ├── magika_test_pptx.txt
│ │ │ ├── many-words.txt
│ │ │ ├── one-sentence-with-newline.txt
│ │ │ ├── one-sentence.txt
│ │ │ └── random-ascii.txt
│ │ ├── typescript/
│ │ │ └── code.ts
│ │ ├── xlsx/
│ │ │ └── magika_test.xlsx
│ │ ├── yaml/
│ │ │ ├── dependabot.yml
│ │ │ └── python-test.yml
│ │ ├── yara/
│ │ │ └── rule.yar
│ │ └── zig/
│ │ └── code.zig
│ ├── current_missdetections/
│ │ ├── html/
│ │ │ └── malformed-html-gh-521.html
│ │ └── xls/
│ │ └── password-protected-example.xls
│ ├── mitra/
│ │ ├── bzip/
│ │ │ └── bzip2.bz2
│ │ ├── cab/
│ │ │ └── cab.cab
│ │ ├── elf/
│ │ │ ├── elf.elf
│ │ │ └── elf64.elf
│ │ ├── flac/
│ │ │ ├── flac.flac
│ │ │ └── tiny.flac
│ │ ├── iso/
│ │ │ └── iso.iso
│ │ ├── ogg/
│ │ │ └── vorbis.ogg
│ │ ├── pcap/
│ │ │ └── pcap.pcap
│ │ ├── php/
│ │ │ └── php.php
│ │ ├── rtf/
│ │ │ └── rich.rtf
│ │ ├── tga/
│ │ │ └── footer.tga
│ │ ├── tiff/
│ │ │ ├── tiff-be.tif
│ │ │ └── tiff-le.tif
│ │ ├── webm/
│ │ │ └── webm.webm
│ │ ├── xar/
│ │ │ ├── hello-world.xar
│ │ │ └── mini.xar
│ │ └── xz/
│ │ └── xz.xz
│ └── mitra_candidates/
│ ├── DS_Store
│ ├── ace.ace
│ ├── dicom.dcm
│ ├── hdf5.h5
│ ├── html.htm
│ ├── jp2-stream.jp2
│ ├── jp2.jp2
│ ├── lha.lzh
│ ├── lzip.lz
│ ├── mini.bplist
│ ├── mini.plist
│ ├── mini.protobuf
│ ├── pcapng.pcapng
│ ├── photoshop.psd
│ ├── qoi.qoi
│ ├── raw.tga
│ ├── tiny.avro
│ ├── wad.wad
│ └── wasm.wasm
├── website/
│ ├── .gitignore
│ ├── README.md
│ ├── index.html
│ ├── jsconfig.json
│ ├── package.json
│ ├── public/
│ │ ├── model/
│ │ │ ├── config.json
│ │ │ └── model.json
│ │ └── models/
│ │ ├── standard_v3_2/
│ │ │ ├── config.min.json
│ │ │ ├── metadata.json
│ │ │ └── model.json
│ │ └── standard_v3_3/
│ │ ├── README.md
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.json
│ ├── src/
│ │ ├── App.vue
│ │ └── main.js
│ └── vite.config.js
└── website-ng/
├── .gcloudignore
├── .gitignore
├── README.md
├── app.yaml
├── astro.config.mjs
├── components.json
├── content.config.ts
├── jsrepo.json
├── package.json
├── public/
│ └── models/
│ ├── standard_v3_2/
│ │ ├── config.min.json
│ │ ├── metadata.json
│ │ └── model.json
│ └── standard_v3_3/
│ ├── README.md
│ ├── config.min.json
│ ├── metadata.json
│ └── model.json
├── src/
│ ├── components/
│ │ └── MagikaDemo.svelte
│ ├── content/
│ │ └── docs/
│ │ ├── additional-resources/
│ │ │ ├── changelog.md
│ │ │ ├── disclaimer.md
│ │ │ ├── faq.md
│ │ │ ├── license.md
│ │ │ ├── related-blog-posts.md
│ │ │ └── research-papers-and-citation.md
│ │ ├── cli-and-bindings/
│ │ │ ├── cli.md
│ │ │ ├── js-api.md
│ │ │ ├── js.md
│ │ │ ├── other-bindings.md
│ │ │ ├── overview.md
│ │ │ ├── python.md
│ │ │ └── rust.md
│ │ ├── contributing/
│ │ │ ├── creating-new-bindings.md
│ │ │ ├── how-to-contribute.md
│ │ │ ├── known-limitations.md
│ │ │ └── reporting-security-vulnerabilities.md
│ │ ├── core-concepts/
│ │ │ ├── how-magika-works.md
│ │ │ ├── models-and-content-types.md
│ │ │ ├── prediction-modes.md
│ │ │ └── understanding-the-output.md
│ │ ├── demo/
│ │ │ └── magika-demo.mdx
│ │ ├── getting-started/
│ │ │ ├── installation.mdx
│ │ │ └── quick-start.md
│ │ ├── index.mdx
│ │ ├── introduction/
│ │ │ └── overview.md
│ │ └── models/
│ │ └── standard_v3_3.md
│ ├── content.config.ts
│ ├── lib/
│ │ ├── components/
│ │ │ └── ui/
│ │ │ ├── button/
│ │ │ │ ├── button.svelte
│ │ │ │ └── index.ts
│ │ │ ├── card/
│ │ │ │ ├── card-action.svelte
│ │ │ │ ├── card-content.svelte
│ │ │ │ ├── card-description.svelte
│ │ │ │ ├── card-footer.svelte
│ │ │ │ ├── card-header.svelte
│ │ │ │ ├── card-title.svelte
│ │ │ │ ├── card.svelte
│ │ │ │ └── index.ts
│ │ │ ├── file-drop-zone/
│ │ │ │ ├── file-drop-zone.svelte
│ │ │ │ ├── index.ts
│ │ │ │ └── types.ts
│ │ │ ├── input/
│ │ │ │ ├── index.ts
│ │ │ │ └── input.svelte
│ │ │ ├── label/
│ │ │ │ ├── index.ts
│ │ │ │ └── label.svelte
│ │ │ ├── progress/
│ │ │ │ ├── index.ts
│ │ │ │ └── progress.svelte
│ │ │ ├── tabs/
│ │ │ │ ├── index.ts
│ │ │ │ ├── tabs-content.svelte
│ │ │ │ ├── tabs-list.svelte
│ │ │ │ ├── tabs-trigger.svelte
│ │ │ │ └── tabs.svelte
│ │ │ └── textarea/
│ │ │ ├── index.ts
│ │ │ └── textarea.svelte
│ │ ├── utils/
│ │ │ └── utils.ts
│ │ └── utils.ts
│ ├── pages/
│ │ ├── install.ps1.ts
│ │ └── install.sh.ts
│ └── styles/
│ └── global.css
├── svelte.config.js
└── tsconfig.json
SYMBOL INDEX (590 symbols across 98 files)
FILE: go/cli/cli.go
constant assetsDirEnv (line 13) | assetsDirEnv = "MAGIKA_ASSETS_DIR"
constant modelNameEnv (line 14) | modelNameEnv = "MAGIKA_MODEL"
function cli (line 20) | func cli(w io.Writer, args ...string) error {
FILE: go/cli/cli_test.go
function TestCLI (line 13) | func TestCLI(t *testing.T) {
FILE: go/cli/main.go
function main (line 21) | func main() {
FILE: go/example/main.go
constant assetsDir (line 24) | assetsDir = "/opt/magika/assets"
constant modelName (line 26) | modelName = "standard_v3_3"
function main (line 29) | func main() {
FILE: go/magika/config.go
constant configFile (line 11) | configFile = "config.min.json"
constant contentTypesKBFile (line 12) | contentTypesKBFile = "content_types_kb.min.json"
constant modelFile (line 13) | modelFile = "model.onnx"
constant modelsDir (line 14) | modelsDir = "models"
type Config (line 19) | type Config struct
function ReadConfig (line 35) | func ReadConfig(assetsDir, name string) (Config, error) {
function contentTypesKBPath (line 50) | func contentTypesKBPath(assetDir string) string {
function configPath (line 56) | func configPath(assetDir, name string) string {
function modelPath (line 61) | func modelPath(assetDir, name string) string {
FILE: go/magika/content.go
constant contentTypeLabelEmpty (line 10) | contentTypeLabelEmpty = "empty"
constant contentTypeLabelTxt (line 11) | contentTypeLabelTxt = "txt"
constant contentTypeLabelUnknown (line 12) | contentTypeLabelUnknown = "unknown"
type ContentType (line 16) | type ContentType struct
function readContentTypesKB (line 29) | func readContentTypesKB(assetsDir string) (map[string]ContentType, error) {
FILE: go/magika/features.go
type Features (line 10) | type Features struct
method Flatten (line 51) | func (f Features) Flatten() []int32 {
function ExtractFeatures (line 23) | func ExtractFeatures(cfg Config, r io.ReaderAt, size int) (Features, err...
type errReader (line 63) | type errReader struct
method readAt (line 69) | func (e *errReader) readAt(off, n int) []byte {
function buildFeatures (line 88) | func buildFeatures(cfg Config, beg, mid, end []byte) Features {
function padInt32 (line 111) | func padInt32(cfg Config, b []byte, prefix, size int) []int32 {
function safeSlice (line 128) | func safeSlice(b []byte, from, to int) []byte {
FILE: go/magika/features_test.go
function TestExtractFeatures (line 15) | func TestExtractFeatures(t *testing.T) {
function TestReferenceExtractFeatures (line 41) | func TestReferenceExtractFeatures(t *testing.T) {
function loadArtifacts (line 70) | func loadArtifacts(t *testing.T, path string) ([]byte, error) {
FILE: go/magika/scanner.go
type Scanner (line 16) | type Scanner struct
method Scan (line 51) | func (s *Scanner) Scan(r io.ReaderAt, size int) (ContentType, error) {
method scanScore (line 59) | func (s *Scanner) scanScore(r io.ReaderAt, size int) (ContentType, flo...
method contentType (line 95) | func (s *Scanner) contentType(best int, score float32) (ContentType, e...
function NewScanner (line 24) | func NewScanner(assetsDir, name string) (*Scanner, error) {
FILE: go/magika/scanner_test.go
function TestScannerBasic (line 17) | func TestScannerBasic(t *testing.T) {
function TestScannerSmall (line 49) | func TestScannerSmall(t *testing.T) {
function TestScannerReference (line 80) | func TestScannerReference(t *testing.T) {
function newTestScanner (line 151) | func newTestScanner(t *testing.T) *Scanner {
FILE: go/onnx/onnx.go
type Onnx (line 4) | type Onnx interface
FILE: go/onnx/onnx_runtime.go
function NewOnnx (line 16) | func NewOnnx(modelPath string, sizeTarget int) (Onnx, error) {
type onnxRuntime (line 29) | type onnxRuntime struct
method Run (line 36) | func (ort *onnxRuntime) Run(features []int32) ([]float32, error) {
FILE: go/onnx/onnx_runtime.h
function OrtApi (line 11) | const OrtApi *GetApiBase() {
function OrtStatus (line 15) | OrtStatus *CreateSession(const OrtApi *ort, const char *model, OrtSessio...
function OrtStatus (line 27) | OrtStatus *Run(const OrtApi *ort, OrtSession *session, OrtMemoryInfo *me...
FILE: go/onnx/onnx_runtime_test.go
function TestONNXRuntime (line 13) | func TestONNXRuntime(t *testing.T) {
FILE: go/onnx/onnx_zero.go
function NewOnnx (line 7) | func NewOnnx(string, int) (Onnx, error) {
FILE: js/magika-node.ts
class MagikaNode (line 45) | class MagikaNode extends Magika {
method constructor (line 49) | protected constructor() {
method create (line 64) | public static async create(options?: MagikaOptions): Promise<MagikaNod...
method load (line 70) | protected async load(options?: MagikaOptions): Promise<void> {
method identifyStream (line 97) | public async identifyStream(
method _identifyFromStream (line 105) | private async _identifyFromStream(
FILE: js/magika.ts
class Magika (line 44) | class Magika {
method constructor (line 55) | protected constructor() {
method create (line 70) | public static async create(options?: MagikaOptions): Promise<Magika> {
method load (line 76) | protected async load(options?: MagikaOptions): Promise<void> {
method identifyBytes (line 93) | public async identifyBytes(fileBytes: Uint8Array): Promise<MagikaResul...
method getModelName (line 98) | public getModelName(): string {
method _getResultFromFewBytes (line 102) | private _getResultFromFewBytes(
method _lstrip (line 131) | private static _lstrip(fileBytes: Uint8Array): Uint8Array {
method _rstrip (line 142) | private static _rstrip(fileBytes: Uint8Array): Uint8Array {
method _identifyFromBytes (line 153) | protected async _identifyFromBytes(
method _getOutputLabelFromModelPrediction (line 182) | private _getOutputLabelFromModelPrediction(
method _extractFeaturesFromBytes (line 222) | protected static _extractFeaturesFromBytes(
method _getContentTypeInfo (line 253) | private _getContentTypeInfo(label: ContentTypeLabel): ContentTypeInfo {
method _getResultFromLabelsAndScore (line 257) | private _getResultFromLabelsAndScore(
method _getResultFromFeatures (line 279) | private async _getResultFromFeatures(
method _getModelName (line 296) | protected _getModelName(pathOrUrl: string): string {
FILE: js/simple_examples/browser-esmodule-example/index.js
function main (line 3) | async function main() {
FILE: js/simple_examples/node-commonjs-example/index.js
function main (line 3) | async function main() {
FILE: js/simple_examples/typescript-esmodule-example/index.ts
function main (line 3) | async function main(): Promise<void> {
FILE: js/src/content-type-info.ts
type ContentTypeInfo (line 17) | interface ContentTypeInfo {
FILE: js/src/content-type-label.ts
type ContentTypeLabel (line 19) | enum ContentTypeLabel {
FILE: js/src/content-types-infos.ts
type ContentTypesInfos (line 20) | type ContentTypesInfos = Record<ContentTypeLabel, ContentTypeInfo>;
FILE: js/src/magika-options.ts
type MagikaOptions (line 15) | interface MagikaOptions {
FILE: js/src/magika-prediction.ts
type MagikaPrediction (line 19) | interface MagikaPrediction {
FILE: js/src/magika-result.ts
type MagikaResult (line 18) | interface MagikaResult {
FILE: js/src/model-config-node.ts
class ModelConfigNode (line 18) | class ModelConfigNode extends ModelConfig {
method loadFile (line 19) | async loadFile(configPath: string): Promise<void> {
FILE: js/src/model-config.ts
class ModelConfig (line 17) | class ModelConfig {
method loadUrl (line 31) | async loadUrl(configURL: string): Promise<void> {
method setConfig (line 43) | protected setConfig(config: Record<string, any>): void {
FILE: js/src/model-features.ts
class ModelFeatures (line 15) | class ModelFeatures {
method constructor (line 20) | constructor(
method withStart (line 43) | withStart(data: Uint8Array, offset: number): this {
method withEnd (line 51) | withEnd(data: Uint8Array, offset: number): this {
method toArray (line 59) | toArray(): number[] {
FILE: js/src/model-node.ts
class ModelNode (line 19) | class ModelNode extends Model {
method loadFile (line 20) | async loadFile(modelPath: string): Promise<void> {
FILE: js/src/model-prediction.ts
type ModelPrediction (line 17) | interface ModelPrediction {
FILE: js/src/model.ts
class Model (line 22) | class Model {
method constructor (line 25) | constructor(public model_config: ModelConfig) {}
method loadUrl (line 27) | async loadUrl(modelURL: string): Promise<void> {
method predict (line 33) | async predict(features: ModelFeatures): Promise<ModelPrediction> {
FILE: js/src/overwrite-reason.ts
type OverwriteReason (line 15) | enum OverwriteReason {
FILE: js/src/prediction-mode.ts
type PredictionMode (line 15) | enum PredictionMode {
FILE: js/src/status.ts
type Status (line 15) | enum Status {
FILE: js/test/features-extraction-vs-reference.test.ts
constant FEATURES_EXTRACTION_EXAMPLES (line 20) | const FEATURES_EXTRACTION_EXAMPLES: FeaturesExtractionExamples = [
class TestableMagika (line 24) | class TestableMagika extends Magika {
method extractFeaturesFromBytes (line 25) | public static extractFeaturesFromBytes(
type FeaturesExtractionExample (line 79) | interface FeaturesExtractionExample {
type FeaturesExtractionExampleArgs (line 86) | interface FeaturesExtractionExampleArgs {
type FeaturesExtractionExampleMetadata (line 95) | interface FeaturesExtractionExampleMetadata {
type ExampleModelFeatures (line 101) | interface ExampleModelFeatures {
type FeaturesExtractionExamples (line 111) | type FeaturesExtractionExamples = FeaturesExtractionExample[];
function parseGzippedFeaturesExtractionExamples (line 113) | function parseGzippedFeaturesExtractionExamples(): FeaturesExtractionExa...
FILE: js/test/inference-vs-reference.test.ts
constant EXAMPLES_BY_PATH_INFO (line 27) | const EXAMPLES_BY_PATH_INFO: Array<[string, ExampleByPath]> = (() => {
constant EXAMPLES_BY_CONTENT (line 35) | const EXAMPLES_BY_CONTENT: ExamplesByContent = [
type Prediction (line 125) | interface Prediction {
type ExampleByPath (line 132) | interface ExampleByPath {
type ExamplesByPath (line 139) | type ExamplesByPath = ExampleByPath[];
type ExampleByContent (line 141) | interface ExampleByContent {
type ExamplesByContent (line 148) | type ExamplesByContent = ExampleByContent[];
function parseGzippedExamplesByPath (line 150) | function parseGzippedExamplesByPath(): ExamplesByPath {
function parseGzippedExamplesByContent (line 167) | function parseGzippedExamplesByContent(): ExamplesByContent {
function validatePredictionMode (line 184) | function validatePredictionMode(prediction_mode: PredictionMode): boolean {
function validatePrediction (line 188) | function validatePrediction(prediction?: Prediction): boolean {
FILE: js/test/magika-cli.test.ts
function executeCli (line 23) | async function executeCli(
FILE: js/test/magika.test.ts
constant BASIC_TEST_FILES (line 59) | const BASIC_TEST_FILES: Array<[string, string, Dirent]> = [
FILE: js/test/tfnHook.ts
class TfnMock (line 17) | class TfnMock {
method get (line 30) | get(): any {
method reset (line 41) | static reset() {
FILE: js/test/utils.ts
function parseGzippedJSON (line 18) | function parseGzippedJSON(filePath: string): Array<any> {
FILE: python/scripts/check_copyright.py
function main (line 29) | def main():
function get_relevant_files_paths (line 54) | def get_relevant_files_paths() -> list[Path]:
function has_copyright (line 93) | def has_copyright(path: Path) -> bool:
FILE: python/scripts/check_documentation.py
function main (line 49) | def main(verbose: bool) -> None:
function check_versions_are_up_to_date (line 67) | def check_versions_are_up_to_date() -> bool:
function get_python_latest_stable_version (line 124) | def get_python_latest_stable_version() -> str:
function get_python_default_model_name (line 132) | def get_python_default_model_name() -> str:
function get_javascript_default_model_name (line 139) | def get_javascript_default_model_name() -> str:
function get_demo_model_name (line 146) | def get_demo_model_name() -> str:
function get_rust_default_model_name (line 157) | def get_rust_default_model_name() -> str:
function check_markdown_links (line 163) | def check_markdown_links(verbose: bool) -> bool:
function enumerate_markdown_files_in_dir (line 202) | def enumerate_markdown_files_in_dir(rel_dir: Path) -> list[Path]:
function extract_uris_infos_from_file (line 220) | def extract_uris_infos_from_file(path: Path, verbose: bool) -> list[UriI...
function get_max_stable_version_for_crate (line 313) | def get_max_stable_version_for_crate(crate_name: str) -> str:
function get_latest_version_for_npm_package (line 320) | def get_latest_version_for_npm_package(package_name: str) -> str:
function extract_one_match_with_regex_from_file (line 327) | def extract_one_match_with_regex_from_file(path: Path, regex: str) -> str:
class UriInfo (line 347) | class UriInfo:
FILE: python/scripts/generate_reference.py
function main (line 25) | def main():
FILE: python/scripts/pre_release_check.py
function main (line 51) | def main(
function get_rust_cli_version (line 146) | def get_rust_cli_version() -> str:
function get_magika_package_version_via_pip_show (line 166) | def get_magika_package_version_via_pip_show() -> str:
function is_valid_python_version (line 184) | def is_valid_python_version(version: str) -> bool:
function test_is_valid_python_version (line 217) | def test_is_valid_python_version() -> None:
FILE: python/scripts/prepare_pyproject_for_pure_python_wheel.py
function main (line 22) | def main() -> None:
FILE: python/scripts/run_quick_test_magika_cli.py
function main (line 39) | def main(client_path: Optional[Path]) -> None:
FILE: python/scripts/run_quick_test_magika_module.py
function main (line 35) | def main(print_inference_stats: bool, repeat: int) -> None:
FILE: python/scripts/sync.py
class Target (line 71) | class Target(enum.StrEnum):
function main (line 83) | def main(target: Target, models_names_str: Optional[str]) -> None:
function update_python_content_type_kb (line 107) | def update_python_content_type_kb() -> None:
function add_model_to_python_package (line 115) | def add_model_to_python_package(model_name: str) -> None:
function update_python_content_type_label_py (line 154) | def update_python_content_type_label_py() -> None:
function update_js_content_type_files (line 218) | def update_js_content_type_files() -> None:
FILE: python/scripts/test_magika_model.py
function main (line 35) | def main(model_dir_or_name: str) -> None:
function log_ok (line 76) | def log_ok(msg: str) -> None:
function log_error (line 80) | def log_error(msg: str) -> None:
FILE: python/src/magika/cli/magika_client.py
function main (line 133) | def main(
function _should_read_from_stdin (line 339) | def _should_read_from_stdin(files_paths: List[Path]) -> bool:
function _get_magika_result_from_stdin (line 343) | def _get_magika_result_from_stdin(magika: Magika) -> MagikaResult:
FILE: python/src/magika/cli/magika_rust_client_not_found_warning.py
function main (line 31) | def main() -> None: # noqa: D103
FILE: python/src/magika/logger.py
class SimpleLogger (line 28) | class SimpleLogger:
method __init__ (line 36) | def __init__(self, use_colors: bool = False):
method setLevel (line 40) | def setLevel(self, level: int) -> None:
method raw_print_to_stdout (line 43) | def raw_print_to_stdout(self, msg: str) -> None:
method raw_print (line 46) | def raw_print(
method debug (line 58) | def debug(self, msg: str) -> None:
method info (line 65) | def info(self, msg: str) -> None:
method warning (line 69) | def warning(self, msg: str) -> None:
method error (line 76) | def error(self, msg: str) -> None:
function get_logger (line 84) | def get_logger(use_colors: bool = False) -> SimpleLogger:
FILE: python/src/magika/magika.py
class Magika (line 50) | class Magika:
method __init__ (line 57) | def __init__(
method __repr__ (line 125) | def __repr__(self) -> str:
method __str__ (line 128) | def __str__(self) -> str:
method get_module_version (line 131) | def get_module_version(self) -> str:
method get_model_name (line 135) | def get_model_name(self) -> str:
method identify_path (line 139) | def identify_path(self, path: Union[str, os.PathLike]) -> MagikaResult:
method identify_paths (line 150) | def identify_paths(
method identify_bytes (line 168) | def identify_bytes(self, content: bytes) -> MagikaResult:
method identify_stream (line 177) | def identify_stream(self, stream: BinaryIO) -> MagikaResult:
method get_output_content_types (line 213) | def get_output_content_types(self) -> List[ContentTypeLabel]:
method get_model_content_types (line 241) | def get_model_content_types(self) -> List[ContentTypeLabel]:
method _get_default_model_name (line 260) | def _get_default_model_name() -> str:
method _load_content_types_kb (line 269) | def _load_content_types_kb(
method _load_model_config (line 306) | def _load_model_config(model_config_path: Path) -> ModelConfig:
method _init_onnx_session (line 330) | def _init_onnx_session(self) -> rt.InferenceSession:
method _get_ct_info (line 344) | def _get_ct_info(self, content_type: ContentTypeLabel) -> ContentTypeI...
method _get_results_from_paths (line 347) | def _get_results_from_paths(self, paths: List[Path]) -> List[MagikaRes...
method _get_result_from_path (line 393) | def _get_result_from_path(self, path: Path) -> MagikaResult:
method _get_result_from_seekable (line 396) | def _get_result_from_seekable(self, seekable: Seekable) -> MagikaResult:
method _extract_features_from_seekable (line 404) | def _extract_features_from_seekable(
method _get_beg_ints_with_padding (line 482) | def _get_beg_ints_with_padding(
method _get_end_ints_with_padding (line 506) | def _get_end_ints_with_padding(
method _get_model_outputs_from_features (line 529) | def _get_model_outputs_from_features(
method _get_results_from_features (line 544) | def _get_results_from_features(
method _get_output_label_from_dl_label_and_score (line 578) | def _get_output_label_from_dl_label_and_score(
method _get_result_from_labels_and_score (line 636) | def _get_result_from_labels_and_score(
method _get_result_or_features_from_path (line 654) | def _get_result_or_features_from_path(
method _get_result_or_features_from_seekable (line 712) | def _get_result_or_features_from_seekable(
method _get_result_from_few_bytes (line 774) | def _get_result_from_few_bytes(
method _get_label_from_few_bytes (line 786) | def _get_label_from_few_bytes(self, content: bytes) -> ContentTypeLabel:
method _get_raw_predictions (line 794) | def _get_raw_predictions(
FILE: python/src/magika/types/content_type_info.py
class ContentTypeInfo (line 26) | class ContentTypeInfo:
method ct_label (line 47) | def ct_label(self) -> str:
method score (line 62) | def score(self) -> float:
method magic (line 75) | def magic(self) -> str:
FILE: python/src/magika/types/content_type_label.py
class ContentTypeLabel (line 24) | class ContentTypeLabel(StrEnum):
method __repr__ (line 385) | def __repr__(self) -> str:
FILE: python/src/magika/types/magika_error.py
class MagikaError (line 18) | class MagikaError(Exception):
FILE: python/src/magika/types/magika_prediction.py
class MagikaPrediction (line 26) | class MagikaPrediction:
FILE: python/src/magika/types/magika_result.py
class MagikaResult (line 26) | class MagikaResult:
method __init__ (line 29) | def __init__(
method __post_init__ (line 47) | def __post_init__(self) -> None:
method path (line 57) | def path(self) -> Path:
method ok (line 66) | def ok(self) -> bool:
method status (line 75) | def status(self) -> Status:
method prediction (line 84) | def prediction(self) -> MagikaPrediction:
method dl (line 106) | def dl(self) -> ContentTypeInfo:
method output (line 117) | def output(self) -> ContentTypeInfo:
method score (line 128) | def score(self) -> float:
method asdict (line 138) | def asdict(self) -> Dict:
method __repr__ (line 155) | def __repr__(self) -> str:
method __str__ (line 158) | def __str__(self) -> str:
FILE: python/src/magika/types/model.py
class ModelFeatures (line 25) | class ModelFeatures:
class ModelOutput (line 38) | class ModelOutput:
class ModelConfig (line 44) | class ModelConfig:
FILE: python/src/magika/types/overwrite_reason.py
class OverwriteReason (line 23) | class OverwriteReason(LowerCaseStrEnum):
FILE: python/src/magika/types/prediction_mode.py
class PredictionMode (line 25) | class PredictionMode(LowerCaseStrEnum):
method get_valid_prediction_modes (line 36) | def get_valid_prediction_modes() -> List[str]:
FILE: python/src/magika/types/seekable.py
class Seekable (line 21) | class Seekable:
method __init__ (line 22) | def __init__(self, stream: BinaryIO) -> None:
method size (line 28) | def size(self) -> int:
method read_at (line 31) | def read_at(self, offset: int, size: int) -> bytes:
FILE: python/src/magika/types/status.py
class Status (line 22) | class Status(LowerCaseStrEnum):
FILE: python/src/magika/types/strenum.py
class StrEnum (line 32) | class StrEnum(str, enum.Enum):
method __new__ (line 40) | def __new__(cls, value: Union[str, StrEnum], *args, **kwargs): # type...
method __str__ (line 47) | def __str__(self) -> str:
method _generate_next_value_ (line 50) | def _generate_next_value_(name, *_): # type: ignore[no-untyped-def,ov...
class LowerCaseStrEnum (line 54) | class LowerCaseStrEnum(StrEnum):
method _generate_next_value_ (line 55) | def _generate_next_value_(name, *_): # type: ignore[no-untyped-def,ov...
FILE: python/tests/test_features_extraction_vs_reference.py
function cli (line 43) | def cli():
function run_tests (line 49) | def run_tests(debug: bool) -> None:
function generate_tests (line 55) | def generate_tests(test_mode: bool) -> None:
function test_features_extraction_vs_reference (line 59) | def test_features_extraction_vs_reference(debug: bool = False) -> None:
function test_reference_generation (line 81) | def test_reference_generation() -> None:
function _generate_reference_features_extraction (line 85) | def _generate_reference_features_extraction(test_mode: bool) -> None:
function _dump_reference_features_extraction_examples (line 92) | def _dump_reference_features_extraction_examples(
function _generate_reference_features_extraction_tests_cases (line 114) | def _generate_reference_features_extraction_tests_cases() -> List[
function _generate_reference_features_extraction_tests_cases_inputs (line 145) | def _generate_reference_features_extraction_tests_cases_inputs() -> List[
function _generate_content_from_metadata (line 211) | def _generate_content_from_metadata(
function _get_examples_from_reference (line 238) | def _get_examples_from_reference() -> List[FeaturesExtractionExample]:
function _check_features_vs_reference_example_features (line 253) | def _check_features_vs_reference_example_features(
class FeaturesExtractionExample (line 281) | class FeaturesExtractionExample:
class FeaturesExtractionExampleArgs (line 291) | class FeaturesExtractionExampleArgs:
class FeaturesExtractionExampleMetadata (line 301) | class FeaturesExtractionExampleMetadata:
FILE: python/tests/test_inference_vs_reference.py
function cli (line 47) | def cli():
function run_tests (line 53) | def run_tests(debug: bool) -> None:
function generate_tests (line 59) | def generate_tests(test_mode: bool) -> None:
function test_inference_vs_reference (line 63) | def test_inference_vs_reference(debug: bool = False) -> None:
function test_reference_generation (line 129) | def test_reference_generation() -> None:
function _get_examples_by_path (line 135) | def _get_examples_by_path(model_name: str) -> List[ExampleByPath]:
function _get_examples_by_content (line 155) | def _get_examples_by_content(model_name: str) -> List[ExampleByContent]:
function _generate_reference_for_inference (line 175) | def _generate_reference_for_inference(test_mode: bool) -> None:
function _generate_examples_by_path (line 183) | def _generate_examples_by_path(
function _generate_examples_by_content (line 226) | def _generate_examples_by_content(
function _dump_examples_by_path (line 339) | def _dump_examples_by_path(
function _dump_examples_by_content (line 367) | def _dump_examples_by_content(
class CornerCaseInfo (line 397) | class CornerCaseInfo:
method __repr__ (line 403) | def __repr__(self) -> str:
class LabelCategory (line 413) | class LabelCategory(enum.Enum):
class ScoreRange (line 420) | class ScoreRange(enum.Enum):
class CornerCaseCollector (line 427) | class CornerCaseCollector:
method __init__ (line 428) | def __init__(self, magika: Magika):
method inspect_content (line 457) | def inspect_content(
method is_complete (line 467) | def is_complete(self) -> bool:
method get_missing_examples (line 470) | def get_missing_examples(self) -> Set[CornerCaseInfo]:
method get_missing_examples_num (line 473) | def get_missing_examples_num(self) -> int:
method _get_cornern_case_example (line 476) | def _get_cornern_case_example(
method _get_label_category (line 486) | def _get_label_category(self, dl_label: ContentTypeLabel) -> LabelCate...
method _is_generic (line 499) | def _is_generic(self, dl_label: ContentTypeLabel) -> bool:
method _is_text (line 502) | def _is_text(self, dl_label: ContentTypeLabel) -> bool:
method _has_threshold (line 505) | def _has_threshold(self, dl_label: ContentTypeLabel) -> bool:
method _get_threshold (line 508) | def _get_threshold(self, dl_label: ContentTypeLabel) -> float:
method _has_overwrite (line 511) | def _has_overwrite(self, dl_label: ContentTypeLabel) -> bool:
method _get_score_range (line 514) | def _get_score_range(self, dl_label: ContentTypeLabel, score: float) -...
method get_corner_case_candidates_generator (line 526) | def get_corner_case_candidates_generator(
function _check_result_vs_reference_example (line 593) | def _check_result_vs_reference_example(
class ExampleByPath (line 613) | class ExampleByPath:
class ExampleByContent (line 623) | class ExampleByContent:
class Prediction (line 633) | class Prediction:
FILE: python/tests/test_magika_python_module.py
function test_magika_module_check_version (line 36) | def test_magika_module_check_version() -> None:
function test_magika_module_with_one_test_file (line 50) | def test_magika_module_with_one_test_file() -> None:
function test_magika_module_with_explicit_model_dir (line 63) | def test_magika_module_with_explicit_model_dir() -> None:
function test_magika_module_with_basic_tests_by_paths (line 76) | def test_magika_module_with_basic_tests_by_paths() -> None:
function test_magika_module_with_basic_tests_by_path (line 84) | def test_magika_module_with_basic_tests_by_path() -> None:
function test_magika_module_with_basic_tests_by_bytes (line 94) | def test_magika_module_with_basic_tests_by_bytes() -> None:
function test_magika_module_with_basic_tests_by_stream (line 107) | def test_magika_module_with_basic_tests_by_stream() -> None:
function test_magika_module_with_all_models (line 120) | def test_magika_module_with_all_models() -> None:
function test_magika_module_with_previously_missdetected_samples (line 131) | def test_magika_module_with_previously_missdetected_samples() -> None:
function test_magika_module_with_empty_content (line 140) | def test_magika_module_with_empty_content() -> None:
function test_magika_module_with_short_content (line 170) | def test_magika_module_with_short_content() -> None:
function test_magika_module_with_python_and_non_python_content (line 208) | def test_magika_module_with_python_and_non_python_content() -> None:
function test_magika_module_identify_stream_does_not_alter_position (line 225) | def test_magika_module_identify_stream_does_not_alter_position() -> None:
function test_magika_module_with_whitespaces (line 245) | def test_magika_module_with_whitespaces() -> None:
function test_magika_module_with_different_prediction_modes (line 302) | def test_magika_module_with_different_prediction_modes() -> None:
function test_magika_module_overwrite_reason (line 401) | def test_magika_module_overwrite_reason() -> None:
function test_magika_module_with_directory (line 470) | def test_magika_module_with_directory() -> None:
function test_magika_module_multiple_copies_of_the_same_file (line 483) | def test_magika_module_multiple_copies_of_the_same_file() -> None:
function test_magika_module_with_symlink (line 499) | def test_magika_module_with_symlink() -> None:
function test_magika_module_with_non_existing_file (line 528) | def test_magika_module_with_non_existing_file() -> None:
function test_magika_module_with_permission_error (line 540) | def test_magika_module_with_permission_error() -> None:
function test_magika_module_with_really_many_files (line 573) | def test_magika_module_with_really_many_files() -> None:
function test_magika_module_with_big_file (line 588) | def test_magika_module_with_big_file() -> None:
function test_api_call_with_bad_types (line 612) | def test_api_call_with_bad_types() -> None:
function test_access_magika_result_and_prediction (line 645) | def test_access_magika_result_and_prediction():
function test_access_backward_compatibility_layer (line 692) | def test_access_backward_compatibility_layer() -> None:
function test_get_model_and_output_content_types (line 721) | def test_get_model_and_output_content_types() -> None:
function test_magika_imports (line 764) | def test_magika_imports():
function get_expected_content_type_label_from_test_file_path (line 795) | def get_expected_content_type_label_from_test_file_path(
function check_result_vs_expected_result (line 801) | def check_result_vs_expected_result(
function check_results_vs_expected_results (line 812) | def check_results_vs_expected_results(
FILE: python/tests/test_python_magika_client.py
function test_python_magika_client (line 19) | def test_python_magika_client() -> None:
FILE: python/tests/utils.py
function get_repo_root_dir (line 25) | def get_repo_root_dir() -> Path:
function get_tests_data_dir (line 29) | def get_tests_data_dir() -> Path:
function get_basic_tests_files_dir (line 39) | def get_basic_tests_files_dir() -> Path:
function get_mitra_tests_files_dir (line 45) | def get_mitra_tests_files_dir() -> Path:
function get_previously_missdetected_files_dir (line 51) | def get_previously_missdetected_files_dir() -> Path:
function get_basic_test_files_paths (line 57) | def get_basic_test_files_paths() -> List[Path]:
function get_mitra_test_files_paths (line 63) | def get_mitra_test_files_paths() -> List[Path]:
function get_previously_missdetected_files_paths (line 69) | def get_previously_missdetected_files_paths() -> List[Path]:
function get_reference_features_extraction_examples_path (line 75) | def get_reference_features_extraction_examples_path() -> Path:
function get_reference_for_inference_examples_by_path_path (line 79) | def get_reference_for_inference_examples_by_path_path(model_name: str) -...
function get_reference_for_inference_examples_by_content_path (line 87) | def get_reference_for_inference_examples_by_content_path(model_name: str...
function get_one_basic_test_file_path (line 95) | def get_one_basic_test_file_path() -> Path:
function get_random_ascii_bytes (line 99) | def get_random_ascii_bytes(size: int) -> bytes:
function get_random_bytes (line 108) | def get_random_bytes(size: int) -> bytes:
function get_lines_from_stream (line 112) | def get_lines_from_stream(stream: str) -> List[str]:
function write_random_file_with_size (line 123) | def write_random_file_with_size(sample_path: Path, sample_size: int) -> ...
function get_models_dir (line 138) | def get_models_dir() -> Path:
function get_default_model_dir (line 142) | def get_default_model_dir() -> Path:
function generate_whitespaces (line 148) | def generate_whitespaces(size: int) -> bytes:
function generate_pattern (line 154) | def generate_pattern(size: int, only_printable: bool) -> bytes:
function gzip_compress (line 169) | def gzip_compress(content: bytes) -> bytes:
function gzip_decompress (line 173) | def gzip_decompress(content: bytes) -> bytes:
function get_imported_objects_after_wildcard (line 177) | def get_imported_objects_after_wildcard() -> Dict[str, Any]:
FILE: rust/cli/src/main.rs
type Flags (line 37) | struct Flags {
type Version (line 64) | struct Version;
method into_resettable (line 66) | fn into_resettable(self) -> clap::builder::Resettable<clap::builder::S...
type Colors (line 75) | struct Colors {
type Modifiers (line 87) | struct Modifiers {
type Format (line 103) | struct Format {
type Experimental (line 131) | struct Experimental {
function main (line 160) | async fn main() -> Result<()> {
function extract_features (line 245) | async fn extract_features(
type ProcessPath (line 279) | enum ProcessPath {
method from (line 286) | fn from(value: FeaturesOrRuled) -> Self {
function process_path (line 294) | async fn process_path(
function build_session (line 330) | fn build_session(flags: &Flags) -> Result<Session> {
function infer_batch (line 358) | async fn infer_batch(
type Reorder (line 374) | struct Reorder {
method is_empty (line 380) | fn is_empty(&self) -> bool {
method push (line 384) | fn push(&mut self, response: Response) {
method pop (line 390) | fn pop(&mut self) -> Option<Response> {
type Batch (line 397) | struct Batch {
type Response (line 403) | struct Response {
method format (line 438) | fn format(self, flags: &Flags) -> Result<ColoredString> {
method json (line 496) | fn json(self) -> Result<serde_json::Value> {
method label (line 514) | fn label(&self) -> &str {
method description (line 521) | fn description(&self) -> Cow<'_, str> {
method group (line 528) | fn group(&self) -> &str {
method mime_type (line 535) | fn mime_type(&self) -> &str {
method extensions (line 542) | fn extensions(&self) -> &[&str] {
method score (line 549) | fn score(&self) -> f32 {
method color (line 556) | fn color(&self, result: ColoredString) -> ColoredString {
type JsonError (line 411) | enum JsonError {
method from (line 425) | fn from(value: magika::Error) -> Self {
type JsonResult (line 418) | struct JsonResult<'a> {
function join (line 578) | fn join<T: AsRef<str>>(xs: impl IntoIterator<Item = T>) -> String {
FILE: rust/gen/src/main.rs
function main (line 23) | fn main() -> Result<()> {
function generate_content_types (line 40) | fn generate_content_types(
function generate_model_config (line 112) | fn generate_model_config(content_types: &[String], model_config: ModelCo...
function create_generated_file (line 182) | fn create_generated_file(path: impl AsRef<Path>) -> Result<File> {
type ContentType (line 194) | struct ContentType {
type ModelConfig (line 204) | struct ModelConfig {
function enum_name (line 223) | fn enum_name(xs: &str) -> String {
function const_name (line 234) | fn const_name(xs: &str) -> String {
FILE: rust/lib/src/builder.rs
type Builder (line 21) | pub struct Builder {
method with_inter_threads (line 30) | pub fn with_inter_threads(mut self, num_threads: usize) -> Self {
method with_intra_threads (line 36) | pub fn with_intra_threads(mut self, num_threads: usize) -> Self {
method with_optimization_level (line 42) | pub fn with_optimization_level(mut self, opt_level: GraphOptimizationL...
method with_parallel_execution (line 48) | pub fn with_parallel_execution(mut self, parallel_execution: bool) -> ...
method build (line 54) | pub fn build(self) -> Result<Session> {
method build_ (line 58) | fn build_(self) -> ort::Result<Session> {
FILE: rust/lib/src/config.rs
type ModelConfig (line 20) | pub(crate) struct ModelConfig {
method features_size (line 36) | pub(crate) fn features_size(&self) -> usize {
method split_features (line 40) | pub(crate) fn split_features<'a>(&self, features: &'a mut [i32]) -> Sp...
type SplitFeatures (line 30) | pub(crate) struct SplitFeatures<'a> {
FILE: rust/lib/src/content.rs
constant MODEL_NAME (line 21) | pub const MODEL_NAME: &str = "standard_v3_3";
constant MODEL_MAJOR_VERSION (line 24) | pub const MODEL_MAJOR_VERSION: u32 = 3;
type ContentType (line 2000) | pub enum ContentType {
constant SIZE (line 2438) | pub(crate) const SIZE: usize = 217;
method info (line 2441) | pub fn info(self) -> &'static TypeInfo {
FILE: rust/lib/src/error.rs
type Result (line 16) | pub type Result<T> = core::result::Result<T, Error>;
type Error (line 20) | pub enum Error {
FILE: rust/lib/src/file.rs
type FileType (line 25) | pub enum FileType {
method content_type (line 67) | pub fn content_type(&self) -> Option<ContentType> {
method info (line 77) | pub fn info(&self) -> &'static TypeInfo {
method score (line 89) | pub fn score(&self) -> f32 {
method convert (line 132) | pub(crate) fn convert(tensor: ArrayViewD<f32>) -> Vec<FileType> {
type InferredType (line 41) | pub struct InferredType {
method content_type (line 101) | pub fn content_type(&self) -> ContentType {
type OverwriteReason (line 57) | pub enum OverwriteReason {
type TypeInfo (line 111) | pub struct TypeInfo {
FILE: rust/lib/src/future.rs
function exec (line 28) | pub(crate) fn exec<T>(mut future: impl Future<Output = T>) -> T {
type Env (line 38) | pub(crate) trait Env {
method symlink_metadata (line 40) | async fn symlink_metadata(path: &Path) -> Result<Metadata>;
method open (line 41) | async fn open(path: &Path) -> Result<Self::File>;
method ort_session_run (line 42) | async fn ort_session_run(
type File (line 49) | type File = std::fs::File;
method symlink_metadata (line 51) | async fn symlink_metadata(path: &Path) -> Result<Metadata> {
method open (line 55) | async fn open(path: &Path) -> Result<Self::File> {
method ort_session_run (line 59) | async fn ort_session_run(
type File (line 68) | type File = tokio::fs::File;
method symlink_metadata (line 70) | async fn symlink_metadata(path: &Path) -> Result<Metadata> {
method open (line 74) | async fn open(path: &Path) -> Result<Self::File> {
method ort_session_run (line 78) | async fn ort_session_run(
type SyncEnv (line 47) | pub(crate) enum SyncEnv {}
type AsyncEnv (line 66) | pub(crate) enum AsyncEnv {}
function panic_waker (line 94) | fn panic_waker() -> Waker {
FILE: rust/lib/src/input.rs
type Features (line 25) | pub struct Features(pub(crate) Vec<i32>);
type SyncInput (line 28) | pub trait SyncInput {
method length (line 30) | fn length(&self) -> Result<u64>;
method read_at (line 33) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()>;
method length (line 51) | fn length(&self) -> Result<u64> {
method read_at (line 55) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
method length (line 63) | fn length(&self) -> Result<u64> {
method read_at (line 67) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
method length (line 74) | fn length(&self) -> Result<u64> {
method read_at (line 78) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<()> {
type AsyncInput (line 37) | pub trait AsyncInput {
method length (line 39) | fn length(&self) -> impl Future<Output = Result<u64>>;
method read_at (line 42) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> impl Future<O...
method length (line 84) | fn length(&self) -> impl Future<Output = Result<u64>> {
method read_at (line 88) | fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> impl Future<O...
method length (line 94) | async fn length(&self) -> Result<u64> {
method read_at (line 98) | async fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> Result<...
constant _ (line 45) | const _: () = const {
type FeaturesOrRuled (line 106) | pub enum FeaturesOrRuled {
method extract_sync (line 118) | pub fn extract_sync(file: impl SyncInput) -> Result<Self> {
method extract_async (line 125) | pub async fn extract_async(file: impl AsyncInput) -> Result<Self> {
method extract (line 129) | pub(crate) async fn extract(file: impl AsyncInput) -> Result<Self> {
function extract_features_async (line 148) | async fn extract_features_async(
function copy_features (line 167) | fn copy_features(dst: &mut [i32], src: &[u8], align: usize) {
function strip_prefix (line 177) | fn strip_prefix(xs: &[u8]) -> &[u8] {
function strip_suffix (line 181) | fn strip_suffix(xs: &[u8]) -> &[u8] {
function strip (line 185) | fn strip(mut xs: &[u8], mut split: impl FnMut(&[u8]) -> Option<(&u8, &[u...
function is_whitespace (line 195) | fn is_whitespace(x: u8) -> bool {
function features_extraction_reference (line 211) | fn features_extraction_reference() {
FILE: rust/lib/src/lib.rs
type Prediction (line 81) | struct Prediction {
function assert_float (line 88) | fn assert_float(actual: f32, expected: f32, debug: &str) {
function assert_prediction (line 95) | fn assert_prediction(actual: FileType, expected: Prediction, debug: &str) {
function identify_by_path_reference (line 119) | fn identify_by_path_reference() {
function identify_by_content_reference (line 146) | fn identify_by_content_reference() {
FILE: rust/lib/src/model.rs
constant CONFIG (line 23) | pub(crate) const CONFIG: ModelConfig = ModelConfig {
constant THRESHOLDS (line 34) | const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0...
constant OVERWRITE_MAP (line 35) | const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [
type Label (line 258) | pub(crate) enum Label {
method content_type (line 477) | pub(crate) fn content_type(self) -> ContentType {
constant NUM_LABELS (line 475) | pub(crate) const NUM_LABELS: usize = 214;
FILE: rust/lib/src/session.rs
type Session (line 24) | pub struct Session {
method new (line 30) | pub fn new() -> Result<Self> {
method builder (line 35) | pub fn builder() -> Builder {
method identify_file_sync (line 40) | pub fn identify_file_sync(&mut self, file: impl AsRef<Path>) -> Result...
method identify_file_async (line 45) | pub async fn identify_file_async(&mut self, file: impl AsRef<Path>) ->...
method identify_file (line 49) | async fn identify_file<E: Env>(&mut self, file: &Path) -> Result<FileT...
method identify_content_sync (line 62) | pub fn identify_content_sync(&mut self, file: impl SyncInput) -> Resul...
method identify_content_async (line 67) | pub async fn identify_content_async(&mut self, file: impl AsyncInput) ...
method identify_content (line 71) | async fn identify_content<E: Env>(&mut self, file: impl AsyncInput) ->...
method identify_features_sync (line 79) | pub fn identify_features_sync(&mut self, features: &Features) -> Resul...
method identify_features_async (line 84) | pub async fn identify_features_async(&mut self, features: &Features) -...
method identify_features (line 88) | async fn identify_features<E: Env>(&mut self, features: &Features) -> ...
method identify_features_batch_sync (line 95) | pub fn identify_features_batch_sync(&mut self, features: &[Features]) ...
method identify_features_batch_async (line 100) | pub async fn identify_features_batch_async(
method identify_features_batch (line 106) | async fn identify_features_batch<E: Env>(
FILE: tests_data/basic/c/code.c
function main (line 3) | int main() {
FILE: tests_data/basic/javascript/code.js
function q (line 6) | function q(){for(var a=r,b={},c=0;c<a.length;++c)b[a[c]]=c;return b}
function u (line 6) | function u(){var a="ABCDEFGHIJKLMNOPQRSTUVWXYZ";a+=a.toLowerCase()+"0123...
function aa (line 7) | function aa(a){function b(k){for(;d<a.length;){var m=a.charAt(d++),l=v[m...
function T (line 10) | function T(){var a=K(),b=a.gl;b&&b.decorators||(b={decorators:[]},a.gl=b...
function W (line 10) | function W(a){return new RegExp("(.*?)(^|&)"+a+"=([^&]*)&?(.*)")}
function la (line 12) | function la(a,b){a=[J.userAgent,(new Date).getTimezoneOffset(),J.userLan...
function ma (line 13) | function ma(a){return function(b){var c=R(G.location.href),d=c.search.re...
function pa (line 13) | function pa(a,b){if(a=W(a).exec(b)){var c=a[2],d=a[4];b=a[1];d&&(b=b+c+d...
function oa (line 14) | function oa(a,b,c){function d(f,h){f=pa("_gl",f);f.length&&(f=h+f);retur...
function Y (line 16) | function Y(a,b,c,d){function e(k){k=pa(a,k);var m=k.charAt(k.length-1);k...
function qa (line 16) | function qa(a,b){var c="FORM"===(a.tagName||"").toUpperCase(),d=V(b,1,c)...
function ta (line 17) | function ta(a,b,c,d){if(c.tagName){if("a"===c.tagName.toLowerCase())retu...
function Z (line 17) | function Z(a,b,c,d){c.href&&(a=Y(a,b,c.href,void 0===d?!1:d),C.test(a)&&...
function sa (line 18) | function sa(a,b,c){if(c&&c.action){var d=(c.method||"").toLowerCase();if...
function fa (line 19) | function fa(a){try{a:{for(var b=100;a&&0<b;){if(a.href&&a.nodeName.match...
function ha (line 19) | function ha(a){try{if(a.action){var b=Q(R(a.action),"host");qa(a,b)}}cat...
function La (line 21) | function La(a){var b=1,c;if(a)for(b=0,c=a.length-1;0<=c;c--){var d=a.cha...
function J (line 21) | function J(a){vd.set(a)}
function df (line 29) | function df(a,b,c){b=Oe(b);var d={};if(!b||!b.length)return d;for(var e=...
function Ja (line 38) | function Ja(a){if(100!=a.get(Ka)&&La(P(a,Q))%1E4>=100*R(a,Ka))throw"abor...
function Ma (line 38) | function Ma(a){if(G(P(a,Na)))throw"abort";}
function Oa (line 38) | function Oa(){var a=M.location.protocol;if("http:"!=a&&"https:"!=a)throw...
function pf (line 39) | function pf(a){var b=!1,c=!1;if(vd.get(89)){c=!0;var d=a.get(kb),e=M.loc...
function Pa (line 40) | function Pa(a){try{O.navigator.sendBeacon?J(42):O.XMLHttpRequest&&"withC...
function Sa (line 41) | function Sa(a){var b=P(a,fa);!b&&a.get(Vd)&&(b="beacon");var c=P(a,gd),d...
function Hc (line 42) | function Hc(a){qc().expId&&a.set(Nc,qc().expId);qc().expVar&&a.set(Oc,qc...
function cd (line 43) | function cd(){if(O.navigator&&"preview"==O.navigator.loadPurpose)throw"a...
function yd (line 43) | function yd(a){var b=O.gaDevIds||[];if(ka(b)){var c=a.get("&did");qa(c)&...
function vb (line 43) | function vb(a){if(!a.get(Na))throw"abort";}
function Pe (line 44) | function Pe(a){try{if(!a.get(Qe)&&(a.set(Qe,!0),!a.get(">m"))){var b=v...
function lf (line 45) | function lf(a){if(null==a||0===a.length)return!1;a=Number(a);var b=Date....
function Ta (line 45) | function Ta(a){var b=R(a,Ua);500<=b&&J(15);var c=P(a,Va);if("transaction...
function hf (line 48) | function hf(a,b){var c=gf[a];c&&J(c);"displayFeaturesTask"===a&&void 0==...
function mf (line 48) | function mf(a,b){if(a)if("object"===typeof a)for(var c in a)a.hasOwnProp...
function $a (line 48) | function $a(a){var b=ue.get(a);if(!b)for(var c=0;c<ve.length;c++){var d=...
function yc (line 48) | function yc(a){var b;ue.map(function(c,d){d.F==a&&(b=d)});return b&&b.name}
function S (line 48) | function S(a,b,c,d,e){a=new bb(a,b,c,d,e);ue.set(a.name,a);return a.name}
function cb (line 48) | function cb(a,b){ve.push([new RegExp("^"+a+"$"),b])}
function T (line 48) | function T(a,b,c){return S(a,b,c,void 0,db)}
function db (line 49) | function db(){}
function X (line 57) | function X(a,b,c,d){b[a]=function(){try{return d&&J(d),c.apply(this,argu...
function Xc (line 64) | function Xc(a,b,c){"none"==b&&(b="");var d=[],e=Ca(a);a="__utma"==a?6:2;...
function Zc (line 64) | function Zc(a,b){if(null==a)var c=a=1;else c=La(a),a=La(D(a,".")?a.subst...
function Bc (line 64) | function Bc(a){if(a.get(Ze))return J(35),De.generate($e(a));var b=P(a,Q)...
function Ic (line 65) | function Ic(a,b){var c=new Date,d=O.navigator,e=d.plugins||[];a=[a,d.use...
function pa (line 65) | function pa(a,b){var c=new Date,d=O.navigator,e=c.getHours()+Math.floor(...
function d (line 69) | function d(g){try{g=g||O.event;a:{var ca=g.target||g.srcElement;for(g=10...
function sd (line 70) | function sd(a,b){if(b==M.location.hostname)return!1;for(var c=0;c<a.leng...
function ke (line 71) | function ke(a,b){return b!=Ic(a,0)&&b!=Ic(a,-1)&&b!=Ic(a,-2)&&b!=pa(a,0)...
function $e (line 71) | function $e(a){var b=af(a),c={};c._ga=a.get(Q);c._gid=a.get(I)||void 0;c...
function af (line 71) | function af(a){function b(e){return void 0==e||""===e?0:Number(e)}var c=...
function b (line 71) | function b(d,e){e&&(c+="&"+d+"="+K(e))}
function b (line 75) | function b(e,g){d.model.data.set(e,g);a.hasOwnProperty(e)&&hf(e,g)}
function c (line 75) | function c(e,g){d.model.data.set(e,g);d.filters.add(e)}
function td (line 78) | function td(a,b){var c=P(a,U);a.data.set(la,"_ga"==c?"_gid":c+"_gid");if...
function pd (line 83) | function pd(a){var b=O.navigator,c=O.screen,d=M.location;a.set(lb,of(!!a...
function ud (line 87) | function ud(a){return 0<=a.indexOf(".")||0<=a.indexOf(":")}
function tf (line 93) | function tf(a,b){var c=O.google_tag_data;c||(c=O.google_tag_data={});var...
function uf (line 93) | function uf(a){return{allowAdFeatures:a.get(Ud),allowAdPersonalizationSi...
function rf (line 94) | function rf(a){return void 0===a.get(Ie)&&void 0===a.get(fa)&&void 0===a...
FILE: tests_data/basic/python/code.py
function print_primes (line 1) | def print_primes(max_n: int) -> None:
function is_prime (line 7) | def is_prime(n: int) -> bool:
FILE: tests_data/basic/ruby/code.rb
class Kalimat (line 1) | class Kalimat
method initialize (line 2) | def initialize(nama = "Dunia")
method sapaan (line 5) | def sapaan
method perpisahan (line 8) | def perpisahan
FILE: tests_data/basic/rust/asm.rs
function main (line 3) | fn main() {
FILE: tests_data/basic/rust/code.rs
function main (line 1) | fn main() {
FILE: tests_data/basic/rust/test_case1.rs
function load_model (line 2) | fn load_model() {
function main (line 9) | fn main() {
FILE: tests_data/basic/rust/test_case2.rs
function create_csv (line 5) | fn create_csv() -> std::io::Result<()> {
function create_json (line 15) | fn create_json() -> std::io::Result<()> {
function main (line 22) | fn main() -> std::io::Result<()> {
FILE: tests_data/basic/typescript/code.ts
type Person (line 2) | interface Person {
function greet (line 7) | function greet(person: Person): string {
FILE: website-ng/src/lib/components/ui/file-drop-zone/index.ts
constant BYTE (line 19) | const BYTE = 1;
constant KILOBYTE (line 20) | const KILOBYTE = 1024;
constant MEGABYTE (line 21) | const MEGABYTE = 1024 * KILOBYTE;
constant GIGABYTE (line 22) | const GIGABYTE = 1024 * MEGABYTE;
constant ACCEPT_IMAGE (line 25) | const ACCEPT_IMAGE = "image/*";
constant ACCEPT_VIDEO (line 26) | const ACCEPT_VIDEO = "video/*";
constant ACCEPT_AUDIO (line 27) | const ACCEPT_AUDIO = "audio/*";
FILE: website-ng/src/lib/components/ui/file-drop-zone/types.ts
type FileRejectedReason (line 8) | type FileRejectedReason =
type FileDropZonePropsWithoutHTML (line 13) | type FileDropZonePropsWithoutHTML = WithChildren<{
type FileDropZoneProps (line 50) | type FileDropZoneProps = FileDropZonePropsWithoutHTML &
FILE: website-ng/src/lib/utils.ts
function cn (line 4) | function cn(...inputs: ClassValue[]) {
type WithoutChild (line 9) | type WithoutChild<T> = T extends { child?: any } ? Omit<T, "child"> : T;
type WithoutChildren (line 11) | type WithoutChildren<T> = T extends { children?: any } ? Omit<T, "childr...
type WithoutChildrenOrChild (line 12) | type WithoutChildrenOrChild<T> = WithoutChildren<WithoutChild<T>>;
type WithElementRef (line 13) | type WithElementRef<T, U extends HTMLElement = HTMLElement> = T & { ref?...
FILE: website-ng/src/lib/utils/utils.ts
function cn (line 8) | function cn(...inputs: ClassValue[]) {
type WithoutChild (line 13) | type WithoutChild<T> = T extends { child?: any } ? Omit<T, "child"> : T;
type WithoutChildren (line 15) | type WithoutChildren<T> = T extends { children?: any }
type WithoutChildrenOrChild (line 18) | type WithoutChildrenOrChild<T> = WithoutChildren<WithoutChild<T>>;
type WithElementRef (line 19) | type WithElementRef<T, U extends HTMLElement = HTMLElement> = T & {
FILE: website-ng/src/pages/install.ps1.ts
function GET (line 5) | async function GET({ redirect }: APIContext) {
FILE: website-ng/src/pages/install.sh.ts
function GET (line 5) | async function GET({ redirect }: APIContext) {
FILE: website/vite.config.js
method configureServer (line 14) | configureServer(server) {
Condensed preview — 432 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,053K chars).
[
{
"path": ".cargo/config.toml",
"chars": 35,
"preview": "[build]\ntarget-dir = \"rust/target\"\n"
},
{
"path": ".dockerignore",
"chars": 716,
"preview": "# Include any files or directories that you don't want to be copied to your\n# container here (e.g., local build artifact"
},
{
"path": ".gemini/config.yaml",
"chars": 55,
"preview": "code_review:\n pull_request_opened:\n summary: false\n"
},
{
"path": ".gitattributes",
"chars": 21,
"preview": "/tests_data/** -text\n"
},
{
"path": ".github/CODEOWNERS",
"chars": 392,
"preview": "# Default owners (lowest precedence).\n* @reyammer @invernizzi\n\n# Julien owns the Rust code\n/rust/ @ia0\n\n# Yanick owns t"
},
{
"path": ".github/ISSUE_TEMPLATE/misdetection.md",
"chars": 654,
"preview": "---\nname: Misdetection\nabout: Report a file, or files, that have been misdetected as something that they\n aren't.\ntitle"
},
{
"path": ".github/ISSUE_TEMPLATE/new_content_type_request.md",
"chars": 3218,
"preview": "---\nname: New content type request\nabout: Suggest a new content type for Magika to detect\ntitle: \"[NEW CONTENT TYPE REQU"
},
{
"path": ".github/dependabot.yml",
"chars": 506,
"preview": "version: 2\r\n\r\nupdates:\r\n - package-ecosystem: \"github-actions\"\r\n directory: \"/\"\r\n schedule:\r\n interval: \"mon"
},
{
"path": ".github/labeler.yml",
"chars": 25,
"preview": "needs triage:\n - \"/.*/\"\n"
},
{
"path": ".github/scorecard.yml",
"chars": 560,
"preview": "# Scorecard maintainer annotations.\n# See https://github.com/ossf/scorecard/blob/main/config/README.md\n\nannotations:\n #"
},
{
"path": ".github/workflows/cli-latest.yml",
"chars": 560,
"preview": "name: Update the trampoline release\n\non:\n # This is called by cli-release.yml but we don't use the input.\n workflow_ca"
},
{
"path": ".github/workflows/cli-release.yml",
"chars": 13075,
"preview": "# This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist\n#\n# Copyright 2022-2024, axodotdev\n# SPDX-"
},
{
"path": ".github/workflows/codeql.yml",
"chars": 3793,
"preview": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# Y"
},
{
"path": ".github/workflows/docs-check.yml",
"chars": 741,
"preview": "name: Docs - Check documentation\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n path"
},
{
"path": ".github/workflows/github-issue-labeler.yml",
"chars": 468,
"preview": "name: New issue labeler\non:\n # Runs on newly opened issues\n issues:\n types: [opened]\n\n# Sets permissions of the GIT"
},
{
"path": ".github/workflows/github-pages.yml",
"chars": 1602,
"preview": "name: Pages - deploy\n\non:\n # Runs on pushes targeting the default branch\n push:\n branches: [\"main\"]\n paths:\n "
},
{
"path": ".github/workflows/go-test.yml",
"chars": 493,
"preview": "name: Go - build and run tests\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n paths:"
},
{
"path": ".github/workflows/js-check-import-scenarios.yml",
"chars": 2401,
"preview": "# Tests that the Magika library can be used in various scenarios (e.g., commonjs/node, esmodule/node, esmodule/browse)\nn"
},
{
"path": ".github/workflows/js-docs-builder.yml",
"chars": 910,
"preview": "name: JS - generate docs\n\non:\n # Runs on pushes targeting the default branch\n push:\n branches: [\"main\"]\n paths:\n"
},
{
"path": ".github/workflows/js-publish.yml",
"chars": 705,
"preview": "name: JS - publish\non:\n workflow_dispatch:\npermissions:\n contents: read\n\njobs:\n build:\n runs-on: ubuntu-latest\n "
},
{
"path": ".github/workflows/js-test.yml",
"chars": 875,
"preview": "name: JS - tests\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n paths:\n - \"js/*"
},
{
"path": ".github/workflows/python-build-and-release-package.yml",
"chars": 17558,
"preview": "name: Python - build and release package\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n tags:\n "
},
{
"path": ".github/workflows/python-test-published-package.yml",
"chars": 1798,
"preview": "# This routinely checks that published packages are installable and work\n# properly. This makes sure that a new version "
},
{
"path": ".github/workflows/python-test-published-rc-package.yml",
"chars": 1884,
"preview": "# This routinely checks that the latest published -rc packages are installable\n# and work properly. This makes sure that"
},
{
"path": ".github/workflows/python-test-suite.yml",
"chars": 3207,
"preview": "name: Python - run test suite\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n paths:\n"
},
{
"path": ".github/workflows/rust-test.yml",
"chars": 1214,
"preview": "name: Rust - test\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n paths:\n - \".gi"
},
{
"path": ".github/workflows/scorecard.yml",
"chars": 2874,
"preview": "# This workflow uses actions that are not certified by GitHub. They are provided\n# by a third-party and are governed by "
},
{
"path": ".github/workflows/website-test.yml",
"chars": 1248,
"preview": "name: Website - tests\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n pull_request:\n paths:\n - "
},
{
"path": ".gitignore",
"chars": 169,
"preview": "*.pyc\n__pycache__/\n.ipynb_checkpoints\nvenv/\ntmp/\n.env\n*.swp\n*.egg-info\ndist/*\n*.pickle\n.s.yml\n\n*/models-data/*\n\n.vscode\n"
},
{
"path": "CITATION.cff",
"chars": 786,
"preview": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n- family-names: \"Fratantonio\"\n"
},
{
"path": "CONTRIBUTING.md",
"chars": 1224,
"preview": "# How to Contribute\n\nWe would love to accept your patches and contributions to this project!\n\nCheck [open issues labeled"
},
{
"path": "Dockerfile",
"chars": 261,
"preview": "# syntax=docker/dockerfile:1\n\nARG PYTHON_VERSION=3.11\nFROM python:${PYTHON_VERSION}-slim as base\n\nWORKDIR /magika\n\n# Thi"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 10174,
"preview": "# Magika\n\n[](https://pypi.python.org/pypi/magika)\n[\n1. [List of possib"
},
{
"path": "assets/models/standard_v3_0/config.min.json",
"chars": 2074,
"preview": "{\"beg_size\":1024,\"mid_size\":0,\"end_size\":1024,\"use_inputs_at_offsets\":false,\"medium_confidence_threshold\":0.5,\"min_file_"
},
{
"path": "assets/models/standard_v3_0/metadata.json",
"chars": 188,
"preview": "{\"namespace_hash\":\"7ca577b96738951c36df428f8435c81780f92c6f9ef3a73d796a792ffc817703\",\"model_name_hash\":\"e5368af178b89eb9"
},
{
"path": "assets/models/standard_v3_1/README.md",
"chars": 16434,
"preview": "# Model documentation\n\n## Table of Contents\n\n1. [List of possible outputs](#list-of-possible-outputs)\n1. [List of possib"
},
{
"path": "assets/models/standard_v3_1/config.min.json",
"chars": 2122,
"preview": "{\"beg_size\":1024,\"mid_size\":0,\"end_size\":1024,\"use_inputs_at_offsets\":false,\"medium_confidence_threshold\":0.5,\"min_file_"
},
{
"path": "assets/models/standard_v3_1/metadata.json",
"chars": 20,
"preview": "{\"epoch_num\":\"200\"}\n"
},
{
"path": "assets/models/standard_v3_2/README.md",
"chars": 16448,
"preview": "# Model documentation\n\n## Table of Contents\n\n1. [List of possible outputs](#list-of-possible-outputs)\n1. [List of possib"
},
{
"path": "assets/models/standard_v3_2/config.min.json",
"chars": 2123,
"preview": "{\"beg_size\":1024,\"mid_size\":0,\"end_size\":1024,\"use_inputs_at_offsets\":false,\"medium_confidence_threshold\":0.5,\"min_file_"
},
{
"path": "assets/models/standard_v3_2/metadata.json",
"chars": 20,
"preview": "{\"epoch_num\":\"190\"}\n"
},
{
"path": "assets/models/standard_v3_3/README.md",
"chars": 16448,
"preview": "# Model documentation\n\n## Table of Contents\n\n1. [List of possible outputs](#list-of-possible-outputs)\n1. [List of possib"
},
{
"path": "assets/models/standard_v3_3/config.min.json",
"chars": 2141,
"preview": "{\"beg_size\":1024,\"mid_size\":0,\"end_size\":1024,\"use_inputs_at_offsets\":false,\"medium_confidence_threshold\":0.5,\"min_file_"
},
{
"path": "assets/models/standard_v3_3/metadata.json",
"chars": 19,
"preview": "{\"epoch_num\":\"91\"}\n"
},
{
"path": "dist-workspace.toml",
"chars": 1386,
"preview": "[workspace]\nmembers = [\"cargo:rust/cli\"]\n\n# Config for 'dist'\n[dist]\n# Skip checking whether the specified configuration"
},
{
"path": "docs/concepts.md",
"chars": 172,
"preview": "# Magika Concepts\n\nThe documentation has moved, see the [Core Concepts](https://securityresearch.google/magika/core-conc"
},
{
"path": "docs/js.md",
"chars": 176,
"preview": "# JavaScript Documentation\n\nThe documentation has moved, see the [JavaScript bindings](https://securityresearch.google/m"
},
{
"path": "go/README.md",
"chars": 2397,
"preview": "# Go library\n\nThis directory contains the Go library for Magika.\n\nThe inference relies on the [ONNX Runtime](https://onn"
},
{
"path": "go/cli/cli.go",
"chars": 1232,
"preview": "package main\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\n\t\"github.com/google/magika/go/magika\"\n)\n\nconst (\n\tassetsDirEnv = \"MA"
},
{
"path": "go/cli/cli_test.go",
"chars": 643,
"preview": "//go:build cgo && onnxruntime\n\npackage main\n\nimport (\n\t\"path\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/go-cmp/cmp\"\n)\n\n"
},
{
"path": "go/cli/main.go",
"chars": 497,
"preview": "/*\nCLI is a simple command line interface for magika.\n\nIt takes a list of files as argument, and infers their types in s"
},
{
"path": "go/cli/tests_data/magika_test_pptx.txt",
"chars": 73,
"preview": "This is a test for Magika!\n\nVery cool if this can be detected correctly!\n"
},
{
"path": "go/docker/Dockerfile",
"chars": 2569,
"preview": "# Sample Dockerfile to build an image that ties together an ONNX Runtime,\n# a Magika model, and a Magika CLI.\n#\n# It exp"
},
{
"path": "go/example/main.go",
"chars": 1073,
"preview": "//go:build cgo && onnxruntime\n\n// This package illustrates the usage of the Magika go binding.\n//\n// It requires the onn"
},
{
"path": "go/go.mod",
"chars": 99,
"preview": "module github.com/google/magika/go\n\ngo 1.22.3\n\nrequire github.com/google/go-cmp v0.6.0 // indirect\n"
},
{
"path": "go/go.sum",
"chars": 167,
"preview": "github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=\ngithub.com/google/go-cmp v0.6.0/go.mod h"
},
{
"path": "go/magika/config.go",
"chars": 2123,
"preview": "package magika\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"os\"\n\t\"path\"\n)\n\nconst (\n\tconfigFile = \"config.min.json\"\n\tconte"
},
{
"path": "go/magika/content.go",
"chars": 1166,
"preview": "package magika\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"os\"\n)\n\nconst (\n\tcontentTypeLabelEmpty = \"empty\"\n\tcontentTypeLabelTx"
},
{
"path": "go/magika/features.go",
"chars": 3660,
"preview": "package magika\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"io\"\n)\n\n// Features holds the features of a give slice of bytes.\ntype Features"
},
{
"path": "go/magika/features_test.go",
"chars": 2146,
"preview": "package magika\n\nimport (\n\t\"bytes\"\n\t\"compress/gzip\"\n\t\"encoding/json\"\n\t\"io\"\n\t\"os\"\n\t\"testing\"\n\n\t\"github.com/google/go-cmp/c"
},
{
"path": "go/magika/scanner.go",
"chars": 3348,
"preview": "package magika\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"unicode/utf8\"\n\n\t\"github.com/google/magika/go/onnx\"\n)\n\n// Scanner repre"
},
{
"path": "go/magika/scanner_test.go",
"chars": 3861,
"preview": "//go:build cgo && onnxruntime\n\npackage magika\n\nimport (\n\t\"bytes\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"os\"\n\t\"path\"\n\t\"testing\"\n\n\t\"git"
},
{
"path": "go/onnx/onnx.go",
"chars": 213,
"preview": "package onnx\n\n// Onnx represents something that can run inferences on features.\ntype Onnx interface {\n\t// Run returns th"
},
{
"path": "go/onnx/onnx_runtime.go",
"chars": 1290,
"preview": "//go:build cgo && onnxruntime\n\npackage onnx\n\n// #cgo LDFLAGS: -lonnxruntime\n// #include \"onnx_runtime.h\"\nimport \"C\"\n\nimp"
},
{
"path": "go/onnx/onnx_runtime.h",
"chars": 1981,
"preview": "#include <stdio.h>\n#include <onnxruntime_c_api.h>\n\n#define RETURN_ON_ERROR(expr) { \\\n\tOrtStatus* onnx_status = (exp"
},
{
"path": "go/onnx/onnx_runtime_test.go",
"chars": 962,
"preview": "//go:build cgo && onnxruntime\n\npackage onnx_test\n\nimport (\n\t\"math/rand/v2\"\n\t\"testing\"\n\n\t\"github.com/google/magika/go/mag"
},
{
"path": "go/onnx/onnx_zero.go",
"chars": 215,
"preview": "//go:build !(cgo && onnxruntime)\n\npackage onnx\n\n// NewOnnx returns a nil Onnx runtime.\n// This allows for building and u"
},
{
"path": "js/.gitignore",
"chars": 31,
"preview": "package-lock.json\nnode_modules\n"
},
{
"path": "js/CHANGELOG.md",
"chars": 517,
"preview": "# CHANGELOG\n## [1.0.0]\n - Mark end of experimental mode. No major changes.\n\n## [0.3.2]\n\n- Upgrade to `standard_v3_3` mod"
},
{
"path": "js/README.md",
"chars": 4589,
"preview": "# Magika TypeScript/JavaScript library\n\nMagika is a novel AI-powered file type detection tool that relies on the recent "
},
{
"path": "js/magika-cli.ts",
"chars": 4417,
"preview": "#! /usr/bin/env node\n// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n"
},
{
"path": "js/magika-node.ts",
"chars": 5939,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/magika.ts",
"chars": 10135,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/package.json",
"chars": 2574,
"preview": "{\n \"name\": \"magika\",\n \"version\": \"1.0.0\",\n \"description\": \"A tool to detect content types with deep learning.\","
},
{
"path": "js/postBuild.js",
"chars": 569,
"preview": "\n// format sub package.json for dual cjs and esm support\nimport fs from 'fs';\n\nconst formatPackage = (source, output, ty"
},
{
"path": "js/simple_examples/browser-esmodule-example/index.html",
"chars": 261,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-widt"
},
{
"path": "js/simple_examples/browser-esmodule-example/index.js",
"chars": 635,
"preview": "import { Magika } from \"magika\";\n\nasync function main() {\n const magika = await Magika.create();\n const data = new Tex"
},
{
"path": "js/simple_examples/browser-esmodule-example/package.json",
"chars": 416,
"preview": "{\n \"name\": \"browser-esmodule-example\",\n \"version\": \"1.0.0\",\n \"description\": \"Magika browser esmodule example\",\n \"scr"
},
{
"path": "js/simple_examples/browser-esmodule-example/playwright.config.ts",
"chars": 310,
"preview": "import { defineConfig } from \"@playwright/test\";\n\nexport default defineConfig({\n // Run your local dev server before st"
},
{
"path": "js/simple_examples/browser-esmodule-example/test/simple.spec.ts",
"chars": 355,
"preview": "import { test, expect } from \"@playwright/test\";\n\ntest(\"can run Magika\", async ({ page }) => {\n await page.goto(\"http:/"
},
{
"path": "js/simple_examples/node-commonjs-example/index.js",
"chars": 303,
"preview": "const { MagikaNode: Magika } = require(\"magika/node\");\n\nasync function main() {\n const magika = await Magika.create();\n"
},
{
"path": "js/simple_examples/node-commonjs-example/package.json",
"chars": 315,
"preview": "{\n \"name\": \"magika-node-commonjs-example\",\n \"version\": \"1.0.0\",\n \"main\": \"index.js\",\n \"scripts\": {\n \"start\": \"nod"
},
{
"path": "js/simple_examples/node-esmodule-example/index.js",
"chars": 340,
"preview": "import { MagikaNode as Magika } from \"magika/node\";\nimport { TextEncoder } from \"util\";\n\nconst magika = await Magika.cre"
},
{
"path": "js/simple_examples/node-esmodule-example/package.json",
"chars": 339,
"preview": "{\n \"name\": \"magika-node-esmodule-example\",\n \"version\": \"1.0.0\",\n \"main\": \"index.js\",\n \"type\": \"module\",\n \"scripts\":"
},
{
"path": "js/simple_examples/run_examples.sh",
"chars": 564,
"preview": "#!/bin/bash\n\n# Exit on error\nset -e\n\nROOT_DIR=$(pwd)\nexport TF_CPP_MIN_LOG_LEVEL=2\nexport NODE_OPTIONS='--no-warnings'\n\n"
},
{
"path": "js/simple_examples/typescript-esmodule-example/index.ts",
"chars": 328,
"preview": "import { MagikaNode as Magika } from \"magika/node\";\n\nasync function main(): Promise<void> {\n const magika = await Magik"
},
{
"path": "js/simple_examples/typescript-esmodule-example/package.json",
"chars": 464,
"preview": "{\n \"name\": \"magika-typescript-esmodule-example\",\n \"version\": \"1.0.0\",\n \"main\": \"index.js\",\n \"type\": \"module\",\n \"scr"
},
{
"path": "js/src/.npmignore",
"chars": 0,
"preview": ""
},
{
"path": "js/src/content-type-info.ts",
"chars": 733,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/content-type-label.ts",
"chars": 7293,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/content-types-infos.ts",
"chars": 35139,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/magika-options.ts",
"chars": 721,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/magika-prediction.ts",
"chars": 964,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/magika-result.ts",
"chars": 786,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model-config-node.ts",
"chars": 954,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model-config.ts",
"chars": 2806,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model-features.ts",
"chars": 1832,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model-node.ts",
"chars": 943,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model-prediction.ts",
"chars": 787,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/model.ts",
"chars": 2576,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/overwrite-reason.ts",
"chars": 709,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/prediction-mode.ts",
"chars": 730,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/src/status.ts",
"chars": 943,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/features-extraction-vs-reference.test.ts",
"chars": 3811,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/inference-vs-reference.test.ts",
"chars": 6682,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/magika-cli.test.ts",
"chars": 3121,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/magika.test.ts",
"chars": 8819,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/tfnHook.ts",
"chars": 1423,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/test/utils.ts",
"chars": 1068,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "js/tsconfig.cjs.json",
"chars": 191,
"preview": "{\n \"extends\": \"./tsconfig.json\",\n \"compilerOptions\": {\n \"module\": \"commonjs\",\n \"moduleResolution\": \""
},
{
"path": "js/tsconfig.esm.json",
"chars": 195,
"preview": "{\n \"extends\": \"./tsconfig.json\",\n \"compilerOptions\": {\n \"module\": \"nodenext\",\n \"moduleResolution\": \""
},
{
"path": "js/tsconfig.json",
"chars": 716,
"preview": "{\n \"compilerOptions\": {\n \"allowSyntheticDefaultImports\": true,\n \"baseUrl\": \".\",\n \"declaration\": "
},
{
"path": "python/.gitignore",
"chars": 110,
"preview": "*.pyc\n__pycache__/\n**/.ruff_cache/\n.ipynb_checkpoints\nvenv/\n.env\n*.swp\n*.h5\n*.egg-info\ndist/*\n*.pickle\n.s.yml\n"
},
{
"path": "python/.python-version",
"chars": 5,
"preview": "3.12\n"
},
{
"path": "python/CHANGELOG.md",
"chars": 14915,
"preview": "# Changelog\n\nAll notable changes to this project will be documented in this file.\n\nThe format is based on [Keep a Change"
},
{
"path": "python/README.md",
"chars": 8439,
"preview": "# Magika Python Package\n\n[](https://pypi.python.org/pypi/magika)\n[![NP"
},
{
"path": "python/mypy.ini",
"chars": 1223,
"preview": "[mypy]\nshow_error_codes = True\nfollow_imports = silent\nlocal_partial_types = true\nstrict_equality = true\nno_implicit_opt"
},
{
"path": "python/pyproject.toml",
"chars": 3150,
"preview": "[project]\nname = \"magika\"\ndescription = \"A tool to determine the content type of a file with deep learning\"\nauthors = [\n"
},
{
"path": "python/pytest.ini",
"chars": 76,
"preview": "[pytest]\nlog_cli = 1\nlog_level = WARNING\n\nmarkers =\n smoketest\n slow\n\n"
},
{
"path": "python/scripts/check_changelog.sh",
"chars": 985,
"preview": "#!/bin/bash\n# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may no"
},
{
"path": "python/scripts/check_copyright.py",
"chars": 3386,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/scripts/check_documentation.py",
"chars": 12182,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/scripts/check_source.sh",
"chars": 883,
"preview": "#!/bin/bash\n# Copyright 2023-2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you m"
},
{
"path": "python/scripts/generate_reference.py",
"chars": 1346,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/scripts/pre_release_check.py",
"chars": 7213,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/scripts/prepare_pyproject_for_pure_python_wheel.py",
"chars": 1445,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/scripts/run_quick_test_magika_cli.py",
"chars": 3206,
"preview": "#!/usr/bin/env python\n# Copyright 2023-2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\""
},
{
"path": "python/scripts/run_quick_test_magika_module.py",
"chars": 3203,
"preview": "#!/usr/bin/env python\n# Copyright 2023-2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\""
},
{
"path": "python/scripts/sync.py",
"chars": 8660,
"preview": "#!/usr/bin/env python3\n# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "python/scripts/test_magika_model.py",
"chars": 2547,
"preview": "#!/usr/bin/env python3\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "python/src/magika/__init__.py",
"chars": 1481,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/cli/magika_client.py",
"chars": 12077,
"preview": "#!/usr/bin/env python3\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
},
{
"path": "python/src/magika/cli/magika_rust_client_not_found_warning.py",
"chars": 1713,
"preview": "#!/usr/bin/env python\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "python/src/magika/colors.py",
"chars": 1081,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/config/content_types_kb.min.json",
"chars": 44768,
"preview": "{\"3gp\":{\"mime_type\":\"video/3gpp\",\"group\":\"video\",\"description\":\"3GPP multimedia file\",\"extensions\":[\"3gp\"],\"is_text\":fal"
},
{
"path": "python/src/magika/logger.py",
"chars": 3016,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/magika.py",
"chars": 33334,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/models/standard_v3_3/README.md",
"chars": 16448,
"preview": "# Model documentation\n\n## Table of Contents\n\n1. [List of possible outputs](#list-of-possible-outputs)\n1. [List of possib"
},
{
"path": "python/src/magika/models/standard_v3_3/config.min.json",
"chars": 2141,
"preview": "{\"beg_size\":1024,\"mid_size\":0,\"end_size\":1024,\"use_inputs_at_offsets\":false,\"medium_confidence_threshold\":0.5,\"min_file_"
},
{
"path": "python/src/magika/models/standard_v3_3/metadata.json",
"chars": 19,
"preview": "{\"epoch_num\":\"91\"}\n"
},
{
"path": "python/src/magika/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "python/src/magika/types/__init__.py",
"chars": 1561,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/content_type_info.py",
"chars": 3082,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/content_type_label.py",
"chars": 7954,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/magika_error.py",
"chars": 641,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/magika_prediction.py",
"chars": 1620,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/magika_result.py",
"chars": 5355,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/model.py",
"chars": 1509,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/overwrite_reason.py",
"chars": 935,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/prediction_mode.py",
"chars": 1146,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/seekable.py",
"chars": 1112,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/status.py",
"chars": 1106,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/src/magika/types/strenum.py",
"chars": 1896,
"preview": "# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "python/tests/test_features_extraction_vs_reference.py",
"chars": 9333,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/tests/test_inference_vs_reference.py",
"chars": 23080,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/tests/test_magika_python_module.py",
"chars": 29268,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/tests/test_python_magika_client.py",
"chars": 1181,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "python/tests/utils.py",
"chars": 5541,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "rust/.gitignore",
"chars": 9,
"preview": "/target/\n"
},
{
"path": "rust/README.md",
"chars": 961,
"preview": "This directory contains the Rust crates and their tools. It has the following structure:\n- The `cli` directory contains "
},
{
"path": "rust/changelog.sh",
"chars": 1462,
"preview": "#!/bin/sh\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "rust/cli/CHANGELOG.md",
"chars": 2202,
"preview": "# Changelog\n\n## 1.0.3-dev\n\n### Patch\n\n- Update dependencies\n- Fix new clippy lints\n\n## 1.0.2\n\n### Patch\n\n- Update depend"
},
{
"path": "rust/cli/Cargo.toml",
"chars": 948,
"preview": "[package]\nname = \"magika\"\nversion = \"1.0.3-dev\"\nauthors = [\"Magika Developers <magika-dev@google.com>\"]\nlicense = \"Apach"
},
{
"path": "rust/cli/README.md",
"chars": 5453,
"preview": "# Magika CLI\n\nThis binary crate implements a command-line interface (CLI) to the library crate\n[magika](https://crates.i"
},
{
"path": "rust/cli/output",
"chars": 3482,
"preview": "+ magika rust/code.rs\nrust/code.rs: Rust source (code)\n+ env -u COLORTERM magika rust/code.rs --colors\n\u001b[94mrust/code.rs"
},
{
"path": "rust/cli/publish.sh",
"chars": 1322,
"preview": "#!/bin/sh\n# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "rust/cli/src/main.rs",
"chars": 19773,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "rust/cli/test.sh",
"chars": 1757,
"preview": "#!/bin/sh\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "rust/color.sh",
"chars": 799,
"preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
},
{
"path": "rust/gen/Cargo.toml",
"chars": 186,
"preview": "[package]\nname = \"gen\"\nversion = \"0.0.0\"\nedition = \"2021\"\npublish = false\n\n[dependencies]\nanyhow = \"1.0.82\"\nserde = { ve"
},
{
"path": "rust/gen/README.md",
"chars": 1804,
"preview": "This crate is for maintenance purposes only. It is used to update the Rust library to a new model.\nThere are 3 files in "
},
{
"path": "rust/gen/content_types",
"chars": 1279,
"preview": "3gp\nace\nai\naidl\napk\napplebplist\nappleplist\nasm\nasp\nautohotkey\nautoit\nawk\nbatch\nbazel\nbib\nbmp\nbzip\nc\ncab\ncat\nchm\nclojure\n"
},
{
"path": "rust/gen/src/main.rs",
"chars": 9827,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "rust/gen/test.sh",
"chars": 678,
"preview": "#!/bin/sh\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "rust/latest.sh",
"chars": 1841,
"preview": "#!/bin/sh\n# Copyright 2025 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "rust/lib/CHANGELOG.md",
"chars": 2547,
"preview": "# Changelog\n\n## 1.1.0-dev\n\n### Minor\n\n- Unseal `SyncInput` and `AsyncInput` for custom file-like objects\n- Support files"
},
{
"path": "rust/lib/Cargo.toml",
"chars": 1144,
"preview": "[package]\nname = \"magika\"\nversion = \"1.1.0-dev\"\nauthors = [\"Magika Developers <magika-dev@google.com>\"]\nlicense = \"Apach"
},
{
"path": "rust/lib/README.md",
"chars": 866,
"preview": "# Magika\n\nThis library crate provides file content type detection using AI. A command-line interface (CLI) for\nthis libr"
},
{
"path": "rust/lib/src/builder.rs",
"chars": 2805,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
},
{
"path": "rust/lib/src/config.rs",
"chars": 1550,
"preview": "// Copyright 2024 Google LLC\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use th"
}
]
// ... and 232 more files (download for full content)
About this extraction
This page contains the full source code of the google/magika GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 432 files (1.8 MB), approximately 580.7k tokens, and a symbol index with 590 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.