Repository: nolze/msoffcrypto-tool Branch: master Commit: 6d9e72c58de2 Files: 62 Total size: 184.3 KB Directory structure: gitextract_ab9olciu/ ├── .github/ │ ├── SECURITY.md │ └── workflows/ │ └── ci.yaml ├── .gitignore ├── .readthedocs.yml ├── CHANGELOG.md ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── docs/ │ ├── Makefile │ ├── cli.rst │ ├── conf.py │ ├── index.rst │ ├── make.bat │ ├── modules.rst │ ├── msoffcrypto.exceptions.rst │ ├── msoffcrypto.format.rst │ ├── msoffcrypto.method.container.rst │ ├── msoffcrypto.method.rst │ ├── msoffcrypto.rst │ └── requirements.txt ├── msoffcrypto/ │ ├── __init__.py │ ├── __main__.py │ ├── exceptions/ │ │ └── __init__.py │ ├── format/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── common.py │ │ ├── doc97.py │ │ ├── ooxml.py │ │ ├── ppt97.py │ │ └── xls97.py │ └── method/ │ ├── __init__.py │ ├── container/ │ │ ├── __init__.py │ │ └── ecma376_encrypted.py │ ├── ecma376_agile.py │ ├── ecma376_extensible.py │ ├── ecma376_standard.py │ ├── rc4.py │ ├── rc4_cryptoapi.py │ └── xor_obfuscation.py ├── pyproject.toml └── tests/ ├── __init__.py ├── inputs/ │ ├── ecma376standard_password.docx │ ├── example_password.docx │ ├── example_password.xlsx │ ├── plain.doc │ ├── plain.ppt │ ├── plain.xls │ ├── rc4cryptoapi_password.doc │ ├── rc4cryptoapi_password.ppt │ ├── rc4cryptoapi_password.xls │ └── xor_password_123456789012345.xls ├── outputs/ │ ├── ecma376standard_password_plain.docx │ ├── example.docx │ ├── example.xlsx │ ├── rc4cryptoapi_password_plain.doc │ ├── rc4cryptoapi_password_plain.ppt │ ├── rc4cryptoapi_password_plain.xls │ └── xor_password_123456789012345_plain.xls ├── test_cli.py ├── test_cli.sh ├── test_compare_known_output.py └── test_file_handle.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/SECURITY.md ================================================ # Security Policy ## Reporting a 
Vulnerability To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). Tidelift will coordinate the fix and disclosure. ================================================ FILE: .github/workflows/ci.yaml ================================================ name: build on: push: # branches: [$default-branch] branches: ["master"] tags: ["*"] pull_request: # branches: [$default-branch] branches: ["master"] jobs: # https://srz-zumix.blogspot.com/2019/10/github-actions-ci-skip.html prepare: runs-on: ubuntu-latest if: "! contains(github.event.head_commit.message, '[skip ci]')" steps: - run: echo "[skip ci] ${{ contains(github.event.head_commit.message, '[skip ci]') }}" - run: echo "[github.ref] ${{ github.ref }}" build: needs: ["prepare"] runs-on: ${{ matrix.os }} strategy: fail-fast: true matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install poetry and codecov run: | python -m pip install --upgrade pip python -m pip install poetry codecov - name: Install dependencies run: | poetry install --no-interaction - name: Test with pytest run: | poetry run coverage run -m pytest -v codecov publish: needs: ["build"] if: "success() && startsWith(github.ref, 'refs/tags')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v3 with: python-version: "3.x" - name: Install poetry run: | python -m pip install --upgrade pip python -m pip install poetry - name: Build and publish package run: | poetry config pypi-token.pypi "${{ secrets.PYPI_API_TOKEN }}" poetry publish --no-interaction --build ================================================ FILE: .gitignore ================================================ docs/_static/ docs/_templates/ 
docs/_build/ ### https://raw.github.com/github/gitignore/4bff4a2986af526650f1d329d97047dc1fa87599/Python.gitignore # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log .static_storage/ .media/ local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ### https://raw.github.com/github/gitignore/4bff4a2986af526650f1d329d97047dc1fa87599/Global/macOS.gitignore # General .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### https://raw.github.com/github/gitignore/4bff4a2986af526650f1d329d97047dc1fa87599/Global/Windows.gitignore # Windows thumbnail cache files Thumbs.db ehthumbs.db ehthumbs_vista.db # 
Dump file *.stackdump # Folder config file [Dd]esktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msm *.msp # Windows shortcuts *.lnk ================================================ FILE: .readthedocs.yml ================================================ version: 2 sphinx: configuration: docs/conf.py build: os: ubuntu-22.04 tools: python: "3.11" python: install: - requirements: docs/requirements.txt - method: pip path: . ================================================ FILE: CHANGELOG.md ================================================ v6.0.0 / 2026-01-12 =================== * (BREAKING) Drop support for Python 3.8 and 3.9, add Python 3.14 to CI * Update dependencies * Clarify error messages v5.4.2 / 2024-08-09 =================== * Fix DeprecationWarning from cryptography library (reported by @dennn11, [#92](https://github.com/nolze/msoffcrypto-tool/issues/92)) v5.4.1 / 2024-05-25 =================== * Fix for incorrect key size with 0 length keySize var (@UserJHansen, [#89](https://github.com/nolze/msoffcrypto-tool/pull/89)) v5.4.0 / 2024-05-02 =================== * Never return None in ooxml's \_parseinfo (@gdesmar, [#88](https://github.com/nolze/msoffcrypto-tool/pull/88)) v5.3.1 / 2024-01-19 =================== * Bug fixes v5.3.0 / 2024-01-19 =================== * Add support for OOXML encryption, a port from the C++ library https://github.com/herumi/msoffice (@stephane-rouleau, [#86](https://github.com/nolze/msoffcrypto-tool/pull/86)) v5.2.0 / 2024-01-06 =================== * Support XOR Obfuscation decryption for .xls documents (@DissectMalware, [#80](https://github.com/nolze/msoffcrypto-tool/pull/80)) * Bug fixes v5.1.1 / 2023-07-20 =================== * Drop Python 3.7 support as it reaches EOL, Add Python 3.11 to CI environments * Get the version in `__main__.py` instead of `__init__.py` to avoid a relevant error in PyInstaller/cx\_Freeze in which `pkg_resources` does not work by default v5.1.0 / 2023-07-17 
=================== * Load plain OOXML as OfficeFile with type == plain. Fixes [#74](https://github.com/nolze/msoffcrypto-tool/issues/74) * Use importlib.metadata.version in Python >=3.8 ([#77](https://github.com/nolze/msoffcrypto-tool/issues/77)) 5.0.1 / 2023-02-28 =================== * (dev) Switch to GitHub Actions from Travis CI * Update dependencies, Drop Python 3.6 support 5.0.0 / 2022-01-20 ================== * (dev) Add tests on Python 3.7 to 3.9 ([#71](https://github.com/nolze/msoffcrypto-tool/pull/71)) * (dev) Track poetry.lock ([#71](https://github.com/nolze/msoffcrypto-tool/pull/71)) * (BREAKING) Drop Python 2 support ([#71](https://github.com/nolze/msoffcrypto-tool/pull/71)) * Raise exception if no encryption type is specified ([#70](https://github.com/nolze/msoffcrypto-tool/issues/70)) * Support SHA256, SHA384 hash algorithm (@jackydo, [#67](https://github.com/nolze/msoffcrypto-tool/pull/67)) * Fix errors for unencrypted documents * Use absolute imports ([#63](https://github.com/nolze/msoffcrypto-tool/pull/63)) 4.12.0 / 2021-06-04 =================== * Use custom exceptions ([#59](https://github.com/nolze/msoffcrypto-tool/pull/59)) * (dev) Remove nose (thank you) ([#57](https://github.com/nolze/msoffcrypto-tool/pull/57)) * (dev) Use poetry ([#55](https://github.com/nolze/msoffcrypto-tool/pull/55)) 4.11.0 / 2020-09-03 =================== * Improve hash calculation (suggested by @StanislavNikolov) * Add "verify\_passwd" and "verify\_integrity" option (@jeffli678) * Make _packUserEditAtom spec-compliant 4.10.2 / 2020-04-08 =================== * Update \_makekey in rc4\_cryptoapi (@doracpphp) * Fix handling of optional field value in ppt97 * Add tests for is_encrypted() (--test) * Make Doc97File.is_encrypted() return boolean ================================================ FILE: LICENSE.txt ================================================ MIT License Copyright (c) 2015 nolze Permission is hereby granted, free of charge, to any person obtaining a copy of 
this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: NOTICE.txt ================================================ This software contains derivative works from https://github.com/herumi/msoffice which is licensed under the BSD 3-Clause License. https://github.com/herumi/msoffice/blob/c3cdb1ea0a5285a2a1718fee2dc893fd884bdad0/COPYRIGHT Copyright (c) 2007-2015 Cybozu Labs, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the Cybozu Labs, Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # msoffcrypto-tool [![PyPI](https://img.shields.io/pypi/v/msoffcrypto-tool.svg)](https://pypi.org/project/msoffcrypto-tool/) [![PyPI downloads](https://img.shields.io/pypi/dm/msoffcrypto-tool.svg)](https://pypistats.org/packages/msoffcrypto-tool) [![build](https://github.com/nolze/msoffcrypto-tool/actions/workflows/ci.yaml/badge.svg)](https://github.com/nolze/msoffcrypto-tool/actions/workflows/ci.yaml) [![Coverage Status](https://codecov.io/gh/nolze/msoffcrypto-tool/branch/master/graph/badge.svg)](https://codecov.io/gh/nolze/msoffcrypto-tool) [![Documentation Status](https://readthedocs.org/projects/msoffcrypto-tool/badge/?version=latest)](http://msoffcrypto-tool.readthedocs.io/en/latest/?badge=latest) msoffcrypto-tool is a Python tool and library for decrypting and encrypting MS Office files using a password or other keys. 
## Contents * [Installation](#installation) * [Examples](#examples) * [Supported encryption methods](#supported-encryption-methods) * [Tests](#tests) * [Todo](#todo) * [Resources](#resources) * [Use cases and mentions](#use-cases-and-mentions) * [Contributors](#contributors) * [Credits](#credits) ## Installation ``` pip install msoffcrypto-tool ``` ## Examples ### As CLI tool (with password) #### Decryption Specify the password with `-p` flag: ``` msoffcrypto-tool encrypted.docx decrypted.docx -p Passw0rd ``` Password is prompted if you omit the password argument value: ```bash $ msoffcrypto-tool encrypted.docx decrypted.docx -p Password: ``` To check if the file is encrypted or not, use `-t` flag: ``` msoffcrypto-tool document.doc --test -v ``` It returns `1` if the file is encrypted, `0` if not. #### Encryption (OOXML only, experimental) > [!IMPORTANT] > Encryption feature is experimental. Please use it at your own risk. To password-protect a document, use `-e` flag along with `-p` flag: ``` msoffcrypto-tool -e -p Passw0rd plain.docx encrypted.docx ``` ### As library Password and more key types are supported with library functions. 
#### Decryption Basic usage: ```python import msoffcrypto encrypted = open("encrypted.docx", "rb") file = msoffcrypto.OfficeFile(encrypted) file.load_key(password="Passw0rd") # Use password with open("decrypted.docx", "wb") as f: file.decrypt(f) encrypted.close() ``` In-memory: ```python import msoffcrypto import io import pandas as pd decrypted = io.BytesIO() with open("encrypted.xlsx", "rb") as f: file = msoffcrypto.OfficeFile(f) file.load_key(password="Passw0rd") # Use password file.decrypt(decrypted) df = pd.read_excel(decrypted) print(df) ``` Advanced usage: ```python # Verify password before decryption (default: False) # The ECMA-376 Agile/Standard crypto system allows one to know whether the supplied password is correct before actually decrypting the file # Currently, the verify_password option is only meaningful for ECMA-376 Agile/Standard Encryption file.load_key(password="Passw0rd", verify_password=True) # Use private key file.load_key(private_key=open("priv.pem", "rb")) # Use intermediate key (secretKey) file.load_key(secret_key=binascii.unhexlify("AE8C36E68B4BB9EA46E5544A5FDB6693875B2FDE1507CBC65C8BCF99E25C2562")) # Check the HMAC of the data payload before decryption (default: False) # Currently, the verify_integrity option is only meaningful for ECMA-376 Agile Encryption file.decrypt(open("decrypted.docx", "wb"), verify_integrity=True) ``` Supported key types are - Passwords - Intermediate keys (optional) - Private keys used for generating escrow keys (escrow certificates) (optional) See also ["Backdooring MS Office documents with secret master keys"](https://web.archive.org/web/20171008075059/http://secuinside.com/archive/2015/2015-1-9.pdf) for more information on the key types. #### Encryption (OOXML only, experimental) > [!IMPORTANT] > Encryption feature is experimental. Please use it at your own risk. 
Basic usage: ```python from msoffcrypto.format.ooxml import OOXMLFile plain = open("plain.docx", "rb") file = OOXMLFile(plain) with open("encrypted.docx", "wb") as f: file.encrypt("Passw0rd", f) plain.close() ``` In-memory: ```python from msoffcrypto.format.ooxml import OOXMLFile import io encrypted = io.BytesIO() with open("plain.xlsx", "rb") as f: file = OOXMLFile(f) file.encrypt("Passw0rd", encrypted) # Do stuff with encrypted buffer; it contains an OLE container with an encrypted stream ... ``` ## Supported encryption methods ### MS-OFFCRYPTO specs * [x] ECMA-376 (Agile Encryption/Standard Encryption) * [x] MS-DOCX (OOXML) (Word 2007-) * [x] MS-XLSX (OOXML) (Excel 2007-) * [x] MS-PPTX (OOXML) (PowerPoint 2007-) * [x] Office Binary Document RC4 CryptoAPI * [x] MS-DOC (Word 2002, 2003, 2004) * [x] MS-XLS ([Excel 2002, 2003, 2007, 2010](https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/a3ad4e36-ab66-426c-ba91-b84433312068#Appendix_A_22)) (experimental) * [x] MS-PPT (PowerPoint 2002, 2003, 2004) (partial, experimental) * [x] Office Binary Document RC4 * [x] MS-DOC (Word 97, 98, 2000) * [x] MS-XLS (Excel 97, 98, 2000) (experimental) * [ ] ECMA-376 (Extensible Encryption) * [x] XOR Obfuscation * [x] MS-XLS ([Excel 2002, 2003](https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/a3ad4e36-ab66-426c-ba91-b84433312068#Appendix_A_21)) (experimental) * [ ] MS-DOC (Word 2002, 2003, 2004?) ### Other * [ ] Word 95 Encryption (Word 95 and prior) * [ ] Excel 95 Encryption (Excel 95 and prior) * [ ] PowerPoint 95 Encryption (PowerPoint 95 and prior) PRs are welcome! 
## Tests With [coverage](https://github.com/nedbat/coveragepy) and [pytest](https://pytest.org/): ``` poetry install poetry run coverage run -m pytest -v ``` ## Todo * [x] Add tests * [x] Support decryption with passwords * [x] Support older encryption schemes * [x] Add function-level tests * [x] Add API documents * [x] Publish to PyPI * [x] Add decryption tests for various file formats * [x] Integrate with more comprehensive projects handling MS Office files (such as [oletools](https://github.com/decalage2/oletools/)?) if possible * [x] Add the password prompt mode for CLI * [x] Improve error types (v4.12.0) * [ ] Add type hints * [ ] Introduce something like `ctypes.Structure` * [x] Support OOXML encryption * [ ] Support other encryption * [ ] Isolate parser * [ ] Redesign APIs (v6.0.0) ## Resources * "Backdooring MS Office documents with secret master keys" [http://secuinside.com/archive/2015/2015-1-9.pdf](https://web.archive.org/web/20171008075059/http://secuinside.com/archive/2015/2015-1-9.pdf) * Technical Documents * [MS-OFFCRYPTO] Agile Encryption * [MS-OFFDI] Microsoft Office File Format Documentation Introduction * LibreOffice/core * LibreOffice/mso-dumper * wvDecrypt * Microsoft Office password protection - Wikipedia * office2john.py ## Alternatives * herumi/msoffice * DocRecrypt * Apache POI - the Java API for Microsoft Documents ## Use cases and mentions ### General * (kudos to maintainers!) ### Corporate * Workato * Check Point ### Malware/maldoc analysis * * ### CTF * * ### In other languages * * * * ### In publications * [Excel、データ整理&分析、画像処理の自動化ワザを完全網羅! 
超速Python仕事術大全](https://books.google.co.jp/books?id=TBdVEAAAQBAJ&q=msoffcrypto) (伊沢剛, 2022) * ["Analyse de documents malveillants en 2021"](https://twitter.com/decalage2/status/1435255507846053889), MISC Hors-série N° 24, "Reverse engineering : apprenez à analyser des binaires" (Lagadec Philippe, 2021) * [シゴトがはかどる Python自動処理の教科書](https://books.google.co.jp/books?id=XEYUEAAAQBAJ&q=msoffcrypto) (クジラ飛行机, 2020) ## Contributors * ## Credits * The sample file for XOR Obfuscation is from: ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/cli.rst ================================================ Command-line interface ====================== .. toctree:: .. autoprogram:: msoffcrypto.__main__:parser :prog: msoffcrypto-tool ================================================ FILE: docs/conf.py ================================================ # Configuration file for the Sphinx documentation builder. 
# # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "msoffcrypto-tool" copyright = "nolze" author = "nolze" version = "" release = version # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration import os import sys sys.path.insert(0, os.path.abspath("../")) extensions = [ "sphinx.ext.autodoc", "sphinxcontrib.autoprogram", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "myst_parser", ] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "furo" html_static_path = ["_static"] # html_title = " " html_title = "msoffcrypto-tool" html_theme_options = { "footer_icons": [ { "name": "GitHub", "url": "https://github.com/nolze/msoffcrypto-tool", "html": """ """, "class": "", }, ], } myst_enable_extensions = ["tasklist"] ================================================ FILE: docs/index.rst ================================================ .. msoffcrypto-tool documentation master file, created by sphinx-quickstart on Tue Oct 17 02:16:54 2023. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. msoffcrypto-tool ================ .. include:: ../README.md :parser: myst_parser.sphinx_ :start-after: msoffcrypto-tool .. toctree:: :hidden: :maxdepth: 2 :caption: Contents: cli msoffcrypto .. * :ref:`genindex` .. * :ref:`modindex` .. 
* :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/modules.rst ================================================ msoffcrypto =========== .. toctree:: :maxdepth: 1 msoffcrypto ================================================ FILE: docs/msoffcrypto.exceptions.rst ================================================ msoffcrypto.exceptions package ============================== Module contents --------------- .. automodule:: msoffcrypto.exceptions :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/msoffcrypto.format.rst ================================================ msoffcrypto.format package ========================== Submodules ---------- msoffcrypto.format.base module ------------------------------ .. automodule:: msoffcrypto.format.base :members: :undoc-members: :show-inheritance: msoffcrypto.format.common module -------------------------------- .. automodule:: msoffcrypto.format.common :members: :undoc-members: :show-inheritance: msoffcrypto.format.doc97 module ------------------------------- .. 
automodule:: msoffcrypto.format.doc97 :members: :undoc-members: :show-inheritance: msoffcrypto.format.ooxml module ------------------------------- .. automodule:: msoffcrypto.format.ooxml :members: :undoc-members: :show-inheritance: msoffcrypto.format.ppt97 module ------------------------------- .. automodule:: msoffcrypto.format.ppt97 :members: :undoc-members: :show-inheritance: msoffcrypto.format.xls97 module ------------------------------- .. automodule:: msoffcrypto.format.xls97 :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: msoffcrypto.format :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/msoffcrypto.method.container.rst ================================================ msoffcrypto.method.container package ==================================== Submodules ---------- msoffcrypto.method.container.ecma376\_encrypted module ------------------------------------------------------ .. automodule:: msoffcrypto.method.container.ecma376_encrypted :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: msoffcrypto.method.container :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/msoffcrypto.method.rst ================================================ msoffcrypto.method package ========================== Subpackages ----------- .. toctree:: :maxdepth: 1 msoffcrypto.method.container Submodules ---------- msoffcrypto.method.ecma376\_agile module ---------------------------------------- .. automodule:: msoffcrypto.method.ecma376_agile :members: :undoc-members: :show-inheritance: msoffcrypto.method.ecma376\_extensible module --------------------------------------------- .. automodule:: msoffcrypto.method.ecma376_extensible :members: :undoc-members: :show-inheritance: msoffcrypto.method.ecma376\_standard module ------------------------------------------- .. 
automodule:: msoffcrypto.method.ecma376_standard :members: :undoc-members: :show-inheritance: msoffcrypto.method.rc4 module ----------------------------- .. automodule:: msoffcrypto.method.rc4 :members: :undoc-members: :show-inheritance: msoffcrypto.method.rc4\_cryptoapi module ---------------------------------------- .. automodule:: msoffcrypto.method.rc4_cryptoapi :members: :undoc-members: :show-inheritance: msoffcrypto.method.xor\_obfuscation module ------------------------------------------ .. automodule:: msoffcrypto.method.xor_obfuscation :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: msoffcrypto.method :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/msoffcrypto.rst ================================================ msoffcrypto package =================== Subpackages ----------- .. toctree:: :maxdepth: 1 msoffcrypto.exceptions msoffcrypto.format msoffcrypto.method Module contents --------------- .. 
automodule:: msoffcrypto :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/requirements.txt ================================================ accessible-pygments==0.0.5 ; python_version >= "3.10" and python_version < "4.0" alabaster==1.0.0 ; python_version >= "3.10" and python_version < "4.0" anyio==4.12.1 ; python_version >= "3.10" and python_version < "4.0" babel==2.17.0 ; python_version >= "3.10" and python_version < "4.0" beautifulsoup4==4.14.3 ; python_version >= "3.10" and python_version < "4.0" certifi==2026.1.4 ; python_version >= "3.10" and python_version < "4.0" charset-normalizer==3.4.4 ; python_version >= "3.10" and python_version < "4.0" click==8.3.1 ; python_version >= "3.10" and python_version < "4.0" colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" docutils==0.21.2 ; python_version >= "3.10" and python_version < "4.0" exceptiongroup==1.3.1 ; python_version == "3.10" furo==2025.12.19 ; python_version >= "3.10" and python_version < "4.0" h11==0.16.0 ; python_version >= "3.10" and python_version < "4.0" idna==3.11 ; python_version >= "3.10" and python_version < "4.0" imagesize==1.4.1 ; python_version >= "3.10" and python_version < "4.0" jinja2==3.1.6 ; python_version >= "3.10" and python_version < "4.0" markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0" markupsafe==3.0.3 ; python_version >= "3.10" and python_version < "4.0" mdit-py-plugins==0.5.0 ; python_version >= "3.10" and python_version < "4.0" mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0" myst-parser==4.0.1 ; python_version >= "3.10" and python_version < "4.0" packaging==25.0 ; python_version >= "3.10" and python_version < "4.0" pygments==2.19.2 ; python_version >= "3.10" and python_version < "4.0" pyyaml==6.0.3 ; python_version >= "3.10" and python_version < "4.0" requests==2.32.5 ; python_version >= "3.10" and python_version < "4.0" snowballstemmer==3.0.1 ; python_version >= 
"3.10" and python_version < "4.0" soupsieve==2.8.1 ; python_version >= "3.10" and python_version < "4.0" sphinx-autobuild==2024.10.2 ; python_version >= "3.10" and python_version < "4.0" sphinx-basic-ng==1.0.0b2 ; python_version >= "3.10" and python_version < "4.0" sphinx==8.1.3 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-autoprogram==0.1.9 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version < "4.0" sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version < "4.0" starlette==0.51.0 ; python_version >= "3.10" and python_version < "4.0" tomli==2.4.0 ; python_version == "3.10" typing-extensions==4.15.0 ; python_version >= "3.10" and python_version < "4.0" urllib3==2.6.3 ; python_version >= "3.10" and python_version < "4.0" uvicorn==0.40.0 ; python_version >= "3.10" and python_version < "4.0" watchfiles==1.1.1 ; python_version >= "3.10" and python_version < "4.0" websockets==16.0 ; python_version >= "3.10" and python_version < "4.0" ================================================ FILE: msoffcrypto/__init__.py ================================================ import zipfile import olefile from msoffcrypto import exceptions def OfficeFile(file): """Return an office file object based on the format of given file. Args: file (:obj:`_io.BufferedReader`): Input file. Returns: BaseOfficeFile object. Examples: >>> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OfficeFile(f) ... 
officefile.keyTypes ('password', 'private_key', 'secret_key') >>> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OfficeFile(f) ... officefile.load_key(password="Password1234_", verify_password=True) >>> with open("README.md", "rb") as f: ... officefile = OfficeFile(f) Traceback (most recent call last): ... msoffcrypto.exceptions.FileFormatError: ... >>> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OfficeFile(f) ... officefile.load_key(password="0000", verify_password=True) Traceback (most recent call last): ... msoffcrypto.exceptions.InvalidKeyError: ... Given file handle will not be closed, the file position will most certainly change. """ file.seek(0) # required by isOleFile if olefile.isOleFile(file): ole = olefile.OleFileIO(file) elif zipfile.is_zipfile(file): # Heuristic from msoffcrypto.format.ooxml import OOXMLFile return OOXMLFile(file) else: raise exceptions.FileFormatError("Unsupported file format") # TODO: Make format specifiable by option in case of obstruction # Try this first; see https://github.com/nolze/msoffcrypto-tool/issues/17 if ole.exists("EncryptionInfo"): from msoffcrypto.format.ooxml import OOXMLFile return OOXMLFile(file) # MS-DOC: The WordDocument stream MUST be present in the file. # https://msdn.microsoft.com/en-us/library/dd926131(v=office.12).aspx elif ole.exists("wordDocument"): from msoffcrypto.format.doc97 import Doc97File return Doc97File(file) # MS-XLS: A file MUST contain exactly one Workbook Stream, ... # https://msdn.microsoft.com/en-us/library/dd911009(v=office.12).aspx elif ole.exists("Workbook"): from msoffcrypto.format.xls97 import Xls97File return Xls97File(file) # MS-PPT: A required stream whose name MUST be "PowerPoint Document". 
# https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-ppt/1fc22d56-28f9-4818-bd45-67c2bf721ccf elif ole.exists("PowerPoint Document"): from msoffcrypto.format.ppt97 import Ppt97File return Ppt97File(file) else: raise exceptions.FileFormatError("Unrecognized file format") ================================================ FILE: msoffcrypto/__main__.py ================================================ import argparse import getpass import logging import sys import olefile from msoffcrypto import OfficeFile, exceptions from msoffcrypto.format.ooxml import OOXMLFile, _is_ooxml logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) def _get_version(): if sys.version_info >= (3, 8): from importlib import metadata return metadata.version("msoffcrypto-tool") else: import pkg_resources return pkg_resources.get_distribution("msoffcrypto-tool").version def ifWIN32SetBinary(io): if sys.platform == "win32": import msvcrt import os msvcrt.setmode(io.fileno(), os.O_BINARY) def is_encrypted(file): r""" Test if the file is encrypted. 
>>> f = open("tests/inputs/plain.doc", "rb") >>> is_encrypted(f) False """ # TODO: Validate file if not olefile.isOleFile(file): return False file = OfficeFile(file) return file.is_encrypted() parser = argparse.ArgumentParser() group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-p", "--password", nargs="?", const="", dest="password", help="password text") group.add_argument("-t", "--test", dest="test_encrypted", action="store_true", help="test if the file is encrypted") parser.add_argument("-e", dest="encrypt", action="store_true", help="encryption mode (default is false)") parser.add_argument("-v", dest="verbose", action="store_true", help="print verbose information") parser.add_argument("infile", nargs="?", type=argparse.FileType("rb"), help="input file") parser.add_argument("outfile", nargs="?", type=argparse.FileType("wb"), help="output file (if blank, stdout is used)") def main(): args = parser.parse_args() if args.verbose: logger.removeHandler(logging.NullHandler()) logging.basicConfig(level=logging.DEBUG, format="%(message)s") version = _get_version() logger.debug("Version: {}".format(version)) if args.test_encrypted: if not is_encrypted(args.infile): print("{}: not encrypted".format(args.infile.name), file=sys.stderr) sys.exit(1) else: logger.debug("{}: encrypted".format(args.infile.name)) return if args.password: password = args.password else: password = getpass.getpass() if args.outfile is None: ifWIN32SetBinary(sys.stdout) if hasattr(sys.stdout, "buffer"): # For Python 2 args.outfile = sys.stdout.buffer else: args.outfile = sys.stdout if args.encrypt: if not _is_ooxml(args.infile): raise exceptions.FileFormatError("Not an OOXML file") # OOXML is the only format we support for encryption file = OOXMLFile(args.infile) file.encrypt(password, args.outfile) else: if not olefile.isOleFile(args.infile): raise exceptions.FileFormatError("Not an OLE file") file = OfficeFile(args.infile) file.load_key(password=password) 
file.decrypt(args.outfile) if __name__ == "__main__": main() ================================================ FILE: msoffcrypto/exceptions/__init__.py ================================================ class FileFormatError(Exception): """Raised when the format of given file is unsupported or unrecognized.""" pass class ParseError(Exception): """Raised when the file cannot be parsed correctly.""" pass class DecryptionError(Exception): """Raised when the file cannot be decrypted.""" pass class EncryptionError(Exception): """Raised when the file cannot be encrypted.""" pass class InvalidKeyError(DecryptionError): """Raised when the given password or key is incorrect or cannot be verified.""" pass ================================================ FILE: msoffcrypto/format/__init__.py ================================================ ================================================ FILE: msoffcrypto/format/base.py ================================================ import abc # For 2 and 3 compatibility # https://stackoverflow.com/questions/35673474/ ABC = abc.ABCMeta("ABC", (object,), {"__slots__": ()}) class BaseOfficeFile(ABC): def __init__(self): pass @abc.abstractmethod def load_key(self): pass @abc.abstractmethod def decrypt(self, outfile): pass @abc.abstractmethod def is_encrypted(self) -> bool: pass ================================================ FILE: msoffcrypto/format/common.py ================================================ import io import logging from struct import unpack logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) # https://msdn.microsoft.com/en-us/library/dd926359(v=office.12).aspx def _parse_encryptionheader(blob): (flags,) = unpack(">> blob = io.BytesIO(b'\xec\xa5\xc1\x00G\x00\t\x04\x00\x00\x00\x13\xbf\x004\x00\ ... 
\x00\x00\x00\x10\x00\x00\x00\x00\x00\x04\x00\x00\x16\x04\x00\x00') >>> fibbase = _parseFibBase(blob) >>> hex(fibbase.wIdent) '0xa5ec' >>> hex(fibbase.nFib) '0xc1' >>> hex(fibbase.fExtChar) '0x1' """ getBit = lambda bits, i: (bits & (1 << i)) >> i getBitSlice = lambda bits, i, w: (bits & (2**w - 1 << i)) >> i # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx (buf,) = unpack_from(">> with open("tests/inputs/rc4cryptoapi_password.doc", "rb") as f: ... officefile = Doc97File(f) ... officefile.load_key(password="Password1234_") >>> with open("tests/inputs/rc4cryptoapi_password.doc", "rb") as f: ... officefile = Doc97File(f) ... officefile.load_key(password="0000") Traceback (most recent call last): ... msoffcrypto.exceptions.InvalidKeyError: ... """ def __init__(self, file): self.file = file ole = olefile.OleFileIO(file) # do not close this, would close file self.ole = ole self.format = "doc97" self.keyTypes = ["password"] self.key = None self.salt = None # https://msdn.microsoft.com/en-us/library/dd944620(v=office.12).aspx with ole.openstream("wordDocument") as stream: fib = _parseFib(stream) # https://msdn.microsoft.com/en-us/library/dd923367(v=office.12).aspx tablename = "1Table" if fib.base.fWhichTblStm == 1 else "0Table" Info = namedtuple("Info", ["fib", "tablename"]) self.info = Info( fib=fib, tablename=tablename, ) def load_key(self, password=None): fib = self.info.fib logger.debug( "fEncrypted: {}, fObfuscation: {}".format( fib.base.fEncrypted, fib.base.fObfuscation ) ) if fib.base.fEncrypted == 1: if fib.base.fObfuscation == 1: # Using XOR obfuscation xor_obf_password_verifier = fib.base.IKey logger.debug(hex(xor_obf_password_verifier)) else: # elif fib.base.fObfuscation == 0: encryptionHeader_size = fib.base.IKey logger.debug( "encryptionHeader_size: {}".format(hex(encryptionHeader_size)) ) with self.ole.openstream(self.info.tablename) as table: encryptionHeader = ( table # TODO why create a 2nd reference to same stream? 
) encryptionVersionInfo = table.read(4) vMajor, vMinor = unpack(">> f = open("tests/inputs/plain.doc", "rb") >>> file = Doc97File(f) >>> file.is_encrypted() False >>> f = open("tests/inputs/rc4cryptoapi_password.doc", "rb") >>> file = Doc97File(f) >>> file.is_encrypted() True """ return True if self.info.fib.base.fEncrypted == 1 else False ================================================ FILE: msoffcrypto/format/ooxml.py ================================================ import base64 import io import logging import zipfile from struct import unpack from xml.dom.minidom import parseString import olefile from msoffcrypto import exceptions from msoffcrypto.format import base from msoffcrypto.format.common import _parse_encryptionheader, _parse_encryptionverifier from msoffcrypto.method.ecma376_agile import ECMA376Agile from msoffcrypto.method.ecma376_standard import ECMA376Standard logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) def _is_ooxml(file): if not zipfile.is_zipfile(file): return False try: zfile = zipfile.ZipFile(file) with zfile.open("[Content_Types].xml") as stream: xml = parseString(stream.read()) # Heuristic if ( xml.documentElement.tagName == "Types" and xml.documentElement.namespaceURI == "http://schemas.openxmlformats.org/package/2006/content-types" ): return True else: return False except Exception: return False def _parseinfo_standard(ole): (headerFlags,) = unpack(">> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OOXMLFile(f) ... officefile.load_key(password="Password1234_", verify_password=True) >>> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OOXMLFile(f) ... officefile.load_key(password="0000", verify_password=True) Traceback (most recent call last): ... msoffcrypto.exceptions.InvalidKeyError: ... 
""" def __init__(self, file): self.format = "ooxml" file.seek(0) # TODO: Investigate the effect (required for olefile.isOleFile) # olefile cannot process non password protected ooxml files. # TODO: this code is duplicate of OfficeFile(). Merge? if olefile.isOleFile(file): ole = olefile.OleFileIO(file) self.file = ole try: with self.file.openstream("EncryptionInfo") as stream: self.type, self.info = _parseinfo(stream) except IOError: raise exceptions.FileFormatError( "Supposed to be an encrypted OOXML file, but no EncryptionInfo stream found" ) logger.debug("OOXMLFile.type: {}".format(self.type)) self.secret_key = None if self.type == "agile": # TODO: Support aliases? self.keyTypes = ("password", "private_key", "secret_key") elif self.type == "standard": self.keyTypes = ("password", "secret_key") elif self.type == "extensible": pass elif _is_ooxml(file): self.type = "plain" self.file = file else: raise exceptions.FileFormatError("Unsupported file format") def load_key( self, password=None, private_key=None, secret_key=None, verify_password=False ): """ >>> with open("tests/outputs/ecma376standard_password_plain.docx", "rb") as f: ... officefile = OOXMLFile(f) ... 
officefile.load_key("1234") """ if password: if self.type == "agile": self.secret_key = ECMA376Agile.makekey_from_password( password, self.info["passwordSalt"], self.info["passwordHashAlgorithm"], self.info["encryptedKeyValue"], self.info["spinValue"], self.info["passwordKeyBits"], ) if verify_password: verified = ECMA376Agile.verify_password( password, self.info["passwordSalt"], self.info["passwordHashAlgorithm"], self.info["encryptedVerifierHashInput"], self.info["encryptedVerifierHashValue"], self.info["spinValue"], self.info["passwordKeyBits"], ) if not verified: raise exceptions.InvalidKeyError("Key verification failed") elif self.type == "standard": self.secret_key = ECMA376Standard.makekey_from_password( password, self.info["header"]["algId"], self.info["header"]["algIdHash"], self.info["header"]["providerType"], self.info["header"]["keySize"], self.info["verifier"]["saltSize"], self.info["verifier"]["salt"], ) if verify_password: verified = ECMA376Standard.verifykey( self.secret_key, self.info["verifier"]["encryptedVerifier"], self.info["verifier"]["encryptedVerifierHash"], ) if not verified: raise exceptions.InvalidKeyError("Key verification failed") elif self.type == "extensible": pass elif self.type == "plain": pass elif private_key: if self.type == "agile": self.secret_key = ECMA376Agile.makekey_from_privkey( private_key, self.info["encryptedKeyValue"] ) else: raise exceptions.DecryptionError( "Unsupported key type for the encryption method" ) elif secret_key: self.secret_key = secret_key else: raise exceptions.DecryptionError("No key specified") def decrypt(self, outfile, verify_integrity=False): """ >>> from msoffcrypto import exceptions >>> from io import BytesIO; outfile = BytesIO() >>> with open("tests/outputs/ecma376standard_password_plain.docx", "rb") as f: ... officefile = OOXMLFile(f) ... officefile.load_key("1234") ... 
officefile.decrypt(outfile) Traceback (most recent call last): msoffcrypto.exceptions.DecryptionError: Document is not encrypted """ if self.type == "agile": with self.file.openstream("EncryptedPackage") as stream: if verify_integrity: verified = ECMA376Agile.verify_integrity( self.secret_key, self.info["keyDataSalt"], self.info["keyDataHashAlgorithm"], self.info["keyDataBlockSize"], self.info["encryptedHmacKey"], self.info["encryptedHmacValue"], stream, ) if not verified: raise exceptions.InvalidKeyError( "Payload integrity verification failed" ) obuf = ECMA376Agile.decrypt( self.secret_key, self.info["keyDataSalt"], self.info["keyDataHashAlgorithm"], stream, ) outfile.write(obuf) elif self.type == "standard": with self.file.openstream("EncryptedPackage") as stream: obuf = ECMA376Standard.decrypt(self.secret_key, stream) outfile.write(obuf) elif self.type == "plain": raise exceptions.DecryptionError("Document is not encrypted") else: raise exceptions.DecryptionError("Unsupported encryption method") # If the file is successfully decrypted, there must be a valid OOXML file, i.e. a valid zip file if not zipfile.is_zipfile(io.BytesIO(obuf)): raise exceptions.InvalidKeyError( "The file could not be decrypted with this password" ) def encrypt(self, password, outfile): """ >>> from msoffcrypto.format.ooxml import OOXMLFile >>> from io import BytesIO; outfile = BytesIO() >>> with open("tests/outputs/example.docx", "rb") as f: ... officefile = OOXMLFile(f) ... officefile.encrypt("1234", outfile) """ if self.is_encrypted(): raise exceptions.EncryptionError("File is already encrypted") self.file.seek(0) buf = ECMA376Agile.encrypt(password, self.file) if not olefile.isOleFile(buf): raise exceptions.EncryptionError("Unable to encrypt this file") outfile.write(buf) def is_encrypted(self): """ >>> with open("tests/inputs/example_password.docx", "rb") as f: ... officefile = OOXMLFile(f) ... 
officefile.is_encrypted() True >>> with open("tests/outputs/ecma376standard_password_plain.docx", "rb") as f: ... officefile = OOXMLFile(f) ... officefile.is_encrypted() False """ # Heuristic if self.type == "plain": return False elif isinstance(self.file, olefile.OleFileIO): return True else: return False ================================================ FILE: msoffcrypto/format/ppt97.py ================================================ import io import logging import shutil import tempfile from collections import namedtuple from struct import pack, unpack import olefile from msoffcrypto import exceptions from msoffcrypto.format import base from msoffcrypto.format.common import _parse_header_RC4CryptoAPI from msoffcrypto.method.rc4_cryptoapi import DocumentRC4CryptoAPI logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) RecordHeader = namedtuple( "RecordHeader", [ "recVer", "recInstance", "recType", "recLen", ], ) def _parseRecordHeader(blob): # RecordHeader: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-ppt/df201194-0cd0-4dfb-bf10-eea353d8eabc getBitSlice = lambda bits, i, w: (bits & (2**w - 1 << i)) >> i blob.seek(0) (buf,) = unpack("> i (buf,) = unpack(" 0: persistdirectoryatom = persistdirectoryatom_stack.pop() for entry in persistdirectoryatom.rgPersistDirEntry: # logger.debug("persistId: %d" % entry.persistId) for i, offset in enumerate(entry.rgPersistOffset): persistobjectdirectory[entry.persistId + i] = offset return persistobjectdirectory class Ppt97File(base.BaseOfficeFile): """Return a MS-PPT file object. Examples: >>> with open("tests/inputs/rc4cryptoapi_password.ppt", "rb") as f: ... officefile = Ppt97File(f) ... officefile.load_key(password="Password1234_") >>> with open("tests/inputs/rc4cryptoapi_password.ppt", "rb") as f: ... officefile = Ppt97File(f) ... officefile.load_key(password="0000") Traceback (most recent call last): ... msoffcrypto.exceptions.InvalidKeyError: ... 
""" def __init__(self, file): self.file = file ole = olefile.OleFileIO(file) # do not close this, would close file self.ole = ole self.format = "ppt97" self.keyTypes = ["password"] self.key = None self.salt = None # streams closed in destructor: currentuser = ole.openstream("Current User") powerpointdocument = ole.openstream("PowerPoint Document") Data = namedtuple("Data", ["currentuser", "powerpointdocument"]) self.data = Data( currentuser=currentuser, powerpointdocument=powerpointdocument, ) def __del__(self): """Destructor, closes opened streams.""" if hasattr(self, "data") and self.data: if self.data.currentuser: self.data.currentuser.close() if self.data.powerpointdocument: self.data.powerpointdocument.close() def load_key(self, password=None): persistobjectdirectory = construct_persistobjectdirectory(self.data) logger.debug("[*] persistobjectdirectory: {}".format(persistobjectdirectory)) self.data.currentuser.seek(0) currentuser = _parseCurrentUser(self.data.currentuser) logger.debug("[*] currentuser: {}".format(currentuser)) self.data.powerpointdocument.seek( currentuser.currentuseratom.offsetToCurrentEdit ) usereditatom = _parseUserEditAtom(self.data.powerpointdocument) logger.debug("[*] usereditatom: {}".format(usereditatom)) # cf. Part 2 in https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-ppt/1fc22d56-28f9-4818-bd45-67c2bf721ccf cryptsession10container_offset = persistobjectdirectory[ usereditatom.encryptSessionPersistIdRef ] logger.debug( "[*] cryptsession10container_offset: {}".format( cryptsession10container_offset ) ) self.data.powerpointdocument.seek(cryptsession10container_offset) cryptsession10container = _parseCryptSession10Container( self.data.powerpointdocument ) logger.debug("[*] cryptsession10container: {}".format(cryptsession10container)) encryptionInfo = io.BytesIO(cryptsession10container.data) encryptionVersionInfo = encryptionInfo.read(4) vMajor, vMinor = unpack(" be an encrypted document. 
headerToken=0xE391C05F, offsetToCurrentEdit=cuatom.offsetToCurrentEdit, lenUserName=cuatom.lenUserName, docFileVersion=cuatom.docFileVersion, majorVersion=cuatom.majorVersion, minorVersion=cuatom.minorVersion, unused=cuatom.unused, ansiUserName=cuatom.ansiUserName, relVersion=cuatom.relVersion, unicodeUserName=cuatom.unicodeUserName, ) ) buf = _packCurrentUser(currentuser_new) buf.seek(0) currentuser_buf = buf # List of encrypted parts: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-ppt/b0963334-4408-4621-879a-ef9c54551fd8 # PowerPoint Document Stream self.data.powerpointdocument.seek(0) powerpointdocument_size = len(self.data.powerpointdocument.read()) logger.debug("[*] powerpointdocument_size: {}".format(powerpointdocument_size)) self.data.powerpointdocument.seek(0) dec_bytearray = bytearray(self.data.powerpointdocument.read()) # UserEditAtom self.data.powerpointdocument.seek( currentuser.currentuseratom.offsetToCurrentEdit ) # currentuseratom_raw = self.data.powerpointdocument.read(40) self.data.powerpointdocument.seek( currentuser.currentuseratom.offsetToCurrentEdit ) usereditatom = _parseUserEditAtom(self.data.powerpointdocument) # logger.debug(usereditatom) # logger.debug(["offsetToCurrentEdit", currentuser.currentuseratom.offsetToCurrentEdit]) rh_new = RecordHeader( recVer=usereditatom.rh.recVer, recInstance=usereditatom.rh.recInstance, recType=usereditatom.rh.recType, recLen=usereditatom.rh.recLen - 4, # Omit encryptSessionPersistIdRef field ) # logger.debug([_packRecordHeader(usereditatom.rh).read(), _packRecordHeader(rh_new).read()]) usereditatom_new = UserEditAtom( rh=rh_new, lastSlideIdRef=usereditatom.lastSlideIdRef, version=usereditatom.version, minorVersion=usereditatom.minorVersion, majorVersion=usereditatom.majorVersion, offsetLastEdit=usereditatom.offsetLastEdit, offsetPersistDirectory=usereditatom.offsetPersistDirectory, docPersistIdRef=usereditatom.docPersistIdRef, persistIdSeed=usereditatom.persistIdSeed, 
lastView=usereditatom.lastView, unused=usereditatom.unused, encryptSessionPersistIdRef=0x00000000, # Clear ) # logger.debug(currentuseratom_raw) # logger.debug(_packUserEditAtom(usereditatom).read()) # logger.debug(_packUserEditAtom(usereditatom_new).read()) buf = _packUserEditAtom(usereditatom_new) buf.seek(0) buf_bytes = bytearray(buf.read()) offset = currentuser.currentuseratom.offsetToCurrentEdit dec_bytearray[offset : offset + len(buf_bytes)] = buf_bytes # PersistDirectoryAtom self.data.powerpointdocument.seek( currentuser.currentuseratom.offsetToCurrentEdit ) usereditatom = _parseUserEditAtom(self.data.powerpointdocument) # logger.debug(usereditatom) self.data.powerpointdocument.seek(usereditatom.offsetPersistDirectory) persistdirectoryatom = _parsePersistDirectoryAtom(self.data.powerpointdocument) # logger.debug(persistdirectoryatom) persistdirectoryatom_new = PersistDirectoryAtom( rh=persistdirectoryatom.rh, rgPersistDirEntry=[ PersistDirectoryEntry( persistId=persistdirectoryatom.rgPersistDirEntry[0].persistId, # Omit CryptSession10Container cPersist=persistdirectoryatom.rgPersistDirEntry[0].cPersist - 1, rgPersistOffset=persistdirectoryatom.rgPersistDirEntry[ 0 ].rgPersistOffset, ), ], ) self.data.powerpointdocument.seek(usereditatom.offsetPersistDirectory) buf = _packPersistDirectoryAtom(persistdirectoryatom_new) buf_bytes = bytearray(buf.read()) offset = usereditatom.offsetPersistDirectory dec_bytearray[offset : offset + len(buf_bytes)] = buf_bytes # Persist Objects self.data.powerpointdocument.seek(0) persistobjectdirectory = construct_persistobjectdirectory(self.data) directory_items = list(persistobjectdirectory.items()) for i, (persistId, offset) in enumerate(directory_items): self.data.powerpointdocument.seek(offset) buf = self.data.powerpointdocument.read(8) rh = _parseRecordHeader(io.BytesIO(buf)) logger.debug("[*] rh: {}".format(rh)) # CryptSession10Container if rh.recType == 0x2F14: logger.debug("[*] CryptSession10Container found") # Remove 
encryption, pad by zero to preserve stream size dec_bytearray[offset : offset + (8 + rh.recLen)] = b"\x00" * ( 8 + rh.recLen ) continue # The UserEditAtom record (section 2.3.3) and the PersistDirectoryAtom record (section 2.3.4) MUST NOT be encrypted. if rh.recType in [0x0FF5, 0x1772]: logger.debug("[*] UserEditAtom/PersistDirectoryAtom found") continue # TODO: Fix here recLen = directory_items[i + 1][1] - offset - 8 logger.debug("[*] recLen: {}".format(recLen)) self.data.powerpointdocument.seek(offset) enc_buf = io.BytesIO(self.data.powerpointdocument.read(8 + recLen)) blocksize = self.keySize * ( (8 + recLen) // self.keySize + 1 ) # Undocumented dec = DocumentRC4CryptoAPI.decrypt( self.key, self.salt, self.keySize, enc_buf, blocksize=blocksize, block=persistId, ) dec_bytes = bytearray(dec.read()) dec_bytearray[offset : offset + len(dec_bytes)] = dec_bytes # To BytesIO dec_buf = io.BytesIO(dec_bytearray) dec_buf.seek(0) for i, (persistId, offset) in enumerate(directory_items): dec_buf.seek(offset) buf = dec_buf.read(8) rh = _parseRecordHeader(io.BytesIO(buf)) logger.debug("[*] rh: {}".format(rh)) dec_buf.seek(0) logger.debug( "[*] powerpointdocument_size={}, len(dec_buf.read())={}".format( powerpointdocument_size, len(dec_buf.read()) ) ) dec_buf.seek(0) powerpointdocument_dec_buf = dec_buf # TODO: Pictures Stream # TODO: Encrypted Summary Info Stream with tempfile.TemporaryFile() as _outfile: self.file.seek(0) shutil.copyfileobj(self.file, _outfile) outole = olefile.OleFileIO(_outfile, write_mode=True) outole.write_stream("Current User", currentuser_buf.read()) outole.write_stream( "PowerPoint Document", powerpointdocument_dec_buf.read() ) # Finalize _outfile.seek(0) shutil.copyfileobj(_outfile, outfile) return def is_encrypted(self): r""" Test if the file is encrypted. 
>>> f = open("tests/inputs/plain.ppt", "rb") >>> file = Ppt97File(f) >>> file.is_encrypted() False >>> f = open("tests/inputs/rc4cryptoapi_password.ppt", "rb") >>> file = Ppt97File(f) >>> file.is_encrypted() True """ self.data.currentuser.seek(0) currentuser = _parseCurrentUser(self.data.currentuser) logger.debug("[*] currentuser: {}".format(currentuser)) self.data.powerpointdocument.seek( currentuser.currentuseratom.offsetToCurrentEdit ) usereditatom = _parseUserEditAtom(self.data.powerpointdocument) logger.debug("[*] usereditatom: {}".format(usereditatom)) if usereditatom.rh.recLen == 0x00000020: # Cf. _parseUserEditAtom return True else: return False ================================================ FILE: msoffcrypto/format/xls97.py ================================================ import io import logging import shutil import tempfile from collections import namedtuple from struct import pack, unpack import olefile from msoffcrypto import exceptions from msoffcrypto.format import base from msoffcrypto.format.common import _parse_header_RC4CryptoAPI from msoffcrypto.method.rc4 import DocumentRC4 from msoffcrypto.method.rc4_cryptoapi import DocumentRC4CryptoAPI from msoffcrypto.method.xor_obfuscation import DocumentXOR logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) recordNameNum = { "Formula": 6, "EOF": 10, "CalcCount": 12, "CalcMode": 13, "CalcPrecision": 14, "CalcRefMode": 15, "CalcDelta": 16, "CalcIter": 17, "Protect": 18, "Password": 19, "Header": 20, "Footer": 21, "ExternSheet": 23, "Lbl": 24, "WinProtect": 25, "VerticalPageBreaks": 26, "HorizontalPageBreaks": 27, "Note": 28, "Selection": 29, "Date1904": 34, "ExternName": 35, "LeftMargin": 38, "RightMargin": 39, "TopMargin": 40, "BottomMargin": 41, "PrintRowCol": 42, "PrintGrid": 43, "FilePass": 47, "Font": 49, "PrintSize": 51, "Continue": 60, "Window1": 61, "Backup": 64, "Pane": 65, "CodePage": 66, "Pls": 77, "DCon": 80, "DConRef": 81, "DConName": 82, "DefColWidth": 85, "XCT": 
89, "CRN": 90, "FileSharing": 91, "WriteAccess": 92, "Obj": 93, "Uncalced": 94, "CalcSaveRecalc": 95, "Template": 96, "Intl": 97, "ObjProtect": 99, "ColInfo": 125, "Guts": 128, "WsBool": 129, "GridSet": 130, "HCenter": 131, "VCenter": 132, "BoundSheet8": 133, "WriteProtect": 134, "Country": 140, "HideObj": 141, "Sort": 144, "Palette": 146, "Sync": 151, "LPr": 152, "DxGCol": 153, "FnGroupName": 154, "FilterMode": 155, "BuiltInFnGroupCount": 156, "AutoFilterInfo": 157, "AutoFilter": 158, "Scl": 160, "Setup": 161, "ScenMan": 174, "SCENARIO": 175, "SxView": 176, "Sxvd": 177, "SXVI": 178, "SxIvd": 180, "SXLI": 181, "SXPI": 182, "DocRoute": 184, "RecipName": 185, "MulRk": 189, "MulBlank": 190, "Mms": 193, "SXDI": 197, "SXDB": 198, "SXFDB": 199, "SXDBB": 200, "SXNum": 201, "SxBool": 202, "SxErr": 203, "SXInt": 204, "SXString": 205, "SXDtr": 206, "SxNil": 207, "SXTbl": 208, "SXTBRGIITM": 209, "SxTbpg": 210, "ObProj": 211, "SXStreamID": 213, "DBCell": 215, "SXRng": 216, "SxIsxoper": 217, "BookBool": 218, "DbOrParamQry": 220, "ScenarioProtect": 221, "OleObjectSize": 222, "XF": 224, "InterfaceHdr": 225, "InterfaceEnd": 226, "SXVS": 227, "MergeCells": 229, "BkHim": 233, "MsoDrawingGroup": 235, "MsoDrawing": 236, "MsoDrawingSelection": 237, "PhoneticInfo": 239, "SxRule": 240, "SXEx": 241, "SxFilt": 242, "SxDXF": 244, "SxItm": 245, "SxName": 246, "SxSelect": 247, "SXPair": 248, "SxFmla": 249, "SxFormat": 251, "SST": 252, "LabelSst": 253, "ExtSST": 255, "SXVDEx": 256, "SXFormula": 259, "SXDBEx": 290, "RRDInsDel": 311, "RRDHead": 312, "RRDChgCell": 315, "RRTabId": 317, "RRDRenSheet": 318, "RRSort": 319, "RRDMove": 320, "RRFormat": 330, "RRAutoFmt": 331, "RRInsertSh": 333, "RRDMoveBegin": 334, "RRDMoveEnd": 335, "RRDInsDelBegin": 336, "RRDInsDelEnd": 337, "RRDConflict": 338, "RRDDefName": 339, "RRDRstEtxp": 340, "LRng": 351, "UsesELFs": 352, "DSF": 353, "CUsr": 401, "CbUsr": 402, "UsrInfo": 403, "UsrExcl": 404, "FileLock": 405, "RRDInfo": 406, "BCUsrs": 407, "UsrChk": 408, 
"UserBView": 425, "UserSViewBegin": 426, "UserSViewBegin_Chart": 426, "UserSViewEnd": 427, "RRDUserView": 428, "Qsi": 429, "SupBook": 430, "Prot4Rev": 431, "CondFmt": 432, "CF": 433, "DVal": 434, "DConBin": 437, "TxO": 438, "RefreshAll": 439, "HLink": 440, "Lel": 441, "CodeName": 442, "SXFDBType": 443, "Prot4RevPass": 444, "ObNoMacros": 445, "Dv": 446, "Excel9File": 448, "RecalcId": 449, "EntExU2": 450, "Dimensions": 512, "Blank": 513, "Number": 515, "Label": 516, "BoolErr": 517, "String": 519, "Row": 520, "Index": 523, "Array": 545, "DefaultRowHeight": 549, "Table": 566, "Window2": 574, "RK": 638, "Style": 659, "BigName": 1048, "Format": 1054, "ContinueBigName": 1084, "ShrFmla": 1212, "HLinkTooltip": 2048, "WebPub": 2049, "QsiSXTag": 2050, "DBQueryExt": 2051, "ExtString": 2052, "TxtQry": 2053, "Qsir": 2054, "Qsif": 2055, "RRDTQSIF": 2056, "BOF": 2057, "OleDbConn": 2058, "WOpt": 2059, "SXViewEx": 2060, "SXTH": 2061, "SXPIEx": 2062, "SXVDTEx": 2063, "SXViewEx9": 2064, "ContinueFrt": 2066, "RealTimeData": 2067, "ChartFrtInfo": 2128, "FrtWrapper": 2129, "StartBlock": 2130, "EndBlock": 2131, "StartObject": 2132, "EndObject": 2133, "CatLab": 2134, "YMult": 2135, "SXViewLink": 2136, "PivotChartBits": 2137, "FrtFontList": 2138, "SheetExt": 2146, "BookExt": 2147, "SXAddl": 2148, "CrErr": 2149, "HFPicture": 2150, "FeatHdr": 2151, "Feat": 2152, "DataLabExt": 2154, "DataLabExtContents": 2155, "CellWatch": 2156, "FeatHdr11": 2161, "Feature11": 2162, "DropDownObjIds": 2164, "ContinueFrt11": 2165, "DConn": 2166, "List12": 2167, "Feature12": 2168, "CondFmt12": 2169, "CF12": 2170, "CFEx": 2171, "XFCRC": 2172, "XFExt": 2173, "AutoFilter12": 2174, "ContinueFrt12": 2175, "MDTInfo": 2180, "MDXStr": 2181, "MDXTuple": 2182, "MDXSet": 2183, "MDXProp": 2184, "MDXKPI": 2185, "MDB": 2186, "PLV": 2187, "Compat12": 2188, "DXF": 2189, "TableStyles": 2190, "TableStyle": 2191, "TableStyleElement": 2192, "StyleExt": 2194, "NamePublish": 2195, "NameCmt": 2196, "SortData": 2197, "Theme": 2198, 
"GUIDTypeLib": 2199, "FnGrp12": 2200, "NameFnGrp12": 2201, "MTRSettings": 2202, "CompressPictures": 2203, "HeaderFooter": 2204, "CrtLayout12": 2205, "CrtMlFrt": 2206, "CrtMlFrtContinue": 2207, "ForceFullCalculation": 2211, "ShapePropsStream": 2212, "TextPropsStream": 2213, "RichTextStream": 2214, "CrtLayout12A": 2215, "Units": 4097, "Chart": 4098, "Series": 4099, "DataFormat": 4102, "LineFormat": 4103, "MarkerFormat": 4105, "AreaFormat": 4106, "PieFormat": 4107, "AttachedLabel": 4108, "SeriesText": 4109, "ChartFormat": 4116, "Legend": 4117, "SeriesList": 4118, "Bar": 4119, "Line": 4120, "Pie": 4121, "Area": 4122, "Scatter": 4123, "CrtLine": 4124, "Axis": 4125, "Tick": 4126, "ValueRange": 4127, "CatSerRange": 4128, "AxisLine": 4129, "CrtLink": 4130, "DefaultText": 4132, "Text": 4133, "FontX": 4134, "ObjectLink": 4135, "Frame": 4146, "Begin": 4147, "End": 4148, "PlotArea": 4149, "Chart3d": 4154, "PicF": 4156, "DropBar": 4157, "Radar": 4158, "Surf": 4159, "RadarArea": 4160, "AxisParent": 4161, "LegendException": 4163, "ShtProps": 4164, "SerToCrt": 4165, "AxesUsed": 4166, "SBaseRef": 4168, "SerParent": 4170, "SerAuxTrend": 4171, "IFmtRecord": 4174, "Pos": 4175, "AlRuns": 4176, "BRAI": 4177, "SerAuxErrBar": 4187, "ClrtClient": 4188, "SerFmt": 4189, "Chart3DBarShape": 4191, "Fbi": 4192, "BopPop": 4193, "AxcExt": 4194, "Dat": 4195, "PlotGrowth": 4196, "SIIndex": 4197, "GelFrame": 4198, "BopPopCustom": 4199, "Fbi2": 4200, } def _parse_header_RC4(encryptionInfo): # RC4: https://msdn.microsoft.com/en-us/library/dd908560(v=office.12).aspx salt = encryptionInfo.read(16) encryptedVerifier = encryptionInfo.read(16) encryptedVerifierHash = encryptionInfo.read(16) info = { "salt": salt, "encryptedVerifier": encryptedVerifier, "encryptedVerifierHash": encryptedVerifierHash, } return info class _BIFFStream: def __init__(self, data): self.data = data def has_record(self, target): pos = self.data.tell() while True: h = self.data.read(4) if not h: self.data.seek(pos) return False num, 
size = unpack(">> with open("tests/inputs/rc4cryptoapi_password.xls", "rb") as f: ... officefile = Xls97File(f) ... officefile.load_key(password="Password1234_") >>> with open("tests/inputs/xor_password_123456789012345.xls", "rb") as f: ... officefile = Xls97File(f) ... officefile.load_key(password="123456789012345") >>> with open("tests/inputs/rc4cryptoapi_password.xls", "rb") as f: ... officefile = Xls97File(f) ... officefile.load_key(password="0000") Traceback (most recent call last): ... msoffcrypto.exceptions.InvalidKeyError: ... """ def __init__(self, file): self.file = file ole = olefile.OleFileIO(file) # do not close this, would close file self.ole = ole self.format = "xls97" self.keyTypes = ["password"] self.key = None self.salt = None workbook = ole.openstream("Workbook") # closed in destructor Data = namedtuple("Data", ["workbook"]) self.data = Data( workbook=workbook, ) def __del__(self): """Destructor, closes opened stream.""" if hasattr(self, "data") and self.data and self.data.workbook: self.data.workbook.close() def load_key(self, password=None): self.data.workbook.seek(0) workbook = _BIFFStream(self.data.workbook) # workbook stream consists of records, each of which begins with its ID number. # Record IDs (in decimal) are listed here: https://msdn.microsoft.com/en-us/library/dd945945(v=office.12).aspx # workbook stream's structure is WORKBOOK = BOF WORKBOOKCONTENT and so forth # as in https://msdn.microsoft.com/en-us/library/dd952177(v=office.12).aspx # A record begins with its length (in bytes). (num,) = unpack(">> f = open("tests/inputs/plain.xls", "rb") >>> file = Xls97File(f) >>> file.is_encrypted() False >>> f = open("tests/inputs/rc4cryptoapi_password.xls", "rb") >>> file = Xls97File(f) >>> file.is_encrypted() True """ # Utilising the method above, check for encryption type. 
class ECMA376EncryptedLayout:
    """
    Sector bookkeeping for the compound file that is being written.

    Sections follow each other in this order: DIFAT, FAT, MiniFAT, directory
    entries, mini-stream data, EncryptedPackage data.  The ``*Pos`` properties
    are sector indices; the ``offset*`` members are byte offsets into the
    output buffer and therefore include the ``Header.BUFFER_SIZE`` bytes that
    precede sector 0.
    """

    def __init__(self, sectorSize):
        self.sectorSize = sectorSize

        # Filled in by the writer once the stream sizes are known.
        self.miniFatNum = 0
        self.miniFatDataSectorNum = 0
        self.miniFatSectors = 0
        self.numMiniFatSectors = 1
        self.difatSectorNum = 0
        self.fatSectorNum = 0
        self.difatPos = 0
        self.directoryEntrySectorNum = 0
        self.encryptionPackageSectorNum = 0

    # --- sector indices -------------------------------------------------

    @property
    def fatPos(self):
        return self.difatPos + self.difatSectorNum

    @property
    def miniFatPos(self):
        return self.fatPos + self.fatSectorNum

    @property
    def directoryEntryPos(self):
        return self.miniFatPos + self.numMiniFatSectors

    @property
    def miniFatDataPos(self):
        return self.directoryEntryPos + self.directoryEntrySectorNum

    @property
    def encryptionPackagePos(self):
        return self.miniFatDataPos + self.miniFatDataSectorNum

    # --- sector counts --------------------------------------------------

    @property
    def contentSectorNum(self):
        # Everything that is neither a DIFAT nor a FAT sector.
        return sum(
            (
                self.numMiniFatSectors,
                self.directoryEntrySectorNum,
                self.miniFatDataSectorNum,
                self.encryptionPackageSectorNum,
            )
        )

    @property
    def totalSectors(self):
        return self.difatSectorNum + self.fatSectorNum + self.contentSectorNum

    # --- byte offsets ---------------------------------------------------

    def offsetData(self, startingSectorLocation):
        """Byte offset of the regular sector with the given index."""
        return Header.BUFFER_SIZE + startingSectorLocation * self.sectorSize

    def offsetDifat(self, n):
        """Byte offset of the n-th DIFAT sector."""
        return self.offsetData(self.difatPos + n)

    def offsetMiniData(self, startingSectorLocation):
        """Byte offset of a mini sector; mini sectors are 64 bytes each."""
        return self.offsetMiniFatData + startingSectorLocation * 64

    @property
    def totalSize(self):
        return self.offsetData(self.totalSectors)

    @property
    def offsetDirectoryEntries(self):
        return self.offsetData(self.directoryEntryPos)

    @property
    def offsetMiniFatData(self):
        return self.offsetData(self.miniFatDataPos)

    @property
    def offsetFat(self):
        return self.offsetData(self.fatPos)

    @property
    def offsetMiniFat(self):
        return self.offsetData(self.miniFatPos)
def _write_to(self, obuf):
    """
    Compute the sector layout and serialise the whole container into obuf.

    obuf must support ``write``, ``seek`` and ``tell``; ``write_to`` passes
    an in-memory buffer for that reason.  The header and directory entries
    held by this instance are updated in place with the computed sector
    locations before anything is written.
    """
    layout = ECMA376EncryptedLayout(self._header.sectorSize)

    self._set_sector_locations_of_streams(layout)
    self._detect_sector_num(layout)

    self._header.firstDirectorySectorLocation = layout.directoryEntryPos
    self._header.firstMiniFatSectorLocation = layout.miniFatPos
    self._header.numMiniFatSectors = layout.numMiniFatSectors

    self._dirs[DSPos.iRoot].StartingSectorLocation = layout.miniFatDataPos
    self._dirs[DSPos.iRoot].Content = b"\0" * (64 * layout.miniFatNum)
    self._dirs[
        DSPos.iEncryptionPackage
    ].StartingSectorLocation = layout.encryptionPackagePos

    # Rebuild the header DIFAT from scratch.  self._header is created once in
    # __init__, so merely appending here would leave the FAT sector numbers of
    # a previous write in front of the new ones when write_to() is called more
    # than once on the same instance.
    # NOTE(review): other per-write state (e.g. the root entry's Content set
    # above) also persists on the instance; confirm the sizing helpers
    # tolerate a second call.
    difat = self._header.difat
    difat.clear()
    difat.extend(
        layout.fatPos + i
        for i in range(min(layout.fatSectorNum, Header.FIRSTNUMDIFAT))
    )

    self._header.numFatSectors = layout.fatSectorNum
    self._header.numDifatSectors = layout.difatSectorNum

    if layout.difatSectorNum > 0:
        self._header.firstDifatSectorLocation = layout.difatPos

    # Zero out the output buffer; some sections pad, some sections don't ... but we need the buffer to have the proper size
    # so we can jump around
    obuf.write(b"\0" * layout.totalSize)
    obuf.seek(0)

    self._header.write_to(obuf)
    self._write_DIFAT(obuf, layout)
    self._write_FAT_start(obuf, layout)
    self._write_MiniFAT(obuf, layout)
    self._write_directory_entries(obuf, layout)
    self._write_Content(obuf, layout)
def _write_Content(self, obuf, layout: ECMA376EncryptedLayout):
    """
    Write the payload of every directory entry that has one.

    Entries whose content is at most 4096 bytes are addressed as mini-stream
    data via ``layout.offsetMiniData``; larger ones are addressed as regular
    sectors via ``layout.offsetData``.  Empty entries are skipped.
    """
    for entry in self._dirs:
        payload = entry.Content
        size = len(payload)
        if size == 0:
            continue

        # Small content goes in the minifat section
        locate = layout.offsetMiniData if size <= 4096 else layout.offsetData
        obuf.seek(locate(entry.StartingSectorLocation))
        obuf.write(payload)
================================================ FILE: msoffcrypto/method/ecma376_agile.py ================================================ from __future__ import annotations import base64 import functools import hmac import io import logging import secrets from hashlib import sha1, sha256, sha384, sha512 from struct import pack, unpack from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import padding from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from msoffcrypto import exceptions from msoffcrypto.method.container.ecma376_encrypted import ECMA376Encrypted logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) ALGORITHM_HASH = { "SHA1": sha1, "SHA256": sha256, "SHA384": sha384, "SHA512": sha512, } blkKey_VerifierHashInput = bytearray([0xFE, 0xA7, 0xD2, 0x76, 0x3B, 0x4B, 0x9E, 0x79]) blkKey_encryptedVerifierHashValue = bytearray( [0xD7, 0xAA, 0x0F, 0x6D, 0x30, 0x61, 0x34, 0x4E] ) blkKey_encryptedKeyValue = bytearray([0x14, 0x6E, 0x0B, 0xE7, 0xAB, 0xAC, 0xD0, 0xD6]) blkKey_dataIntegrity1 = bytearray([0x5F, 0xB2, 0xAD, 0x01, 0x0C, 0xB9, 0xE1, 0xF6]) blkKey_dataIntegrity2 = bytearray([0xA0, 0x67, 0x7F, 0x02, 0xB2, 0x2C, 0x84, 0x33]) def _random_buffer(sz): return secrets.token_bytes(sz) def _get_num_blocks(sz, block): return (sz + block - 1) // block def _round_up(sz, block): return _get_num_blocks(sz, block) * block def _resize_buffer(buf, n, c=b"\0"): if len(buf) >= n: return buf[:n] return buf + c * (n - len(buf)) def _normalize_key(key, n): return _resize_buffer(key, n, b"\x36") def _get_hash_func(algorithm): return ALGORITHM_HASH.get(algorithm, sha1) def _decrypt_aes_cbc(data, key, iv): aes = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend()) decryptor = aes.decryptor() decrypted = decryptor.update(data) + decryptor.finalize() return decrypted def _encrypt_aes_cbc(data, key, iv): aes = 
class ECMA376AgileCipherParams:
    """
    Cipher and hash parameters of one Agile encryption section.

    Every instance starts with the same fixed values (AES with a 256-bit key
    and SHA512); only ``saltValue`` is left unset for the caller to fill in.
    """

    def __init__(self):
        # Algorithm names.
        self.cipherName, self.hashName = "AES", "SHA512"
        # Salt and cipher block are both 16 bytes.
        self.saltSize = self.blockSize = 16
        # Key length in bits, digest length in bytes.
        self.keyBits, self.hashSize = 256, 64
        self.saltValue: bytes | None = None
Note the block key is not taken into consideration in this function. """ # TODO: This function is quite expensive and it should only be called once. # We need to save the result for later use. # This is not covered by the specification, but MS Word does so. hashCalc = _get_hash_func(hashAlgorithm) # NOTE: Initial round sha512(salt + password) h = hashCalc(saltValue + password.encode("UTF-16LE")) # NOTE: Iteration of 0 -> spincount-1; hash = sha512(iterator + hash) for i in range(0, spinValue, 1): h = hashCalc(pack(">> key = b'@ f\t\xd9\xfa\xad\xf2K\x07j\xeb\xf2\xc45\xb7B\x92\xc8\xb8\xa7\xaa\x81\xbcg\x9b\xe8\x97\x11\xb0*\xc2' >>> keyDataSalt = b'\x8f\xc7x"+P\x8d\xdcL\xe6\x8c\xdd\x15<\x16\xb4' >>> hashAlgorithm = 'SHA512' """ # NOTE: See https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/9e61da63-8ddb-4c0a-b25d-f85d990f44c8 SEGMENT_LENGTH = 4096 hashCalc = _get_hash_func(hashAlgorithm) obuf = io.BytesIO() # NOTE: See https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/b60c8b35-2db2-4409-8710-59d88a793f83 ibuf.seek(0) totalSize = unpack(">> password = 'Password1234_' >>> saltValue = b'\xcb\xca\x1c\x99\x93C\xfb\xad\x92\x07V4\x15\x004\xb0' >>> hashAlgorithm = 'SHA512' >>> encryptedVerifierHashInput = b'9\xee\xa5N&\xe5\x14y\x8c(K\xc7qM8\xac' >>> encryptedVerifierHashValue = b'\x147mm\x81s4\xe6\xb0\xffO\xd8"\x1a|g\x8e]\x8axN\x8f\x99\x9fL\x18\x890\xc3jK)\xc5\xb33`' + \ ... 
@staticmethod
def verify_integrity(
    secretKey,
    keyDataSalt,
    keyDataHashAlgorithm,
    keyDataBlockSize,
    encryptedHmacKey,
    encryptedHmacValue,
    stream,
):
    r"""
    Return True if the HMAC of the data payload is valid.

    The HMAC key and the expected HMAC value are decrypted with
    ``secretKey``, using IVs derived from ``keyDataSalt`` and the two
    data-integrity block keys.  The HMAC is then computed over everything
    ``stream.read()`` returns from the stream's current position, after
    which the stream is rewound to offset 0.

    Args:
        secretKey: Key used to decrypt the HMAC key and HMAC value.
        keyDataSalt: Salt of the keyData element.
        keyDataHashAlgorithm: Hash algorithm name, resolved by ``_get_hash_func``.
        keyDataBlockSize: Cipher block size in bytes; the IVs are cut to this length.
        encryptedHmacKey: Encrypted HMAC key.
        encryptedHmacValue: Encrypted expected HMAC value.
        stream: Readable, seekable binary stream holding the data to authenticate.
    """
    # NOTE: See https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/63d9c262-82b9-4fa3-a06d-d087b93e3b00
    hashCalc = _get_hash_func(keyDataHashAlgorithm)

    iv1 = hashCalc(keyDataSalt + blkKey_dataIntegrity1).digest()[:keyDataBlockSize]
    iv2 = hashCalc(keyDataSalt + blkKey_dataIntegrity2).digest()[:keyDataBlockSize]

    hmacKey = _decrypt_aes_cbc(encryptedHmacKey, secretKey, iv1)
    hmacValue = _decrypt_aes_cbc(encryptedHmacValue, secretKey, iv2)

    actualHmac = hmac.new(hmacKey, stream.read(), hashCalc).digest()
    stream.seek(0)

    # The decrypted value is a whole number of AES blocks, so for a digest
    # whose size is not a multiple of the block size (e.g. SHA-1, 20 bytes)
    # it carries padding after the HMAC.  Compare only the digest-sized
    # prefix, otherwise such files could never verify.  compare_digest keeps
    # the comparison constant-time.
    return hmac.compare_digest(hmacValue[: len(actualHmac)], actualHmac)
class ECMA376Extensible:
    """Stub for the ECMA-376 extensible encryption method; it holds no state and implements no operations."""

    def __init__(self):
        """Create an instance; there is nothing to initialise."""
        return None
""" obuf = io.BytesIO() totalSize = unpack(">> key = b'@\xb1:q\xf9\x0b\x96n7T\x08\xf2\xd1\x81\xa1\xaa' >>> encryptedVerifier = b'Qos.\x96o\xac\x17\xb1\xc5\xd7\xd8\xcc6\xc9(' >>> encryptedVerifierHash = b'+ah\xda\xbe)\x11\xad+\xd3|\x17Ft\\\x14\xd3\xcf\x1b\xb1@\xa4\x8fNo=#\x88\x08r\xb1j' >>> ECMA376Standard.verifykey(key, encryptedVerifier, encryptedVerifierHash) True """ # TODO: For consistency with Agile, rename method to verify_password or the like logger.debug([key, encryptedVerifier, encryptedVerifierHash]) # https://msdn.microsoft.com/en-us/library/dd926426(v=office.12).aspx aes = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend()) decryptor = aes.decryptor() verifier = decryptor.update(encryptedVerifier) expected_hash = sha1(verifier).digest() decryptor = aes.decryptor() verifierHash = decryptor.update(encryptedVerifierHash)[: sha1().digest_size] return expected_hash == verifierHash @staticmethod def makekey_from_password( password, algId, algIdHash, providerType, keySize, saltSize, salt ): r""" Generate intermediate key from given password. >>> password = 'Password1234_' >>> algId = 0x660e >>> algIdHash = 0x8004 >>> providerType = 0x18 >>> keySize = 128 >>> saltSize = 16 >>> salt = b'\xe8\x82fI\x0c[\xd1\xee\xbd+C\x94\xe3\xf80\xef' >>> expected = b'@\xb1:q\xf9\x0b\x96n7T\x08\xf2\xd1\x81\xa1\xaa' >>> ECMA376Standard.makekey_from_password(password, algId, algIdHash, providerType, keySize, saltSize, salt) == expected True """ logger.debug( [ password, hex(algId), hex(algIdHash), hex(providerType), keySize, saltSize, salt, ] ) xor_bytes = lambda a, b: bytearray( [p ^ q for p, q in zip(bytearray(a), bytearray(b))] ) # bytearray() for Python 2 compat. 
@staticmethod
def decrypt(password, salt, ibuf, blocksize=0x200):
    r"""
    Return decrypted data.

    ``ibuf`` is consumed in chunks of ``blocksize`` bytes.  Each chunk is
    decrypted with a fresh RC4 cipher whose key is derived from the password,
    the salt and the running chunk number.  The result is a ``BytesIO``
    positioned at offset 0.
    """
    plain = io.BytesIO()

    block = 0
    key = _makekey(password, salt, block)

    while True:
        chunk = ibuf.read(blocksize)
        if chunk == b"":
            break

        rc4 = Cipher(ARC4(key), mode=None, backend=default_backend()).decryptor()
        plain.write(rc4.update(chunk) + rc4.finalize())

        # From wvDecrypt: at this stage the RC4 algorithm has to be rekeyed
        # (rekeying scheme figured out by Dieter Spaar).
        block += 1
        key = _makekey(password, salt, block)

    plain.seek(0)
    return plain
@staticmethod
def create_xor_key_method1(password):
    """
    Derive the 16-bit XOR key for a password.

    The key starts from ``DocumentXOR.initial_code`` indexed by password
    length.  The password is then walked back to front; for each of the low
    seven bits of every character (most significant first) the next
    ``DocumentXOR.xor_matrix`` element, counting down from index 0x68, is
    XORed into the key when that bit is set.
    """
    key = DocumentXOR.initial_code[len(password) - 1]
    matrix_pos = 0x00000068

    for code in list(map(ord, reversed(password))):
        for _ in range(7):
            if code & 0x40:
                key = (key ^ DocumentXOR.xor_matrix[matrix_pos]) & 0xFFFF
            # Shift the next bit into position 6, keeping one byte.
            code = (code << 1) & 0xFF
            matrix_pos -= 1

    return key
xor_key & 0xFF00 ) >> 8 # SET Temp TO most significant byte of XorKey obfuscation_array[index] = DocumentXOR.xor_ror( DocumentXOR.pad_array[0], temp ) index -= 1 temp = xor_key & 0x00FF password_last_char = ord(password[-1]) obfuscation_array[index] = DocumentXOR.xor_ror(password_last_char, temp) while index > 0: index -= 1 temp = (xor_key & 0xFF00) >> 8 obfuscation_array[index] = DocumentXOR.xor_ror(ord(password[index]), temp) index -= 1 temp = xor_key & 0x00FF obfuscation_array[index] = DocumentXOR.xor_ror(ord(password[index]), temp) index = 15 pad_index = 15 - len(password) while pad_index > 0: temp = (xor_key & 0xFF00) >> 8 obfuscation_array[index] = DocumentXOR.xor_ror( DocumentXOR.pad_array[pad_index], temp ) index -= 1 pad_index -= 1 temp = xor_key & 0x00FF obfuscation_array[index] = DocumentXOR.xor_ror( DocumentXOR.pad_array[pad_index], temp ) index -= 1 pad_index -= 1 return obfuscation_array @staticmethod def ror(n, rotations, width): return (2**width - 1) & (n >> rotations | n << (width - rotations)) @staticmethod def rol(n, rotations, width): return (2**width - 1) & (n << rotations | n >> (width - rotations)) @staticmethod def decrypt(password, ibuf, plaintext, records, base): r""" Return decrypted data (DecryptData_Method1) """ obuf = io.BytesIO() xor_array = DocumentXOR.create_xor_array_method1(password) data_index = 0 record_index = 0 while data_index < len(plaintext): count = 1 if plaintext[data_index] == -1 or plaintext[data_index] == -2: for j in range(data_index + 1, len(plaintext)): if plaintext[j] >= 0: break count += 1 if plaintext[data_index] == -2: xor_array_index = (data_index + count + 4) % 16 else: xor_array_index = (data_index + count) % 16 temp_res = 0 for item in range(count): data_byte = ibuf.read(1) temp_res = data_byte[0] ^ xor_array[xor_array_index] temp_res = DocumentXOR.ror(temp_res, 5, 8) obuf.write(temp_res.to_bytes(1, "little")) xor_array_index += 1 xor_array_index = xor_array_index % 16 record_index += 1 else: 
class CLITest(unittest.TestCase):
    """Run the shell-based CLI regression script."""

    def test_cli(self):
        """The script must exit with status 0.

        The script path is relative, so the test has to be started from the
        repository root.
        """
        completed = subprocess.run("./tests/test_cli.sh", shell=True)
        self.assertEqual(completed.returncode, 0)
"__main__": unittest.main() ================================================ FILE: tests/test_cli.sh ================================================ #!/usr/bin/env bash set -ev cd "$(dirname "$0")" msoffcrypto-tool () { python ../msoffcrypto "$@" } # Decryption msoffcrypto-tool --test inputs/example_password.docx && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/example.docx && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/example_password.docx /tmp/example.docx diff /tmp/example.docx outputs/example.docx msoffcrypto-tool --test inputs/example_password.xlsx && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/example.xlsx && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/example_password.xlsx /tmp/example.xlsx diff /tmp/example.xlsx outputs/example.xlsx msoffcrypto-tool --test inputs/ecma376standard_password.docx && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/ecma376standard_password_plain.docx && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/ecma376standard_password.docx /tmp/ecma376standard_password_plain.docx diff /tmp/ecma376standard_password_plain.docx outputs/ecma376standard_password_plain.docx msoffcrypto-tool --test inputs/rc4cryptoapi_password.doc && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/rc4cryptoapi_password_plain.doc && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/rc4cryptoapi_password.doc /tmp/rc4cryptoapi_password_plain.doc diff /tmp/rc4cryptoapi_password_plain.doc outputs/rc4cryptoapi_password_plain.doc msoffcrypto-tool --test inputs/rc4cryptoapi_password.xls && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/rc4cryptoapi_password_plain.xls && : ; [ $? = 1 ] msoffcrypto-tool -p Password1234_ inputs/rc4cryptoapi_password.xls /tmp/rc4cryptoapi_password_plain.xls diff /tmp/rc4cryptoapi_password_plain.xls outputs/rc4cryptoapi_password_plain.xls msoffcrypto-tool --test inputs/rc4cryptoapi_password.ppt && : ; [ $? = 0 ] msoffcrypto-tool --test outputs/rc4cryptoapi_password_plain.ppt && : ; [ $? 
#!/usr/bin/env python

"""Compare output of msoffcrypto-tool for a few input files."""

import os
import sys
import unittest
from difflib import SequenceMatcher
from os.path import abspath, dirname, isfile
from os.path import join as pjoin
from tempfile import mkstemp

try:
    import cryptography
except ImportError:
    cryptography = None

# add base dir to path so we always import local msoffcrypto
TEST_BASE_DIR = dirname(abspath(__file__))
MODULE_BASE_DIR = dirname(TEST_BASE_DIR)
if sys.path[0] != MODULE_BASE_DIR:
    sys.path.insert(0, MODULE_BASE_DIR)
import msoffcrypto

#: encryption password for files tested here
PASSWORD = "Password1234_"

#: input dir
INPUT_DIR = "inputs"

#: (encrypted input file, expected decrypted file, password) triples
EXAMPLE_FILES = (
    ("example_password.docx", "example.docx", PASSWORD),
    ("example_password.xlsx", "example.xlsx", PASSWORD),
    ("ecma376standard_password.docx", "ecma376standard_password_plain.docx", PASSWORD),
    ("rc4cryptoapi_password.doc", "rc4cryptoapi_password_plain.doc", PASSWORD),
    ("rc4cryptoapi_password.xls", "rc4cryptoapi_password_plain.xls", PASSWORD),
    ("rc4cryptoapi_password.ppt", "rc4cryptoapi_password_plain.ppt", PASSWORD),
    ("xor_password_123456789012345.xls", "xor_password_123456789012345_plain.xls", "123456789012345"),
)

#: output dir:
OUTPUT_DIR = "outputs"


@unittest.skipIf(
    cryptography is None, "Cryptography module not installed for python{}.{}".format(sys.version_info.major, sys.version_info.minor)
)
class KnownOutputCompare(unittest.TestCase):
    """Decrypt each file in EXAMPLE_FILES and compare it with its stored plain version."""

    def test_known_output(self):
        """Decrypt every example input and require >99% similarity to the expected file.

        Each input is decrypted into a temporary file, the temporary file is read
        back, and its bytes are compared with the reference file from OUTPUT_DIR
        using ``difflib.SequenceMatcher``.
        """
        for in_name, out_name, password in EXAMPLE_FILES:
            input_path = pjoin(TEST_BASE_DIR, INPUT_DIR, in_name)
            expect_path = pjoin(TEST_BASE_DIR, OUTPUT_DIR, out_name)

            # now run the relevant parts of __main__.main:
            with open(input_path, "rb") as input_handle:
                file = msoffcrypto.OfficeFile(input_handle)
                if file.format == "ooxml" and file.type in ["standard", "agile"]:
                    file.load_key(password=password, verify_password=True)
                else:
                    file.load_key(password=password)

                out_desc = None
                out_path = None
                output = b""
                try:
                    # create temp file for output of decryption function
                    out_desc, out_path = mkstemp(prefix="msoffcrypto-test-", suffix=".txt", text=True)
                    with os.fdopen(out_desc, "wb") as out_handle:
                        out_desc = None  # out_handle now owns this

                        # run decryption, capture output
                        print("decrypting {}".format(in_name))
                        if file.format == "ooxml" and file.type in ["agile"]:
                            file.decrypt(out_handle, verify_integrity=True)
                        else:
                            file.decrypt(out_handle)

                    # Read the *decrypted* temp file back. Reading expect_path here
                    # would compare the reference file with itself and the test
                    # could never fail.
                    with open(out_path, "rb") as reader:
                        output = reader.read()
                finally:
                    # ensure we do not leak temp files. Always close & remove.
                    # Compare against None: a descriptor value of 0 is falsy.
                    if out_desc is not None:
                        os.close(out_desc)
                    if out_path and isfile(out_path):
                        os.unlink(out_path)

            # read the expected (reference) file into memory
            with open(expect_path, "rb") as reader:
                expect = reader.read()

            # compare:
            print("comparing output to {}".format(out_name))
            similarity = SequenceMatcher(None, expect, output).ratio()
            self.assertGreater(similarity, 0.99)


if __name__ == "__main__":
    unittest.main()