[
  {
    "path": ".gitignore",
    "content": "*.py[cod]\n\n# C extensions\n*.so\n\n# Packages\n*.egg\n*.egg-info\ndist\nbuild\neggs\nparts\nbin\nvar\nsdist\ndevelop-eggs\n.installed.cfg\nlib\nlib64\n\n# Installer logs\npip-log.txt\n\n# Unit test / coverage reports\n.coverage\n.tox\nnosetests.xml\n\n# Translations\n*.mo\n\n# Mr Developer\n.mr.developer.cfg\n.project\n.pydevproject\n<<<<<<< HEAD\nextract_valid_evtx_records_and_templates.py~\nfind_evtx_chunks.py~\n*~\n*.evtx\nworkspace/*.xml\nworkspace/*.txt\n.idea/*\n.idea\n*_templates.txt\n*_chunks.txt\n"
  },
  {
    "path": ".travis.yml",
    "content": "env:\n    global:\n        - secure: \"j89gGCxDhMdJ9vP/dUhu06XUqYMeqMjxIx8/s8KdVOhE0BxOddU7dIQE5SvcGYMoW+W4NV+7/Pio/eIkY3SUXGOLlPLMLwMDmvg9nA6HwrcSs6zPGreCYhqf7RlCNEyHHoWZ6syHjx1cEL2c1FyXLelQ8r5ONAzWsTeDn7ctnFcGzr7EDhKEjC9LGZchjRMYVrWkOruskSwnJYkPCstNqcwLh7qPgAXktTxx0YMPIr0sTbwLTnZRiNCE0egFSoT6QLBggrM3Nv0DbZO7luyFEgozgp99CACDdJMeMsKqgkedk0E+nz2BV26EpqjjyIRJVMiwXZVLexkB8vSw9PhCGY36REwMIhJz6KCzVQMZyoNkSbrMWBpa3LqdfZGyMxBtLlBw/Yvv+pzB+OnbCIjooy1nTfOlyLlk6QzESnzEW/A/DsFVEnNXQjoAOZEZjj6SlaHl+r/Uw6sXAP4FjP8umI++E9+MnuI0T1bevX8ZMdJ6Qz7gPWnIuOGJqkvFqC7MYt3SY4O4O0DS1pHXQGoQOgoRYExY2VRqeJSEnRTvbr8S5uVWOOz5PRb4psM6gnl6eueN8uNNmqj+BDaZp8qu/uLJ31zvAq0q1+rxxaqB5OkVebi65Q7cN6IjnpCTCK5risooGNZRGco2thnRiuysQ3kEQjaWH/f15cfY8YYnuTA=\"\n        - secure: \"xe37YTz7uegptreK36MVYl8+c9FzMqVsNr+/WMPBiZSmQ2T9mHoEP7QWL3AA1JSi7q2A3qWKNK15fUpFmu+u7+/15lFYUqihPEMZTBmmUYx6/I6bxP6d/sdu+mro13kflDzqTOdmkDU9X/Olympc5kI8qqheH6OTqwgjU0ypl/V53/3mXTgOre4LYvy+p0nkYdFCzipp+stuZyvn6tag57nvdnH8j8OLLR886ZV7KtB7RlOeaVK0NbyZ5XFBHJL1GXwV01lDfMsMokDHgkDfvjKVo63p1rYFScOUx7BBwpnSM1zr1hpHli562wQbs2eS4F8oYWOzXxhcRmvwAUxE/iqQvNyis+PD42xlhhOP1ubKJazqc9/AlKIHLNIvfsAVuFLt1eGI/g5/K+cpNUEs38+CXpqy2vTng4bFF4IabpxKlZxqpuCniKiDs5WExmhp2/fwXpNhdQmlh/WM2Mv4+vD8XyardIWZzmeR9EVFRAe+cGoejM+seGNKyfYxdOnj0fqmT2IORK9UKrmqisa9eBUOOg5kLirbnhfbax2J/FAcvAaMS8c5ZQHMPdswyaOdSpoJPPHULpI3uoPvYDFJcbuuZWlE0tuD/Qm5/4ABOeUfInOPAWtOBKYFl+YYIWUzCMEy6QxzJpSXqqXtNlR5y1l4M+PQzVY463+DXK3XBXE=\"\n        - ARTIFACTS_BUCKET=build-artifacts.floss.flare.fireeye.com\n\nlanguage: python\n\nmatrix:\n    include:\n        - os: linux\n          sudo: required\n          python: 2.7\n\n        - os: linux\n          sudo: required\n          python: 3.5\n\n        # travis doesn't have py2.7 available, so we have to do it ourselves\n        # ref: https://github.com/travis-ci/travis-ci/issues/2312\n        - os: osx\n          language: generic\n\nbefore_install:\n    # fix erroring OSX job because of rvm issue\n    - if [[ 
\"$TRAVIS_OS_NAME\" == \"osx\" ]]; then rvm get stable --auto-dotfiles; fi\n    # travis doesn't have py2.7 available, so we have to do it ourselves\n    # ref: https://github.com/travis-ci/travis-ci/issues/2312\n    - if [[ \"$TRAVIS_OS_NAME\" == \"osx\" ]]; then git clone https://github.com/MacPython/terryfy ../terryfy; fi\n    - if [[ \"$TRAVIS_OS_NAME\" == \"osx\" ]]; then source ../terryfy/travis_tools.sh;                         fi\n    - if [[ \"$TRAVIS_OS_NAME\" == \"osx\" ]]; then get_python_environment macpython 2.7.10;                   fi\n\ninstall:\n    - pip install pyinstaller pep8\n    - echo \"__version__ = '$(git describe --tags)'\" > evtxtract/version.py\n    - pip install -e .\n    - pyinstaller evtxtract.spec && rm -r './dist/evtxtract-dat/'\n\nscript:\n    - find . -name \\*.py -exec pep8 --ignore=E501 {} \\;\n    - pushd ./tests && wget \"https://dl.dropboxusercontent.com/u/55819714/joshua1.zip\" && unzip joshua1.zip && popd\n    - py.test tests/ -v\n\naddons:\n    artifacts:\n        debug: true\n        paths:\n            - $(find . -type f | grep -e '/bin/' -e 'dist/evtxtract' | awk 1 ORS=':')\n        target_paths: travis/$TRAVIS_OS_NAME/\n"
  },
  {
    "path": "LICENSE.TXT",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "\nPurpose\n-------\nEVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.\n\nQuick Run\n---------\n\nInstall EVTXtract via `pip`:\n\n    pip install evtxtract\n\nNow the tool is ready to go!\n\n    C:/Python27/Scripts/evtxtract.exe   Z:/evidence/1/image.dd   >   Z:/work/1/evtx.xml\n\n\nQuicker Run\n-----------\n\nDownload standalone executable nightly builds of EVTXtract here:\n\n  - [Linux](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/evtxtract)\n  - [MacOS](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/osx/dist/evtxtract)\n\nThen you can do:\n\n    ./evtxtract    /path/to/evidence    >   /path/to/output.xml\n\n\nBackground\n----------\n\nEVTX records are XML fragments encoded using a Microsoft-specific binary XML representation.\nDespite the convenient format, it is not easy to recover EVTX event log records from a corrupted file or unallocated space.\nThis is because the complete representation of a record often depends on other records found nearby.\nThe event log service recognizes similarities among records and refactors commonalities into \"templates\".\nA template is a fixed structure with placeholders that reserve space for variable content.\nThe on-disk event log record structure is a reference to a template, and a list of substitutions (the variable content the replaces a placeholder in a template).\nTo decode a record into XML, the event log service resolves the template and replaces its placeholders with the entries of the substitution array.\nTherefore, template corruption renders many records unrecoverable within the local 64KB \"chunk\".\nHowever, the substitution array for the remaining records may still be intact.\nIf so, it may be possible to produce XML fragments that match the original records if the damaged template can be reconstructed.\nFor many common events, such as process creation 
or account logon, empirical testing demonstrates the relevant templates remain mostly constant.\nIn these cases, recovering event log records boils down to identifying appropriate templates found in other EVTX chunks.\n\n\nAlgorithm\n---------\n\n1. Scan for chunk signatures (\"ElfChnk\")\n   - check header for sane values (0x80 <= size <= 0x200)\n   - verify checksums (header, data)\n2. Extract records from valid chunks found in (1)\n3. Extract templates from valid chunks found in (1)\n4. Scan for record signatures\n   - check header for sane values\n   - extract timestamp\n   - attempt to parse substitutions\n   - attempt to decode substitutions into EID, other fields\n5. Reconstruct records by reusing old templates with recovered substitutions\n\n\nUsage\n-----\n\nEVTXtract is a pure Python script.\nThis means it easily runs on Windows, Linux, and MacOS.\nSimply invoke the script, providing the path to a binary image, and EVTXtract writes its results to the standard output stream.\nThe binary file can be any data: a raw image, memory dump, etc.\n\nExample command line:\n\n    C:/Python27/Scripts/evtxtract.exe   Z:/evidence/1/image.dd   >   Z:/work/1/evtx.xml\n\nBelow are some example results from the above command.\nIt shows two records: a complete and incomplete record.\nThe first record is completely reconstructed,\n  and is formatted just like it would be in event viewer.\nHowever, EVTXtract was unable to completely reconstruct the second record,\n since some critical template data was missing.\nSo, it's been formatted with as much data as was recovered.\nEVTXtract uses a schema that allows you to continue processing despite incomplete data.\n\n    <Event xmlns=\"http://schemas.microsoft.com/win/2004/08/events/event\">\n        <System>\n            <Provider Name=\"Microsoft-Windows-PrintService\" Guid=\"{747ef6fd-e535-4d16-b510-42c90f6873a1}\"></Provider>\n            <EventID Qualifiers=\"\">823</EventID>\n            <Version>0</Version>\n            
<Level>4</Level>\n            <Task>49</Task>\n            <Opcode>11</Opcode>\n            <Keywords>0x80000000000200</Keywords>\n            <TimeCreated SystemTime=\"2013-03-23 02:05:57.848455\"></TimeCreated>\n            <EventRecordID>1</EventRecordID>\n            <Correlation ActivityID=\"\" RelatedActivityID=\"\"></Correlation>\n            <Execution ProcessID=\"1204\" ThreadID=\"1208\"></Execution>\n            <Channel>Microsoft-Windows-PrintService/Admin</Channel>\n            <Computer>JOSHUA</Computer>\n            <Security UserID=\"S-1-5-21-3454551831-629247693-1078506759-1000\"></Security>\n        </System>\n        <UserData>\n            <ChangingDefaultPrinter xmlns:auto-ns3=\"http://schemas.microsoft.com/win/2004/08/events\" xmlns=\"http://manifests.microsoft.com/win/2005/08/windows/printing/spooler/core/events\">\n                <DefaultPrinterSelectedBySpooler>1</DefaultPrinterSelectedBySpooler>\n                <OldDefaultPrinter></OldDefaultPrinter>\n                <NewDefaultPrinter>Microsoft XPS Document Writer,winspool,Ne00:</NewDefaultPrinter>\n                <Status>0x000000</Status>\n                <Module>spoolsv.exe</Module>\n            </ChangingDefaultPrinter>\n        </UserData>\n    </Event>\n\n    ...\n\n    <Record>\n    <Offset>0x317198</Offset>\n    <EventID>1531</EventID>\n    <Substitutions>\n      <Substitution index=\"0\">\n        <Type>4</Type>\n        <Value>4</Value>\n      </Substitution>\n      <Substitution index=\"1\">\n        <Type>4</Type>\n        <Value>0</Value>\n      </Substitution>\n      <Substitution index=\"2\">\n        <Type>6</Type>\n        <Value>0</Value>\n      </Substitution>\n      <Substitution index=\"3\">\n        <Type>6</Type>\n        <Value>1531</Value>\n      </Substitution>\n      <Substitution index=\"4\">\n        <Type>0</Type>\n        <Value></Value>\n      </Substitution>\n      <Substitution index=\"5\">\n        <Type>21</Type>\n        
<Value>0x8000000000000000</Value>\n      </Substitution>\n      <Substitution index=\"6\">\n        <Type>17</Type>\n        <Value>2013-03-23 02:02:35.679552</Value>\n      </Substitution>\n      <Substitution index=\"7\">\n        <Type>0</Type>\n        <Value></Value>\n      </Substitution>\n      <Substitution index=\"8\">\n        <Type>8</Type>\n        <Value>928</Value>\n      </Substitution>\n      <Substitution index=\"9\">\n        <Type>8</Type>\n        <Value>1040</Value>\n      </Substitution>\n      <Substitution index=\"10\">\n        <Type>10</Type>\n        <Value>132</Value>\n      </Substitution>\n      <Substitution index=\"11\">\n        <Type>4</Type>\n        <Value>0</Value>\n      </Substitution>\n      <Substitution index=\"12\">\n        <Type>19</Type>\n        <Value>S-1-5-18</Value>\n      </Substitution>\n      <Substitution index=\"13\">\n        <Type>0</Type>\n        <Value></Value>\n      </Substitution>\n      <Substitution index=\"14\">\n        <Type>1</Type>\n        <Value>Microsoft-Windows-User Profiles Service</Value>\n      </Substitution>\n      <Substitution index=\"15\">\n        <Type>15</Type>\n        <Value>0001010f-010c-77e3-bf2f-3ef300001200</Value>\n      </Substitution>\n      <Substitution index=\"16\">\n        <Type>1</Type>\n        <Value>Application</Value>\n      </Substitution>\n    </Substitutions>\n    </Record>\n"
  },
  {
    "path": "evtxtract/__init__.py",
    "content": "import logging\nimport collections\n\nimport evtxtract.utils\nimport evtxtract.carvers\nimport evtxtract.templates\n\n\nlogger = logging.getLogger(__name__)\n\nVALUE = 1\n\n\nclass CompleteRecord(object):\n    __slots__ = ('offset', 'eid', 'xml')\n\n    def __init__(self, offset, eid, xml):\n        super(CompleteRecord, self).__init__()\n        self.offset = offset\n        self.eid = eid\n        self.xml = xml\n\n\nclass IncompleteRecord(object):\n    __slots__ = ('offset', 'eid', 'substitutions')\n\n    def __init__(self, offset, eid, substitutions):\n        super(IncompleteRecord, self).__init__()\n        self.offset = offset\n        self.eid = eid\n        self.substitutions = substitutions\n\n\ndef extract(buf):\n    '''\n    Do the EVTXtract algorithm and reconstruct EVTX records from the given data.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n\n    Returns:\n      iterable[union[CompleteRecord, IncompleteRecord]]: a generator of either\n        CompleteRecord or IncompleteRecord. 
You'll have to type-switch on these\n        classes to decide how to handle them.\n    '''\n    # this does a full scan of the file (#1)\n    chunks = set(evtxtract.carvers.find_evtx_chunks(buf))\n\n    valid_record_offsets = set([])\n    for chunk in chunks:\n        for record in evtxtract.carvers.extract_chunk_records(buf, chunk):\n            valid_record_offsets.add(record.offset)\n            yield CompleteRecord(record.offset, record.eid, record.xml)\n\n    # map from eid to dictionary mapping from templateid to template\n    templates = collections.defaultdict(dict)\n    for chunk in chunks:\n        for template in evtxtract.carvers.extract_chunk_templates(buf, chunk):\n            templates[template.eid][template.get_id()] = template\n\n    # this does a full scan of the file (#2).\n    # needs to be distinct because we must have collected all the templates\n    # first.\n    for record_offset in evtxtract.carvers.find_evtx_records(buf):\n        if record_offset in valid_record_offsets:\n            continue\n\n        try:\n            record = evtxtract.carvers.extract_record(buf, record_offset)\n        except evtxtract.carvers.ParseError as e:\n            logger.info('parse error for record at offset: 0x%x: %s', record_offset, str(e))\n            continue\n        except ValueError as e:\n            logger.info('timestamp parse error for record at offset: 0x%x: %s', record_offset, str(e))\n            continue\n        except Exception as e:\n            logger.info('unknown parse error for record at offset: 0x%x: %s', record_offset, str(e))\n            continue\n\n        if len(record.substitutions) < 4:\n            logger.info('too few substitutions for record at offset: 0x%x', record_offset)\n            continue\n\n        # we just know that the EID is substitution index 3\n        eid = record.substitutions[3][VALUE]\n\n        matching_templates = set([])\n        for template in templates.get(eid, {}).values():\n            if 
template.match_substitutions(record.substitutions):\n                matching_templates.add(template)\n\n        if len(matching_templates) == 0:\n            logger.info('no matching templates for record at offset: 0x%x', record_offset)\n            yield IncompleteRecord(record_offset, eid, record.substitutions)\n            continue\n\n        if len(matching_templates) > 1:\n            logger.info('too many templates for record at offset: 0x%x', record_offset)\n            yield IncompleteRecord(record_offset, eid, record.substitutions)\n            continue\n\n        template = list(matching_templates)[0]\n\n        record_xml = template.insert_substitutions(record.substitutions)\n\n        yield CompleteRecord(record_offset, eid, record_xml)\n"
  },
  {
    "path": "evtxtract/carvers.py",
    "content": "import re\nimport struct\nimport logging\nimport binascii\nimport datetime\nimport xml.sax.saxutils\nfrom collections import namedtuple\n\nimport six\nimport Evtx.Evtx\nimport Evtx.Views\n\nimport evtxtract.templates\n\n\nlogger = logging.getLogger(__name__)\n\n\n# TODO: this should be part of python-evtx\nEVTX_HEADER_MAGIC = b\"ElfChnk\"\nEVTX_RECORD_MAGIC = b\"\\x2a\\x2a\\x00\\x00\"\nCHUNK_SIZE = 0x10000\nMIN_CHUNK_HEADER_SIZE = 0x80\nMAX_CHUNK_HEADER_SIZE = 0x200\n\n\nclass ParseError(RuntimeError): pass\n\n\ndef is_chunk_header(buf, offset):\n    \"\"\"\n    Return True if the offset appears to be an EVTX Chunk header.\n    Implementation note: Simply checks the magic header and size field for reasonable values.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): the address of the potential EVTX chunk header.\n\n    Returns:\n      bool: if the offset appears to be an EVTX chunk header.\n    \"\"\"\n    if len(buf) < offset + 0x2C:\n        # our accesses below will overflow\n        return False\n\n    magic = struct.unpack_from(\"<7s\", buf, offset)[0]\n    if magic != EVTX_HEADER_MAGIC:\n        return False\n\n    size = struct.unpack_from(\"<I\", buf, offset + 0x28)[0]\n    if not (MIN_CHUNK_HEADER_SIZE <= size <= MAX_CHUNK_HEADER_SIZE):\n        return False\n\n    if len(buf) <= offset + size:\n        # the chunk overruns the buffer end\n        return False\n\n    try:\n        chunk = Evtx.Evtx.ChunkHeader(buf, offset)\n    except:\n        logger.debug('failed to parse chunk header', exc_info=True)\n        return False\n\n    if len(buf) < offset + CHUNK_SIZE:\n        return False\n\n    if chunk.calculate_header_checksum() != chunk.header_checksum():\n        return False\n\n    if chunk.calculate_data_checksum() != chunk.data_checksum():\n        return False\n\n    return True\n\n\ndef find_evtx_chunks(buf):\n    \"\"\"\n    Scans the given data for valid EVTX chunk 
structures.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n\n    Returns:\n      iterable[int]: generator of offsets of chunks\n    \"\"\"\n    offset = 0\n    while True:\n        offset = buf.find(EVTX_HEADER_MAGIC, offset)\n        if offset == -1:\n            break\n\n        if is_chunk_header(buf, offset):\n            yield offset\n\n        offset += 1\n\n\ndef is_record(buf, offset):\n    \"\"\"\n    Return True if the offset appears to be an EVTX record.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): the address of the potential record.\n\n    Returns:\n      bool: if its a record.\n    \"\"\"\n\n    if len(buf) < offset + 8:\n        return False\n\n    magic, size = struct.unpack_from(\"<II\", buf, offset)\n    if magic != 0x00002a2a:\n        return False\n\n    if not (0x30 <= size <= 0x10000):\n        return False\n\n    if len(buf) < offset + size:\n        return False\n\n    size2 = struct.unpack_from(\"<I\", buf, offset + size - 4)[0]\n    if size != size2:\n        return False\n\n    return True\n\n\ndef find_evtx_records(buf):\n    \"\"\"\n    Generates offsets of apparent EVTX records from the given buffer.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n\n    Returns:\n      iterable[int]: the offsets of EVTX records.\n    \"\"\"\n    offset = 0\n    while True:\n        offset = buf.find(EVTX_RECORD_MAGIC, offset)\n        if offset == -1:\n            break\n\n        if is_record(buf, offset):\n            yield offset\n\n        offset += 1\n\n\nRecoveredRecord = namedtuple('RecoveredRecord', ['offset', 'eid', 'xml'])\n\n\ndef extract_chunk_records(buf, offset):\n    \"\"\"\n    Generates EVTX records from the EVTX chunk at the given offset.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): offset to EVTX chunk\n\n    Returns:\n      iterable[int]: 
the offsets of EVTX records.\n    \"\"\"\n    try:\n        chunk = Evtx.Evtx.ChunkHeader(buf, offset)\n    except:\n        raise ParseError('failed to parse chunk header')\n\n    cache = {}\n    for record in chunk.records():\n        try:\n            record_xml = Evtx.Views.evtx_record_xml_view(record, cache=cache)\n            eid = evtxtract.utils.get_eid(record_xml)\n            yield RecoveredRecord(record.offset(), eid, record_xml)\n\n        except UnicodeEncodeError:\n            logger.info(\"Unicode encoding issue processing record at 0x%X\", record.offset())\n            continue\n\n        except UnicodeDecodeError:\n            logger.info(\"Unicode decoding issue processing record at 0x%X\", record.offset())\n            continue\n\n        except Evtx.Evtx.InvalidRecordException:\n            logger.info(\"EVTX parsing issue processing record at 0x%X\", record.offset())\n            continue\n\n        except Exception as e:\n            logger.info(\"Unknown exception processing record at 0x%X\", record.offset(), exc_info=True)\n            continue\n\n\ndef extract_chunk_templates(buf, offset):\n    \"\"\"\n    Generates EVTX record templates from the EVTX chunk at the given offset.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): offset to EVTX chunk.\n\n    Returns:\n      iterable[evtxtract.templates.Template]: a generator of the things you asked for.\n    \"\"\"\n\n    try:\n        chunk = Evtx.Evtx.ChunkHeader(buf, offset)\n    except:\n        raise ParseError('failed to parse chunk header')\n\n    cache = {}\n    for record in chunk.records():\n        try:\n            yield evtxtract.templates.get_template(record)\n        except UnicodeEncodeError:\n            logger.info(\"Unicode encoding issue processing record at 0x%X\", record.offset())\n            continue\n\n        except UnicodeDecodeError:\n            logger.info(\"Unicode decoding issue processing record at 0x%X\", 
record.offset())\n            continue\n\n        except Evtx.Evtx.InvalidRecordException:\n            logger.info(\"EVTX parsing issue processing record at 0x%X\", record.offset())\n            continue\n\n        except Exception as e:\n            logger.info(\"Unknown exception processing record at 0x%X\", record.offset(), exc_info=True)\n            continue\n\n\n# map from byte value to boolean\n# the key values correspond to evtx node types\nVALID_SUBSTITUTION_TYPES = [False for _ in range(256)]\nfor i in range(22):\n    VALID_SUBSTITUTION_TYPES[i] = True\nVALID_SUBSTITUTION_TYPES[33] = True\nVALID_SUBSTITUTION_TYPES[129] = True\n\n\nclass MaxOffsetReached(Exception): pass\n\n\ndef does_root_have_resident_template(buf, offset, max_offset):\n    \"\"\"\n    Guess whether an RootNode has a resident template\n      from the given buffer and offset, not parsing\n      beyond the given max_offset.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): address of an EVTX record.\n      max_offset (int): don't parse beyond this address.\n\n    Returns:\n      boolean: if the RootNode has a resident template.\n\n    Raises:\n      MaxOffsetReached: if the given max offset was reached while parsing.\n    \"\"\"\n    logger = logging.getLogger(\"extract_lost_records\")\n    ofs = offset\n    token = struct.unpack_from(\"<b\", buf, ofs)[0]\n    if token == 0x0F:  # stream start\n        ofs += 4\n\n    ofs += 6  # template offset\n\n    # now, since we don't know where the chunk header is\n    #  for this record, we can't use the template offset\n    #  to decide if its resident or not\n    # instead, we assume that if the template is resident,\n    #  then it begins immediately. 
if this is true, and the\n    #  template is resident, then the next fields are:\n    #    DWORD next_offset  (range 0-0x10000?, length 0x4)\n    #    GUID  template_id (length 0x16, essentially random bytes)\n    #    DWORD template_length (range 0-0x10000?, length 0x4)\n    # if the template is non-resident, then the fields are:\n    #    DWORD num_subs (range 0-100?)\n    #    WORD size                            \\\n    #    BYTE type (value one of 0-21,33,129)  | repeat num_subs times\n    #    BYTE zero (value 0)                  /\n    # the key takeaway is that we can test\n    #   *(ofs + 6 + 4i) (with 0 < i < min(num_subs, 4))\n    #  is in the set {0-21, 33, 129}, and that\n    #   *(ofs + 7 + 4i) (0 < i < min(num_subs, 4))\n    #  is 0.  If these conditions hold, then the template is probably\n    #  non-resident.\n    #\n    # TODO(wb): what if num_subs == 1 or 2?\n\n    ofs += 4  # next_offset or num_subs\n    maybe_num_subs = struct.unpack_from(\"<I\", buf, ofs)[0]\n    if maybe_num_subs > 100:\n        return True\n\n    ofs += 4  # template_id or size\n\n    if max_offset < ofs + 4 + (4 * min(maybe_num_subs or 2, 4)):\n        return False\n\n    for i in range(min(maybe_num_subs or 2, 4)):\n        byte = struct.unpack_from(\"<B\", buf, ofs + 3 + (i * 4))[0]\n        if byte != 0:\n            return True\n\n    for i in range(min(maybe_num_subs or 2, 4)):\n        byte = struct.unpack_from(\"<B\", buf, ofs + 2 + (i * 4))[0]\n        if not VALID_SUBSTITUTION_TYPES[byte]:\n            return True\n\n    return False\n\n\ndef extract_root_substitutions(buf, offset, max_offset):\n    \"\"\"\n    Parse a RootNode into a list of its substitutions, not parsing beyond\n      the max offset.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): address of an EVTX record.\n      max_offset (int): don't parse beyond this address.\n\n    Returns:\n      list[tuple[int, variant]]: list of substitution tuples 
(type, value).\n\n    Raises:\n      ParseError: for various reasons, including invalid timestamps and overruns.\n    \"\"\"\n    ofs = offset\n    token = struct.unpack_from(\"<b\", buf, ofs)[0]\n    if token == 0x0F:  # stream start\n        ofs += 4\n\n    ofs += 6  # template offset\n\n    if does_root_have_resident_template(buf, offset, max_offset):\n        # have to hope that the template begins immediately\n        # template_offset = struct.unpack_from(\"<I\", buf, ofs)[0]\n        logger.debug(\"0x%x: resident template\", offset)\n        ofs += 4  # next offset\n        ofs += 4  # guid\n        ofs += 0x10  # template_length\n        template_length = struct.unpack_from(\"<I\", buf, ofs)[0]\n        ofs += 4\n        ofs += template_length  # num_subs\n    else:\n        logger.debug(\"0x%x: non-resident template\", offset)\n        ofs += 4  # num_subs\n\n    num_subs = struct.unpack_from(\"<I\", buf, ofs)[0]\n    if num_subs > 100:\n        raise ParseError(\"Unexpected number of substitutions: %d at %s\" %\n                         (num_subs, hex(ofs)))\n\n    ofs += 4  # begin sub list\n\n    substitutions = []\n    for _ in range(num_subs):\n        size, type_ = struct.unpack_from(\"<HB\", buf, ofs)\n        if not VALID_SUBSTITUTION_TYPES[type_]:\n            raise ParseError('Unexpected substitution type: ' + hex(type_))\n\n        substitutions.append((type_, size))\n        ofs += 4\n\n    ret = []\n    for i, pair in enumerate(substitutions):\n        type_, size = pair\n        if ofs > max_offset:\n            raise MaxOffsetReached(\"Substitutions overran record buffer.\")\n\n        value = None\n        #[0] = parse_null_type_node,\n        if type_ == 0x0:\n            value = None\n            ret.append((type_, value))\n\n        #[1] = parse_wstring_type_node,\n        elif type_ == 0x1:\n            s = buf[ofs:ofs + size]\n            s = s.decode('utf-16le')\n            s = xml.sax.saxutils.escape(s)\n            value = s\n      
      ret.append((type_, value))\n\n        #[2] = parse_string_type_node,\n        elif type_ == 0x2:\n            s = buf[ofs:ofs + size]\n            s = s.decode('ascii')\n            s = xml.sax.saxutils.escape(s)\n            value = s\n            ret.append((type_, value))\n\n        #[3] = parse_signed_byte_type_node,\n        elif type_ == 0x3:\n            value = struct.unpack_from(\"<b\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[4] = parse_unsigned_byte_type_node,\n        elif type_ == 0x4:\n            value = struct.unpack_from(\"<B\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[5] = parse_signed_word_type_node,\n        elif type_ == 0x5:\n            value = struct.unpack_from(\"<h\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[6] = parse_unsigned_word_type_node,\n        elif type_ == 0x6:\n            value = struct.unpack_from(\"<H\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[7] = parse_signed_dword_type_node,\n        elif type_ == 0x7:\n            value = struct.unpack_from(\"<i\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[8] = parse_unsigned_dword_type_node,\n        elif type_ == 0x8:\n            value = struct.unpack_from(\"<I\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[9] = parse_signed_qword_type_node,\n        elif type_ == 0x9:\n            value = struct.unpack_from(\"<q\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[10] = parse_unsigned_qword_type_node,\n        elif type_ == 0xA:\n            value = struct.unpack_from(\"<Q\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[11] = parse_float_type_node,\n        elif type_ == 0xB:\n            value = struct.unpack_from(\"<f\", buf, ofs)[0]\n            ret.append((type_, value))\n\n        #[12] = parse_double_type_node,\n        elif type_ == 0xC:\n            value = struct.unpack_from(\"<d\", buf, 
ofs)[0]\n            ret.append((type_, value))\n\n        #[13] = parse_boolean_type_node,\n        elif type_ == 0xD:\n            value = struct.unpack_from(\"<I\", buf, ofs)[0] > 1\n            ret.append((type_, value))\n\n        #[14] = parse_binary_type_node,\n        elif type_ == 0xE:\n            value = binascii.hexlify(buf[ofs:ofs + size])\n            ret.append((type_, value))\n\n        #[15] = parse_guid_type_node,\n        elif type_ == 0xF:\n            _bin = buf[offset:offset + 16]\n\n            # Yeah, this is ugly\n            h = [six.indexbytes(_bin, i) for i in range(len(_bin))]\n            value = \"\"\"{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}\"\"\".format(\n                h[3], h[2], h[1], h[0],\n                h[5], h[4],\n                h[7], h[6],\n                h[8], h[9],\n                h[10], h[11], h[12], h[13], h[14], h[15])\n            ret.append((type_, value))\n\n        #[16] = parse_size_type_node,\n        elif type_ == 0x10:\n            if size == 0x4:\n                value = struct.unpack_from(\"<I\", buf, ofs)[0]\n            elif size == 0x8:\n                value = struct.unpack_from(\"<Q\", buf, ofs)[0]\n            else:\n                raise ParseError('unexpected sizetypenode value: ' + hex(size))\n\n            ret.append((type_, value))\n\n        #[17] = parse_filetime_type_node,\n        elif type_ == 0x11:\n            qword = struct.unpack_from(\"<Q\", buf, ofs)[0]\n            try:\n                value = datetime.datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600)\n            except ValueError:\n                raise ParseError('invalid timestamp')\n\n            ret.append((type_, value))\n\n        #[18] = parse_systemtime_type_node,\n        elif type_ == 0x12:\n            parts = struct.unpack_from(\"<WWWWWWWW\", buf, ofs)\n            value = datetime.datetime(parts[0], parts[1],\n                                      
parts[3],  # skip part 2 (day of week)\n                                      parts[4], parts[5],\n                                      parts[6], parts[7])\n            ret.append((type_, value))\n\n        #[19] = parse_sid_type_node,  -- SIDTypeNode, 0x13\n        elif type_ == 0x13:\n            version, num_elements = struct.unpack_from(\"<BB\", buf, ofs)\n            id_high, id_low = struct.unpack_from(\">IH\", buf, ofs + 2)\n            value = \"S-%d-%d\" % (version, (id_high << 16) ^ id_low)\n            for i in range(num_elements):\n                val = struct.unpack_from(\"<I\", buf, ofs + 8 + (4 * i))\n                value += \"-%d\" % val\n            ret.append((type_, value))\n\n        #[20] = parse_hex32_type_node,  -- Hex32TypeNoe, 0x14\n        elif type_ == 0x14:\n            value = \"0x\"\n            for c in buf[ofs:ofs + size][::-1]:\n                if not isinstance(c, (int)):\n                    c = ord(c)\n                value += \"%02x\" % c\n            ret.append((type_, value))\n\n        #[21] = parse_hex64_type_node,  -- Hex64TypeNode, 0x15\n        elif type_ == 0x15:\n            value = \"0x\"\n            for c in buf[ofs:ofs + size][::-1]:\n                if not isinstance(c, (int)):\n                    c = ord(c)\n                value += \"%02x\" % c\n            ret.append((type_, value))\n\n        #[33] = parse_bxml_type_node,  -- BXmlTypeNode, 0x21\n        elif type_ == 0x21:\n            subs = extract_root_substitutions(buf, ofs, max_offset)\n            ret.extend(subs)\n\n        #[129] = WstringArrayTypeNode, 0x81\n        elif type_ == 0x81:\n\n            value = []\n\n            bin = buf[ofs:ofs + size]\n            while len(bin) > 0:\n                match = re.search(b\"((?:[^\\x00].)+)\", bin)\n                if match:\n                    frag = match.group()\n                    s = frag.decode(\"utf-16\")\n                    s = xml.sax.saxutils.escape(s)\n                    
value.append(s)\n                    bin = bin[len(frag) + 2:]\n                    if len(bin) == 0:\n                        break\n\n                frag = re.search(b\"(\\x00*)\", bin).group()\n                if len(frag) % 2 == 0:\n                    for _ in range(len(frag) // 2):\n                        value.append('')\n\n                else:\n                    raise ParseError(\"Error parsing uneven substring of NULLs\")\n\n                bin = bin[len(frag):]\n\n            if value[-1].strip(\"\\x00\") == \"\":\n                value = value[:-1]\n\n            ret.append((type_, value))\n\n        else:\n            raise ParseError(\"Unexpected type encountered: \" + hex(type_))\n\n        ofs += size\n    return ret\n\n\nExtractedRecord = namedtuple(\n    'ExtractedRecord', ['offset', 'num', 'timestamp', 'substitutions'])\n\n\ndef extract_record(buf, offset):\n    \"\"\"\n    Parse an EVTX record into an ExtractedRecord namedtuple of its fields.\n\n    Args:\n      buf (buffer): the binary data from which to extract structures.\n      offset (int): address of the EVTX record.\n\n    Returns:\n      ExtractedRecord: the record's offset, record number, timestamp, and substitutions.\n\n    Raises:\n      ValueError: if the data at the given offset is not a valid record.\n      ParseError: for various reasons, including invalid timestamps and overruns.\n    \"\"\"\n    if not is_record(buf, offset):\n        raise ValueError('not a record')\n\n    record_size, record_num, qword = struct.unpack_from(\"<IQQ\", buf, offset + 0x4)\n    timestamp = datetime.datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600)\n    root_offset = offset + 0x18\n    try:\n        substitutions = extract_root_substitutions(buf, root_offset, offset + record_size)\n    except struct.error:\n        raise ParseError('buffer overrun')\n\n    return ExtractedRecord(offset, record_num, timestamp, substitutions)\n"
  },
  {
    "path": "evtxtract/main.py",
    "content": "import os\nimport sys\nimport logging\nimport os.path\nimport argparse\n\nimport evtxtract\nimport evtxtract.carvers\n\n\nlogger = logging.getLogger(__name__)\n\n\ndef output_record(args, r):\n\n    xmlhead = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<evtxtract>'\n    xmlfoot = '</evtxtract>'\n    if isinstance(r, evtxtract.CompleteRecord):\n        try:\n            if args.split:\n                fname = \"{}-{}.xml\".format(r.eid, r.offset)\n                fpath = os.path.join(args.out, fname)\n                with open(fpath, \"wb\") as f:\n                    f.write(xmlhead)\n                    f.write(r.xml.encode('utf-8'))\n                    f.write(xmlfoot)\n            else:\n                os.write(sys.stdout.fileno(), r.xml.encode('utf-8'))\n        except Exception as e:\n            logger.warn('failed to output record at offset: 0x%x: %s', r.offset, str(e), exc_info=True)\n        else:\n            sys.stdout.flush()\n\n    elif isinstance(r, evtxtract.IncompleteRecord):\n        try:\n            if args.split:\n                fname = \"{}-{}-incomplete.xml\".format(r.eid, r.offset)\n                fpath = os.path.join(args.out, fname)\n                with open(fpath, \"wb\") as f:\n                    f.write(xmlhead.encode('utf-8'))\n                    f.write(format_incomplete_record(r).encode('utf-8'))\n                    f.write(xmlfoot.encode('utf-8'))\n            else:\n                os.write(sys.stdout.fileno(), format_incomplete_record(r).encode('utf-8'))\n        except Exception as e:\n            logger.warn('failed to output record at offset: 0x%x: %s', r.offset, str(e), exc_info=True)\n        else:\n            sys.stdout.flush()\n\n\ndef format_incomplete_record(record):\n    ret = []\n\n    ret.append('<Record>')\n    ret.append('<Offset>0x%x</Offset>' % (record.offset))\n    ret.append('<EventID>%d</EventID>' % (record.eid))\n    ret.append('<Substitutions>')\n    for i, (type_, value) in 
enumerate(record.substitutions):\n        ret.append('  <Substitution index=\"%d\">' % (i))\n        ret.append('    <Type>%d</Type>' % (type_))\n        if value is None:\n            ret.append('    <Value></Value>')\n        else:\n            ret.append('    <Value>%s</Value>' % (value))\n        ret.append('  </Substitution>')\n    ret.append('</Substitutions>')\n    ret.append('</Record>')\n\n    return '\\n'.join(ret)\n\n\ndef main(argv=None):\n    if argv is None:\n        argv = sys.argv[1:]\n\n    parser = argparse.ArgumentParser(\n        description=\"Reconstruct EVTX event log records from binary data.\")\n    parser.add_argument(\"input\", type=str,\n                        help=\"Path to binary input file\")\n    parser.add_argument(\"-v\", \"--verbose\", action=\"store_true\",\n                        help=\"Enable debug logging\")\n    parser.add_argument(\"-q\", \"--quiet\", action=\"store_true\",\n                        help=\"Disable all output but errors\")\n    parser.add_argument(\"-s\", \"--split\", action=\"store_true\",\n                        help=\"split each event into its own file\")\n    parser.add_argument(\"-o\", \"--out\", metavar='output-directory', action=\"store\",\n                        help=\"output directory to store split files\")\n    args = parser.parse_args()\n\n    if args.verbose:\n        logging.basicConfig(level=logging.DEBUG)\n    elif args.quiet:\n        logging.basicConfig(level=logging.ERROR)\n    else:\n        logging.basicConfig(level=logging.INFO)\n\n    if args.split and not args.out:\n        logger.error('Error: the -o argument is required when using -s. 
please provide an output directory with -o')\n        exit(1)\n\n    if args.out and not os.path.isdir(args.out):\n        logger.error('Error: {0} is not a directory'.format(args.out))\n        exit(1)\n\n    with evtxtract.utils.Mmap(args.input) as mm:\n        num_complete = 0\n        num_incomplete = 0\n\n        if not args.split:\n            print('<?xml version=\"1.0\" encoding=\"UTF-8\"?>')\n            print('<evtxtract>')\n        for r in evtxtract.extract(mm):\n            \n            output_record(args, r)\n\n            if isinstance(r, evtxtract.CompleteRecord):\n                num_complete += 1\n\n            elif isinstance(r, evtxtract.IncompleteRecord):\n                num_incomplete += 1\n\n            else:\n                raise RuntimeError('unexpected return type')\n\n        if not args.split:\n            print('</evtxtract>')\n\n        logging.info('recovered %d complete records', num_complete)\n        logging.info('recovered %d incomplete records', num_incomplete)\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "evtxtract/templates.py",
    "content": "import re\nimport sys\nimport logging\n\nimport six\nimport Evtx.Evtx\nimport Evtx.Nodes\nimport Evtx.Views\n\nimport evtxtract.utils\nimport evtxtract.templates\n\n\nlogger = logging.getLogger(__name__)\n\n\nclass Template(object):\n    substitition_re = re.compile(\"\\[(Conditional|Normal) Substitution\\(index=(\\d+), type=(\\d+)\\)\\]\")\n\n    def __init__(self, eid, xml):\n        self.eid = eid\n        self.xml = xml\n\n        self._cached_placeholders = None\n        self._cached_id = None\n\n    def get_id(self):\n        \"\"\"\n        @rtype: str\n        @return: A string that can be parsed into constraints describing what\n          types of subsitutions this template can accept.\n          Short example: 1100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]\n        \"\"\"\n        if self._cached_id is not None:\n            return self._cached_id\n\n        ret = [str(self.eid)]\n        for index, type_, mode in self._get_placeholders():\n            if mode:\n                mode_str = \"c\"\n            else:\n                mode_str = \"n\"\n            ret.append(\"[%s|%s|%s]\" % (index, type_, mode_str))\n\n        self._cached_id = \"-\".join(ret)\n        return self._cached_id\n\n    def _get_placeholders(self):\n        \"\"\"\n        Get descriptors for each of the substitutions required by this\n          template.\n\n        Tuple schema: (index, type, is_conditional)\n\n        @rtype: list of (int, int, boolean)\n        \"\"\"\n        if self._cached_placeholders is not None:\n            return self._cached_placeholders\n\n        ret = []\n        for mode, index, type_ in Template.substitition_re.findall(self.xml):\n            ret.append((int(index), int(type_), mode == \"Conditional\"))\n\n        self._cached_placeholders = sorted(ret, key=lambda p: p[0])\n        return self._cached_placeholders\n\n    def match_substitutions(self, substitutions):\n        \"\"\"\n        Checks to see if the provided set of substitutions 
match the\n          placeholder values required by this template.\n\n        Note, this is only a best guess.  The number of substitutions\n          *may* be greater than the number of available slots. So we\n          must only check the slot and substitution types.\n\n        @type substitutions: list of (int, str)\n        @param substitutions: Tuple schema (type, value)\n        @rtype: boolean\n        \"\"\"\n        logger = logging.getLogger(\"match_substitutions\")\n        placeholders = self._get_placeholders()\n        logger.debug(\"Substitutions: %s\", str(substitutions))\n        logger.debug(\"Constraints: %s\", str(placeholders))\n        if len(placeholders) > len(substitutions):\n            logger.debug(\"Failing on lens: %d vs %d\",\n                         len(placeholders), len(substitutions))\n            return False\n        if max(placeholders, key=lambda k: k[0])[0] > len(substitutions):\n            logger.debug(\"Failing on max index: %d vs %d\",\n                         max(placeholders, key=lambda k: k[0])[0],\n                         len(substitutions))\n            return False\n\n        # it seems that some templates request different values than what are subsequently put in them\n        #   specifically, a Hex64 might be put into a SizeType field (EID 4624)\n        # this maps from the type described in a template, to possible additional types that a\n        #   record can provide for a particular substitution\n        overrides = {\n            16: set([21])\n        }\n\n        for index, type_, is_conditional in placeholders:\n            sub_type, sub_value = substitutions[index]\n            if is_conditional and sub_type == 0:\n                continue\n            if sub_type != type_:\n                if type_ not in overrides or sub_type not in overrides[type_]:\n                    logger.debug(\"Failing on type comparison, index %d: %d vs %d (mode: %s)\",\n                                 index, sub_type, 
type_, is_conditional)\n                    return False\n                else:\n                    logger.debug(\"Overriding template type %d with substitution type %d\", type_, sub_type)\n                    continue\n        return True\n\n    escape_re = re.compile(r\"\\\\\\\\(\\d)\")\n\n    @staticmethod\n    def _escape(value):\n        \"\"\"\n        Escape the static value to be used in a regular expression\n          subsititution. This processes any backreferences and\n          makes them plain, escaped sequences.\n\n        @type value: str\n        @rtype: str\n        \"\"\"\n        return Template.escape_re.sub(r\"\\\\\\\\\\\\\\\\\\1\", re.escape(value))\n\n    def insert_substitutions(self, substitutions):\n        \"\"\"\n        Return a copy of the template with the given substitutions inserted.\n\n        @type substitutions: list of (int, str)\n        @param substitutions: an ordered list of (type:int, value:str)\n        @rtype: str\n        \"\"\"\n        ret = self.xml\n        for index, pair in enumerate(substitutions):\n            type_, value = pair\n            from_pattern = \"\\[(Normal|Conditional) Substitution\\(index=%d, type=\\d+\\)\\]\" % index\n            if isinstance(value, six.string_types):\n                value = Template._escape(value)\n            else:\n                value = str(value)\n            ret = re.sub(from_pattern, value, ret)\n        return ret\n\n\nREPLACEMENT_PATTERNS = {\n    i: re.compile(\n        \"\\[(Normal|Conditional) Substitution\\(index=%d, type=\\d+\\)\\]\" % i)\n    for i in range(35)}\n\n\ndef make_replacement(template, index, substitution):\n    \"\"\"\n    Makes a substitution given a template as a string.\n\n    Implementation is a huge hack that depends on the\n    brittle template_format() output.\n\n    @type template: str\n    @type index: int\n    @type substitution: str\n    @rtype: str\n    \"\"\"\n    if index not in REPLACEMENT_PATTERNS:\n        from_pattern = 
re.compile(\"\\[(Normal|Conditional) Substitution\\(index=%d, type=\\d+\\)\\]\" % index)\n        REPLACEMENT_PATTERNS[index] = from_pattern\n    return REPLACEMENT_PATTERNS[index].sub(substitution, template)\n\n\ndef get_complete_template(root, current_index=0):\n    \"\"\"\n    Gets the template from a RootNode while resolving any\n    nested templates and fixing up their indices.\n    Depth first ordering/indexing.\n\n    Implementation is a huge hack that depends on the\n      brittle template_format() output.\n\n    @type root: RootNode\n    @type current_index: int\n    @rtype: str\n    \"\"\"\n    template = Evtx.Views.evtx_template_readable_view(root)  # TODO(wb): make sure this is working\n\n    # walk through each substitution.\n    # if its a normal node, continue\n    # else its a subtemplate, and we count the number of substitutions _it_ has\n    #   so that we can later fixup all the indices\n    replacements = []\n    for index, substitution in enumerate(root.substitutions()):\n        # find all sub-templates\n        if not isinstance(substitution, Evtx.Nodes.BXmlTypeNode):\n            replacements.append(current_index + index)\n            continue\n        # TODO(wb): hack here accessing ._root\n        subtemplate = get_complete_template(substitution._root,\n                                             current_index=current_index + index)\n        replacements.append(subtemplate)\n        current_index += subtemplate.count(\"Substitution(index=\")\n    replacements.reverse()\n\n    # now walk through all the indices and fix them up depth-first\n    for i, replacement in enumerate(replacements):\n        index = len(replacements) - i - 1\n        if isinstance(replacement, int):\n            # fixup index\n            from_pattern = \"index=%d,\" % index\n            to_pattern = \"index=%d,\" % replacement\n            template = template.replace(from_pattern, to_pattern)\n        if isinstance(replacement, six.string_types):\n            # 
insert sub-template\n            template = make_replacement(template, index, replacement)\n    return template\n\n\ndef get_template(record):\n    \"\"\"\n    Given a complete Record, parse out the nodes that make up the Template\n      and return it as a Template.\n\n    @type record: Record\n    @rtype: Template\n    \"\"\"\n    record_xml = Evtx.Views.evtx_record_xml_view(record)\n    eid = evtxtract.utils.get_eid(record_xml)\n    return Template(eid, get_complete_template(record.root()))\n"
  },
  {
    "path": "evtxtract/utils.py",
    "content": "import mmap\nimport logging\nfrom lxml import etree\n\n\nlogger = logging.getLogger(__name__)\n\n\ndef to_lxml(record_xml):\n    \"\"\"\n    Convert an XML string to an Etree element.\n\n    @type record_xml: str\n    @rtype: etree.Element\n    \"\"\"\n    if \"<?xml\" not in record_xml:\n        return etree.fromstring(\n            \"<?xml version=\\\"1.0\\\" standalone=\\\"yes\\\" ?>%s\" % record_xml)\n    else:\n        return etree.fromstring(record_xml)\n\n\ndef get_child(node, tag,\n              ns=\"{http://schemas.microsoft.com/win/2004/08/events/event}\"):\n    \"\"\"\n    Given an Etree element, get the first child node with the given tag.\n\n    @type node: etree.Element\n    @type tag: str\n    @type ns: str\n    @rtype: etree.Element or None\n    \"\"\"\n    return node.find(\"%s%s\" % (ns, tag))\n\n\ndef get_eid(record_xml):\n    \"\"\"\n    Given EVTX record XML, return the EID of the record.\n\n    Args:\n      record_xml (str)\n\n    Returns:\n      int: the event ID of the record\n    \"\"\"\n    return int(\n        get_child(\n            get_child(to_lxml(record_xml),\n                      \"System\"),\n            \"EventID\").text)\n\n\nclass Mmap(object):\n    \"\"\"\n    Convenience class for opening a read-only memory map for a file path.\n    \"\"\"\n\n    def __init__(self, filename):\n        super(Mmap, self).__init__()\n        self._filename = filename\n        self._f = None\n        self._mmap = None\n\n    def __enter__(self):\n        self._f = open(self._filename, \"rb\")\n        self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)\n        return self._mmap\n\n    def __exit__(self, type, value, traceback):\n        if self._mmap:\n            self._mmap.close()\n        if  self._f :\n            self._f.close()\n"
  },
  {
    "path": "evtxtract/version.py",
    "content": "__version__ = '0.2.4'\n"
  },
  {
    "path": "evtxtract.spec",
    "content": "# -*- mode: python -*-\n\nblock_cipher = None\n\na = Analysis(\n    ['evtxtract/main.py'],\n     pathex=['evtxtract'],\n     binaries=None,\n     datas=None,\n     hiddenimports=[],\n     hookspath=None,\n     runtime_hooks=None,\n     excludes=[\"tkinter\", \"_tkinter\", \"Tkinter\"],\n     win_no_prefer_redirects=None,\n     win_private_assemblies=None,\n     cipher=None)\n\na.binaries = a.binaries - TOC([\n ('sqlite3.dll', None, None),\n ('tcl85.dll', None, None),\n ('tk85.dll', None, None),\n ('_sqlite3', None, None),\n ('_ssl', None, None),\n ('_tkinter', None, None)])\n\npyz = PYZ(a.pure, a.zipped_data, cipher=None)\n\nexe = EXE(pyz,\n          a.scripts,\n          a.binaries,\n          exclude_binaries=False,\n          name='evtxtract',\n          #icon='resources/icon.ico',\n          debug=False,\n          strip=None,\n          upx=True,\n          console=True )\n\ncoll = COLLECT(exe,\n               a.binaries,\n               a.zipfiles,\n               a.datas,\n               strip=None,\n               upx=True,\n               name='evtxtract-dat')\n"
  },
  {
    "path": "setup.py",
    "content": "#!/usr/bin/env python\n\nimport os\nimport setuptools\n\n\n# this sets __version__\n# # via: http://stackoverflow.com/a/7071358/87207\n# # and: http://stackoverflow.com/a/2073599/87207\nwith open(os.path.join(\"evtxtract\", \"version.py\"), \"rb\") as f:\n     exec(f.read())\n\nsetuptools.setup(name=\"evtxtract\",\n      version=__version__,\n      description=\"EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.\",\n      author=\"Willi Ballenthin\",\n      author_email=\"william.ballenthin@fireeye.com\",\n      url=\"https://github.com/williballenthin/evtxtract\",\n      license=\"Apache 2.0 License\",\n      packages=setuptools.find_packages(),\n      entry_points={\n          \"console_scripts\": [\n              \"evtxtract=evtxtract.main:main\",\n          ]\n      },\n      install_requires=[\n          'six',\n          'lxml',\n          'pytest',\n          'python-evtx>=0.5.2',\n      ],\n)\n"
  },
  {
    "path": "tests/.gitignore",
    "content": "*memoryevtx/file.None*\n*.dat\n*.vacb\n"
  },
  {
    "path": "tests/fixtures.py",
    "content": "import os\n\nimport pytest\n\nimport evtxtract.utils\n\n\nCD = os.path.dirname(__file__)\nIMAGE_PATH = os.path.join(CD, 'joshua1.vmem')\n\n\n@pytest.fixture\ndef image(request):\n    if not os.path.exists(IMAGE_PATH):\n        raise RuntimeError('required image %s does not exist. see readme.' % (IMAGE_PATH))\n\n    return IMAGE_PATH\n\n\n@pytest.fixture\ndef image_file(request):\n    with open(image(request), 'rb') as f:\n        yield f\n\n\n@pytest.fixture\ndef image_mmap(request):\n    with evtxtract.utils.Mmap(image(request)) as mm:\n        yield mm\n\n"
  },
  {
    "path": "tests/readmd.txt",
    "content": "the tests require the image `joshua1.vmem` from:\n  - referenced: http://jessekornblum.livejournal.com/293291.html\n  - download: https://dl.dropboxusercontent.com/u/55819714/joshua1.zip\n"
  },
  {
    "path": "tests/test_all.py",
    "content": "import logging\n\nimport evtxtract\nimport evtxtract.carvers\n\nfrom fixtures import *\n\n\n#logging.basicConfig(level=logging.DEBUG)\n#logging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n\ndef test_find_chunks(image_mmap):\n    # these offsets were empirically collected from the test image\n    expected = set([\n        0xc7f000,\n        0xf0e000,\n        0x1374f20,\n        0x70cc000,\n        0xd727440,\n        0xdfe7000,\n        0x18851080,\n        0x1c31d000,\n        0x20b362c0,\n        0x276f8000,\n        0x2833e000,\n        0x28b4e000,\n        0x28b68000,\n        0x28d5e000,\n        0x28ead000,\n        0x2986e000,\n        0x2998c000,\n        0x29a9c000,\n        0x2ff30000,\n        0x2ffd0000,\n        0x3070f000,\n        0x30c1f000,\n        0x30c8f000,\n        0x30dbf000,\n        0x30f2f000,\n        0x30fff000,\n        0x3126f000,\n        0x328eac10,\n        0x34b75000,\n        0x38835000,\n        0x39981910,\n        0x39cc07a0,\n        0x3b91b000,\n    ])\n\n    assert expected == set(evtxtract.carvers.find_evtx_chunks(image_mmap))\n\n\ndef first(s):\n    for x in s:\n        return x\n\n\ndef test_extract_records(image_mmap):\n    # these offsets were empirically collected from the test image\n    expected_offsets = set([\n        0xf0e200,\n        0x70cc200,\n        0x70cca30,\n        0x1c31d200,\n        0x1c31d858,\n        0x20b364c0,\n        0x20b36b80,\n        0x276f8200,\n        0x276f88c0,\n        0x29a9c200,\n        0x30dbf200,\n        0x30dbf8c8,\n        0x30dbfb68,\n        0x30dbfde8,\n        0x34b75200,\n        0x34b758a0,\n        0x3b91b200,\n    ])\n\n    # these eids were empirically collected from the test image\n    expected_eids = set([\n        1,\n        2,\n        5,\n        21,\n        22,\n        100,\n        306,\n        823,\n        1001,\n        1002,\n        1006,\n        1009,\n        1020\n    ])\n\n    found_offsets = set([])\n  
  found_eids = set([])\n    for chunk_offset in evtxtract.carvers.find_evtx_chunks(image_mmap):\n        for recovered_record in evtxtract.carvers.extract_chunk_records(image_mmap, chunk_offset):\n            found_offsets.add(recovered_record.offset)\n            found_eids.add(recovered_record.eid)\n\n    assert expected_offsets == found_offsets\n    assert expected_eids == found_eids\n\n\ndef test_extract_templates(image_mmap):\n    # these template ids were empirically collected from the test image\n    expected_ids = set([\n        \"1-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]\",\n        \"2-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]\",\n        \"21-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]\",\n        \"22-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]\",\n        \"5-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|1|n]\",\n        \"100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]\",\n        \"306-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]\",\n        
\"823-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]-[19|1|n]-[20|20|n]-[21|1|n]\",\n        \"1001-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]\",\n        \"1002-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]\",\n        \"1006-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|13|n]-[19|13|n]\",\n        \"1009-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|8|n]\",\n        \"1020-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]\",\n    ])\n\n    found_ids = set([])\n    for chunk_offset in evtxtract.carvers.find_evtx_chunks(image_mmap):\n        for template in evtxtract.carvers.extract_chunk_templates(image_mmap, chunk_offset):\n            found_ids.add(template.get_id())\n\n    assert expected_ids == found_ids\n\n\ndef test_find_records(image_mmap):\n    records = list(evtxtract.carvers.find_evtx_records(image_mmap))\n    assert records[0] == 0x317198\n    assert records[-1] == 0x3D706A88\n    assert len(records) == 1674\n\n\ndef test_evtxtract(image_mmap):\n    num_complete = 0\n    num_incomplete = 0\n    for r in evtxtract.extract(image_mmap):\n        if isinstance(r, evtxtract.CompleteRecord):\n            num_complete += 1\n        elif isinstance(r, evtxtract.IncompleteRecord):\n            num_incomplete += 1\n        else:\n            raise RuntimeError('unexpected return type')\n\n    
assert num_complete == 52\n    assert num_incomplete == 1615\n"
  }
]