Repository: williballenthin/EVTXtract
Branch: master
Commit: 0895be4c2512
Files: 16
Total size: 65.6 KB
Directory structure:
gitextract_2o9c9iwa/
├── .gitignore
├── .travis.yml
├── LICENSE.TXT
├── README.md
├── evtxtract/
│ ├── __init__.py
│ ├── carvers.py
│ ├── main.py
│ ├── templates.py
│ ├── utils.py
│ └── version.py
├── evtxtract.spec
├── setup.py
└── tests/
├── .gitignore
├── fixtures.py
├── readmd.txt
└── test_all.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.py[cod]
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Editor backups and local working files
extract_valid_evtx_records_and_templates.py~
find_evtx_chunks.py~
*~
*.evtx
workspace/*.xml
workspace/*.txt
.idea/*
.idea
*_templates.txt
*_chunks.txt
================================================
FILE: .travis.yml
================================================
env:
global:
- secure: "j89gGCxDhMdJ9vP/dUhu06XUqYMeqMjxIx8/s8KdVOhE0BxOddU7dIQE5SvcGYMoW+W4NV+7/Pio/eIkY3SUXGOLlPLMLwMDmvg9nA6HwrcSs6zPGreCYhqf7RlCNEyHHoWZ6syHjx1cEL2c1FyXLelQ8r5ONAzWsTeDn7ctnFcGzr7EDhKEjC9LGZchjRMYVrWkOruskSwnJYkPCstNqcwLh7qPgAXktTxx0YMPIr0sTbwLTnZRiNCE0egFSoT6QLBggrM3Nv0DbZO7luyFEgozgp99CACDdJMeMsKqgkedk0E+nz2BV26EpqjjyIRJVMiwXZVLexkB8vSw9PhCGY36REwMIhJz6KCzVQMZyoNkSbrMWBpa3LqdfZGyMxBtLlBw/Yvv+pzB+OnbCIjooy1nTfOlyLlk6QzESnzEW/A/DsFVEnNXQjoAOZEZjj6SlaHl+r/Uw6sXAP4FjP8umI++E9+MnuI0T1bevX8ZMdJ6Qz7gPWnIuOGJqkvFqC7MYt3SY4O4O0DS1pHXQGoQOgoRYExY2VRqeJSEnRTvbr8S5uVWOOz5PRb4psM6gnl6eueN8uNNmqj+BDaZp8qu/uLJ31zvAq0q1+rxxaqB5OkVebi65Q7cN6IjnpCTCK5risooGNZRGco2thnRiuysQ3kEQjaWH/f15cfY8YYnuTA="
- secure: "xe37YTz7uegptreK36MVYl8+c9FzMqVsNr+/WMPBiZSmQ2T9mHoEP7QWL3AA1JSi7q2A3qWKNK15fUpFmu+u7+/15lFYUqihPEMZTBmmUYx6/I6bxP6d/sdu+mro13kflDzqTOdmkDU9X/Olympc5kI8qqheH6OTqwgjU0ypl/V53/3mXTgOre4LYvy+p0nkYdFCzipp+stuZyvn6tag57nvdnH8j8OLLR886ZV7KtB7RlOeaVK0NbyZ5XFBHJL1GXwV01lDfMsMokDHgkDfvjKVo63p1rYFScOUx7BBwpnSM1zr1hpHli562wQbs2eS4F8oYWOzXxhcRmvwAUxE/iqQvNyis+PD42xlhhOP1ubKJazqc9/AlKIHLNIvfsAVuFLt1eGI/g5/K+cpNUEs38+CXpqy2vTng4bFF4IabpxKlZxqpuCniKiDs5WExmhp2/fwXpNhdQmlh/WM2Mv4+vD8XyardIWZzmeR9EVFRAe+cGoejM+seGNKyfYxdOnj0fqmT2IORK9UKrmqisa9eBUOOg5kLirbnhfbax2J/FAcvAaMS8c5ZQHMPdswyaOdSpoJPPHULpI3uoPvYDFJcbuuZWlE0tuD/Qm5/4ABOeUfInOPAWtOBKYFl+YYIWUzCMEy6QxzJpSXqqXtNlR5y1l4M+PQzVY463+DXK3XBXE="
- ARTIFACTS_BUCKET=build-artifacts.floss.flare.fireeye.com
language: python
matrix:
  include:
    # Python versions are quoted so YAML keeps them as strings rather than
    # floats (an unquoted 3.10 would otherwise be read as 3.1).
    - os: linux
      sudo: required
      python: "2.7"
    - os: linux
      sudo: required
      python: "3.5"
    # Travis has no built-in Python runtime on OSX, so this job uses the
    # generic image and installs Python 2.7 itself in before_install.
    # ref: https://github.com/travis-ci/travis-ci/issues/2312
    - os: osx
      language: generic
before_install:
# fix erroring OSX job because of rvm issue
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then rvm get stable --auto-dotfiles; fi
# travis doesn't have py2.7 available on OSX, so we have to install it ourselves
# ref: https://github.com/travis-ci/travis-ci/issues/2312
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then git clone https://github.com/MacPython/terryfy ../terryfy; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then source ../terryfy/travis_tools.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then get_python_environment macpython 2.7.10; fi
install:
- pip install pyinstaller pep8
- echo "__version__ = '$(git describe --tags)'" > evtxtract/version.py
- pip install -e .
- pyinstaller evtxtract.spec && rm -r './dist/evtxtract-dat/'
script:
- find . -name \*.py -exec pep8 --ignore=E501 {} \;
- pushd ./tests && wget "https://dl.dropboxusercontent.com/u/55819714/joshua1.zip" && unzip joshua1.zip && popd
- py.test tests/ -v
addons:
artifacts:
debug: true
paths:
- $(find . -type f | grep -e '/bin/' -e 'dist/evtxtract' | awk 1 ORS=':')
target_paths: travis/$TRAVIS_OS_NAME/
================================================
FILE: LICENSE.TXT
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
Purpose
-------
EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.
Quick Run
---------
Install EVTXtract via `pip`:
pip install evtxtract
Now the tool is ready to go!
C:/Python27/Scripts/evtxtract.exe Z:/evidence/1/image.dd > Z:/work/1/evtx.xml
Quicker Run
-----------
Download standalone executable nightly builds of EVTXtract here:
- [Linux](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/evtxtract)
- [MacOS](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/osx/dist/evtxtract)
Then you can do:
./evtxtract /path/to/evidence > /path/to/output.xml
Background
----------
EVTX records are XML fragments encoded using a Microsoft-specific binary XML representation.
Despite the convenient format, it is not easy to recover EVTX event log records from a corrupted file or unallocated space.
This is because the complete representation of a record often depends on other records found nearby.
The event log service recognizes similarities among records and refactors commonalities into "templates".
A template is a fixed structure with placeholders that reserve space for variable content.
The on-disk event log record structure is a reference to a template, and a list of substitutions (the variable content that replaces a placeholder in a template).
To decode a record into XML, the event log service resolves the template and replaces its placeholders with the entries of the substitution array.
Therefore, template corruption renders many records unrecoverable within the local 64KB "chunk".
However, the substitution array for the remaining records may still be intact.
If so, it may be possible to produce XML fragments that match the original records if the damaged template can be reconstructed.
For many common events, such as process creation or account logon, empirical testing demonstrates the relevant templates remain mostly constant.
In these cases, recovering event log records boils down to identifying appropriate templates found in other EVTX chunks.
Algorithm
---------
1. Scan for chunk signatures ("ElfChnk")
- check header for sane values (0x80 <= size <= 0x200)
- verify checksums (header, data)
2. Extract records from valid chunks found in (1)
3. Extract templates from valid chunks found in (1)
4. Scan for record signatures
- check header for sane values
- extract timestamp
- attempt to parse substitutions
- attempt to decode substitutions into EID, other fields
5. Reconstruct records by reusing old templates with recovered substitutions
Usage
-----
EVTXtract is a pure Python script.
This means it easily runs on Windows, Linux, and MacOS.
Simply invoke the script, providing the path to a binary image, and EVTXtract writes its results to the standard out stream.
The binary file can be any data: a raw image, memory dump, etc.
Example command line:
C:/Python27/Scripts/evtxtract.exe Z:/evidence/1/image.dd > Z:/work/1/evtx.xml
Below are some example results from the above command.
They show two records: one complete and one incomplete.
The first record is completely reconstructed,
and is formatted just like it would be in event viewer.
However, EVTXtract was unable to completely reconstruct the second record,
since some critical template data was missing.
So, it's been formatted with as much data as was recovered.
EVTXtract uses a schema that allows you to continue processing despite incomplete data.
<Event xmlns="http://schemas.microsoft.com/win/2004/08/events/event">
<System>
<Provider Name="Microsoft-Windows-PrintService" Guid="{747ef6fd-e535-4d16-b510-42c90f6873a1}"></Provider>
<EventID Qualifiers="">823</EventID>
<Version>0</Version>
<Level>4</Level>
<Task>49</Task>
<Opcode>11</Opcode>
<Keywords>0x80000000000200</Keywords>
<TimeCreated SystemTime="2013-03-23 02:05:57.848455"></TimeCreated>
<EventRecordID>1</EventRecordID>
<Correlation ActivityID="" RelatedActivityID=""></Correlation>
<Execution ProcessID="1204" ThreadID="1208"></Execution>
<Channel>Microsoft-Windows-PrintService/Admin</Channel>
<Computer>JOSHUA</Computer>
<Security UserID="S-1-5-21-3454551831-629247693-1078506759-1000"></Security>
</System>
<UserData>
<ChangingDefaultPrinter xmlns:auto-ns3="http://schemas.microsoft.com/win/2004/08/events" xmlns="http://manifests.microsoft.com/win/2005/08/windows/printing/spooler/core/events">
<DefaultPrinterSelectedBySpooler>1</DefaultPrinterSelectedBySpooler>
<OldDefaultPrinter></OldDefaultPrinter>
<NewDefaultPrinter>Microsoft XPS Document Writer,winspool,Ne00:</NewDefaultPrinter>
<Status>0x000000</Status>
<Module>spoolsv.exe</Module>
</ChangingDefaultPrinter>
</UserData>
</Event>
...
<Record>
<Offset>0x317198</Offset>
<EventID>1531</EventID>
<Substitutions>
<Substitution index="0">
<Type>4</Type>
<Value>4</Value>
</Substitution>
<Substitution index="1">
<Type>4</Type>
<Value>0</Value>
</Substitution>
<Substitution index="2">
<Type>6</Type>
<Value>0</Value>
</Substitution>
<Substitution index="3">
<Type>6</Type>
<Value>1531</Value>
</Substitution>
<Substitution index="4">
<Type>0</Type>
<Value></Value>
</Substitution>
<Substitution index="5">
<Type>21</Type>
<Value>0x8000000000000000</Value>
</Substitution>
<Substitution index="6">
<Type>17</Type>
<Value>2013-03-23 02:02:35.679552</Value>
</Substitution>
<Substitution index="7">
<Type>0</Type>
<Value></Value>
</Substitution>
<Substitution index="8">
<Type>8</Type>
<Value>928</Value>
</Substitution>
<Substitution index="9">
<Type>8</Type>
<Value>1040</Value>
</Substitution>
<Substitution index="10">
<Type>10</Type>
<Value>132</Value>
</Substitution>
<Substitution index="11">
<Type>4</Type>
<Value>0</Value>
</Substitution>
<Substitution index="12">
<Type>19</Type>
<Value>S-1-5-18</Value>
</Substitution>
<Substitution index="13">
<Type>0</Type>
<Value></Value>
</Substitution>
<Substitution index="14">
<Type>1</Type>
<Value>Microsoft-Windows-User Profiles Service</Value>
</Substitution>
<Substitution index="15">
<Type>15</Type>
<Value>0001010f-010c-77e3-bf2f-3ef300001200</Value>
</Substitution>
<Substitution index="16">
<Type>1</Type>
<Value>Application</Value>
</Substitution>
</Substitutions>
</Record>
================================================
FILE: evtxtract/__init__.py
================================================
import logging
import collections
import evtxtract.utils
import evtxtract.carvers
import evtxtract.templates
logger = logging.getLogger(__name__)
VALUE = 1
class CompleteRecord(object):
    '''
    A record for which full XML is available.

    Instances only carry the attributes named in `__slots__`:
      offset: where the record was found in the scanned data.
      eid: the event ID of the record.
      xml: the XML produced for the record.
    '''
    __slots__ = ('offset', 'eid', 'xml')

    def __init__(self, offset, eid, xml):
        super(CompleteRecord, self).__init__()
        self.offset, self.eid, self.xml = offset, eid, xml
class IncompleteRecord(object):
    '''
    A record for which only the raw substitutions are available.

    Instances only carry the attributes named in `__slots__`:
      offset: where the record was found in the scanned data.
      eid: the event ID of the record.
      substitutions: the substitution entries recovered for the record.
    '''
    __slots__ = ('offset', 'eid', 'substitutions')

    def __init__(self, offset, eid, substitutions):
        super(IncompleteRecord, self).__init__()
        self.offset, self.eid, self.substitutions = offset, eid, substitutions
def extract(buf):
    '''
    Run the EVTXtract algorithm and reconstruct EVTX records from the given data.

    Records that live inside valid chunks are emitted first. Every other
    record signature is then parsed on its own and compared against the
    templates collected from the valid chunks: when exactly one template
    fits, the record is rebuilt from it; otherwise the raw substitutions
    are emitted.

    Args:
      buf (buffer): the binary data from which to extract structures.

    Returns:
      iterable[union[CompleteRecord, IncompleteRecord]]: a generator of either
        CompleteRecord or IncompleteRecord. Callers have to type-switch on
        these classes to decide how to handle each item.
    '''
    carvers = evtxtract.carvers

    # full scan of the data (#1): locate the valid chunks.
    chunk_offsets = set(carvers.find_evtx_chunks(buf))

    # offsets of the records already emitted from valid chunks, so that the
    # second scan does not report them again.
    seen_offsets = set()
    for chunk_offset in chunk_offsets:
        for found in carvers.extract_chunk_records(buf, chunk_offset):
            seen_offsets.add(found.offset)
            yield CompleteRecord(found.offset, found.eid, found.xml)

    # eid -> {template id -> template}
    templates_by_eid = collections.defaultdict(dict)
    for chunk_offset in chunk_offsets:
        for tmpl in carvers.extract_chunk_templates(buf, chunk_offset):
            templates_by_eid[tmpl.eid][tmpl.get_id()] = tmpl

    # full scan of the data (#2). this stays a separate pass because all
    # templates must have been collected before any matching happens.
    for record_offset in carvers.find_evtx_records(buf):
        if record_offset in seen_offsets:
            continue

        try:
            record = carvers.extract_record(buf, record_offset)
        except Exception as e:
            # pick the log message by failure kind; every failure skips the record.
            if isinstance(e, carvers.ParseError):
                message = 'parse error for record at offset: 0x%x: %s'
            elif isinstance(e, ValueError):
                message = 'timestamp parse error for record at offset: 0x%x: %s'
            else:
                message = 'unknown parse error for record at offset: 0x%x: %s'
            logger.info(message, record_offset, str(e))
            continue

        subs = record.substitutions
        if len(subs) < 4:
            logger.info('too few substitutions for record at offset: 0x%x', record_offset)
            continue

        # the EID is known to be substitution index 3
        eid = subs[3][VALUE]

        candidates = set(
            tmpl
            for tmpl in templates_by_eid.get(eid, {}).values()
            if tmpl.match_substitutions(subs))

        if len(candidates) == 1:
            (tmpl,) = candidates
            yield CompleteRecord(record_offset, eid, tmpl.insert_substitutions(subs))
            continue

        # zero or several candidates: nothing unambiguous to rebuild from.
        if not candidates:
            logger.info('no matching templates for record at offset: 0x%x', record_offset)
        else:
            logger.info('too many templates for record at offset: 0x%x', record_offset)
        yield IncompleteRecord(record_offset, eid, subs)
================================================
FILE: evtxtract/carvers.py
================================================
import re
import struct
import logging
import binascii
import datetime
import xml.sax.saxutils
from collections import namedtuple
import six
import Evtx.Evtx
import Evtx.Views
import evtxtract.templates
logger = logging.getLogger(__name__)
# TODO: this should be part of python-evtx
EVTX_HEADER_MAGIC = b"ElfChnk"
EVTX_RECORD_MAGIC = b"\x2a\x2a\x00\x00"
CHUNK_SIZE = 0x10000
MIN_CHUNK_HEADER_SIZE = 0x80
MAX_CHUNK_HEADER_SIZE = 0x200
class ParseError(RuntimeError):
    """Raised when carved data cannot be parsed as the expected EVTX structure."""
def is_chunk_header(buf, offset):
    """
    Return True if the offset appears to be an EVTX Chunk header.

    Implementation note: checks the magic header and size field for reasonable
      values, requires room for a whole chunk in the buffer, and then verifies
      the header and data checksums.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): the address of the potential EVTX chunk header.

    Returns:
      bool: if the offset appears to be an EVTX chunk header.
    """
    if len(buf) < offset + 0x2C:
        # the magic and size reads below would run off the end of the buffer
        return False

    magic = struct.unpack_from("<7s", buf, offset)[0]
    if magic != EVTX_HEADER_MAGIC:
        return False

    size = struct.unpack_from("<I", buf, offset + 0x28)[0]
    if not (MIN_CHUNK_HEADER_SIZE <= size <= MAX_CHUNK_HEADER_SIZE):
        return False

    if len(buf) <= offset + size:
        # the chunk header overruns the buffer end
        return False

    if len(buf) < offset + CHUNK_SIZE:
        # not enough data for a whole chunk. this is rejected before parsing
        # because the answer is False whether or not the header parses.
        return False

    try:
        chunk = Evtx.Evtx.ChunkHeader(buf, offset)
    except Exception:
        # any parser failure means this is not a usable chunk. this is
        # deliberately not a bare `except:` so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        logger.debug('failed to parse chunk header', exc_info=True)
        return False

    if chunk.calculate_header_checksum() != chunk.header_checksum():
        return False

    if chunk.calculate_data_checksum() != chunk.data_checksum():
        return False

    return True
def find_evtx_chunks(buf):
    """
    Scan the given data for valid EVTX chunk structures.

    Each occurrence of the chunk magic is tested with `is_chunk_header`. The
      search resumes one byte past every hit, whether or not it was accepted.

    Args:
      buf (buffer): the binary data from which to extract structures.

    Returns:
      iterable[int]: generator of offsets of chunks
    """
    candidate = buf.find(EVTX_HEADER_MAGIC, 0)
    while candidate != -1:
        if is_chunk_header(buf, candidate):
            yield candidate
        candidate = buf.find(EVTX_HEADER_MAGIC, candidate + 1)
def is_record(buf, offset):
    """
    Return True if the offset appears to be an EVTX record.

    A candidate is accepted when it starts with the record magic, declares a
      size between 0x30 and 0x10000 that fits in the buffer, and repeats that
      same size in its final four bytes.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): the address of the potential record.

    Returns:
      bool: if its a record.
    """
    total = len(buf)
    if offset + 8 > total:
        # not enough room for the magic and size fields
        return False

    signature, length = struct.unpack_from("<II", buf, offset)
    if signature != 0x00002a2a or length < 0x30 or length > 0x10000:
        return False

    record_end = offset + length
    if record_end > total:
        return False

    # the size is stored a second time in the last four bytes of the record
    (trailer,) = struct.unpack_from("<I", buf, record_end - 4)
    return trailer == length
def find_evtx_records(buf):
    """
    Generate the offsets of apparent EVTX records in the given buffer.

    Each occurrence of the record magic is tested with `is_record`. The search
      resumes one byte past every hit, whether or not it was accepted.

    Args:
      buf (buffer): the binary data from which to extract structures.

    Returns:
      iterable[int]: the offsets of EVTX records.
    """
    candidate = buf.find(EVTX_RECORD_MAGIC, 0)
    while candidate != -1:
        if is_record(buf, candidate):
            yield candidate
        candidate = buf.find(EVTX_RECORD_MAGIC, candidate + 1)
# a record recovered from a chunk: its offset, its event ID and its XML.
RecoveredRecord = namedtuple('RecoveredRecord', 'offset eid xml')
def extract_chunk_records(buf, offset):
    """
    Generates EVTX records from the EVTX chunk at the given offset.

    Records that cannot be rendered are logged and skipped.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): offset to EVTX chunk

    Returns:
      iterable[RecoveredRecord]: the (offset, eid, xml) of each record that
        could be rendered.

    Raises:
      ParseError: if the chunk header cannot be parsed.
    """
    try:
        chunk = Evtx.Evtx.ChunkHeader(buf, offset)
    except Exception:
        # not a bare `except:`, so KeyboardInterrupt/SystemExit are not
        # converted into a ParseError.
        raise ParseError('failed to parse chunk header')

    # the same cache dict is handed to every render call for this chunk
    cache = {}
    for record in chunk.records():
        try:
            record_xml = Evtx.Views.evtx_record_xml_view(record, cache=cache)
            eid = evtxtract.utils.get_eid(record_xml)
            yield RecoveredRecord(record.offset(), eid, record_xml)
        except UnicodeEncodeError:
            logger.info("Unicode encoding issue processing record at 0x%X", record.offset())
            continue
        except UnicodeDecodeError:
            logger.info("Unicode decoding issue processing record at 0x%X", record.offset())
            continue
        except Evtx.Evtx.InvalidRecordException:
            logger.info("EVTX parsing issue processing record at 0x%X", record.offset())
            continue
        except Exception:
            logger.info("Unknown exception processing record at 0x%X", record.offset(), exc_info=True)
            continue
def extract_chunk_templates(buf, offset):
    """
    Generates EVTX record templates from the EVTX chunk at the given offset.

    One template is requested per record in the chunk; records for which that
      fails are logged and skipped.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): offset to EVTX chunk.

    Returns:
      iterable[evtxtract.templates.Template]: a generator of the templates
        obtained from the chunk's records.

    Raises:
      ParseError: if the chunk header cannot be parsed.
    """
    try:
        chunk = Evtx.Evtx.ChunkHeader(buf, offset)
    except Exception:
        # not a bare `except:`, so KeyboardInterrupt/SystemExit are not
        # converted into a ParseError.
        raise ParseError('failed to parse chunk header')

    for record in chunk.records():
        try:
            yield evtxtract.templates.get_template(record)
        except UnicodeEncodeError:
            logger.info("Unicode encoding issue processing record at 0x%X", record.offset())
            continue
        except UnicodeDecodeError:
            logger.info("Unicode decoding issue processing record at 0x%X", record.offset())
            continue
        except Evtx.Evtx.InvalidRecordException:
            logger.info("EVTX parsing issue processing record at 0x%X", record.offset())
            continue
        except Exception:
            logger.info("Unknown exception processing record at 0x%X", record.offset(), exc_info=True)
            continue
# lookup table indexed by a substitution's type byte: True when that byte is
# an accepted EVTX node type (0-21, 33 and 129), False for everything else.
VALID_SUBSTITUTION_TYPES = [False] * 256
for type_code in list(range(22)) + [33, 129]:
    VALID_SUBSTITUTION_TYPES[type_code] = True
class MaxOffsetReached(Exception):
    """Raised when parsing runs past the maximum offset the caller allowed."""
def does_root_have_resident_template(buf, offset, max_offset):
    """
    Guess whether a RootNode has a resident template
      from the given buffer and offset, not inspecting substitution
      descriptors beyond the given max_offset.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): address of the RootNode to inspect.
      max_offset (int): don't parse beyond this address.

    Returns:
      boolean: if the RootNode appears to have a resident template. Also False
        when there is too little room before max_offset to apply the heuristic.

    Raises:
      struct.error: if the buffer ends before the fields being inspected.
    """
    ofs = offset
    token = struct.unpack_from("<b", buf, ofs)[0]
    if token == 0x0F:  # stream start
        ofs += 4
    ofs += 6  # template offset

    # now, since we don't know where the chunk header is
    #  for this record, we can't use the template offset
    #  to decide if its resident or not
    # instead, we assume that if the template is resident,
    #  then it begins immediately. if this is true, and the
    #  template is resident, then the next fields are:
    #    DWORD next_offset  (range 0-0x10000?, length 0x4)
    #    GUID  template_id  (length 0x10, essentially random bytes)
    #    DWORD template_length (range 0-0x10000?, length 0x4)
    # if the template is non-resident, then the fields are:
    #    DWORD num_subs (range 0-100?)
    #    WORD  size  \
    #    BYTE  type  (value one of 0-21,33,129)  | repeat num_subs times
    #    BYTE  zero  (value 0)                   /
    # the key takeaway is that, with `ofs` pointing at the first descriptor,
    # we can test that
    #    *(ofs + 2 + 4i)  (0 <= i < min(num_subs, 4))
    #  is in the set {0-21, 33, 129}, and that
    #    *(ofs + 3 + 4i)  (0 <= i < min(num_subs, 4))
    #  is 0.  If these conditions hold, then the template is probably
    #  non-resident.
    #
    # TODO(wb): what if num_subs == 1 or 2?

    ofs += 4  # next_offset or num_subs
    maybe_num_subs = struct.unpack_from("<I", buf, ofs)[0]
    if maybe_num_subs > 100:
        # too large to be a substitution count, so presumably a next_offset
        return True

    ofs += 4  # template_id or size

    # inspect at most four descriptors; a count of zero still inspects two.
    num_checked = min(maybe_num_subs or 2, 4)
    if max_offset < ofs + 4 + (4 * num_checked):
        return False

    # every inspected descriptor must end in a zero byte...
    for i in range(num_checked):
        byte = struct.unpack_from("<B", buf, ofs + 3 + (i * 4))[0]
        if byte != 0:
            return True

    # ...and must carry a known substitution type.
    for i in range(num_checked):
        byte = struct.unpack_from("<B", buf, ofs + 2 + (i * 4))[0]
        if not VALID_SUBSTITUTION_TYPES[byte]:
            return True

    return False
def extract_root_substitutions(buf, offset, max_offset):
    """
    Parse a RootNode into a list of its substitutions, not parsing beyond
      the max offset.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): address of the RootNode to parse.
      max_offset (int): don't parse beyond this address.

    Returns:
      list[tuple[int, variant]]: list of substitution tuples (type, value).
        Substitutions of an embedded binary XML node (type 0x21) are
        flattened into the returned list.

    Raises:
      ParseError: for various reasons, including invalid timestamps,
        unexpected substitution types and malformed string arrays.
      MaxOffsetReached: if a substitution value starts beyond max_offset.
      struct.error: if a fixed-size read runs past the end of buf.
    """
    # NOTE: the trailing comments on the `ofs += ...` lines name the field
    #  that `ofs` points at *after* the increment.
    ofs = offset
    token = struct.unpack_from("<b", buf, ofs)[0]
    if token == 0x0F:  # stream start
        ofs += 4
    ofs += 6  # template offset

    if does_root_have_resident_template(buf, offset, max_offset):
        # have to hope that the template begins immediately
        # template_offset = struct.unpack_from("<I", buf, ofs)[0]
        logger.debug("0x%x: resident template", offset)
        ofs += 4  # next offset
        ofs += 4  # guid
        ofs += 0x10  # template_length
        template_length = struct.unpack_from("<I", buf, ofs)[0]
        ofs += 4
        ofs += template_length  # num_subs
    else:
        logger.debug("0x%x: non-resident template", offset)
        ofs += 4  # num_subs

    num_subs = struct.unpack_from("<I", buf, ofs)[0]
    if num_subs > 100:
        raise ParseError("Unexpected number of substitutions: %d at %s" %
                         (num_subs, hex(ofs)))
    ofs += 4  # begin sub list

    # first pass: the substitution descriptors (size, type), 4 bytes each
    substitutions = []
    for _ in range(num_subs):
        size, type_ = struct.unpack_from("<HB", buf, ofs)
        if not VALID_SUBSTITUTION_TYPES[type_]:
            raise ParseError('Unexpected substitution type: ' + hex(type_))
        substitutions.append((type_, size))
        ofs += 4

    # second pass: the substitution values, packed back to back
    ret = []
    for type_, size in substitutions:
        if ofs > max_offset:
            raise MaxOffsetReached("Substitutions overran record buffer.")

        value = None
        #[0] = parse_null_type_node,
        if type_ == 0x0:
            value = None
            ret.append((type_, value))
        #[1] = parse_wstring_type_node,
        elif type_ == 0x1:
            s = buf[ofs:ofs + size]
            s = s.decode('utf-16le')
            s = xml.sax.saxutils.escape(s)
            value = s
            ret.append((type_, value))
        #[2] = parse_string_type_node,
        elif type_ == 0x2:
            s = buf[ofs:ofs + size]
            s = s.decode('ascii')
            s = xml.sax.saxutils.escape(s)
            value = s
            ret.append((type_, value))
        #[3] = parse_signed_byte_type_node,
        elif type_ == 0x3:
            value = struct.unpack_from("<b", buf, ofs)[0]
            ret.append((type_, value))
        #[4] = parse_unsigned_byte_type_node,
        elif type_ == 0x4:
            value = struct.unpack_from("<B", buf, ofs)[0]
            ret.append((type_, value))
        #[5] = parse_signed_word_type_node,
        elif type_ == 0x5:
            value = struct.unpack_from("<h", buf, ofs)[0]
            ret.append((type_, value))
        #[6] = parse_unsigned_word_type_node,
        elif type_ == 0x6:
            value = struct.unpack_from("<H", buf, ofs)[0]
            ret.append((type_, value))
        #[7] = parse_signed_dword_type_node,
        elif type_ == 0x7:
            value = struct.unpack_from("<i", buf, ofs)[0]
            ret.append((type_, value))
        #[8] = parse_unsigned_dword_type_node,
        elif type_ == 0x8:
            value = struct.unpack_from("<I", buf, ofs)[0]
            ret.append((type_, value))
        #[9] = parse_signed_qword_type_node,
        elif type_ == 0x9:
            value = struct.unpack_from("<q", buf, ofs)[0]
            ret.append((type_, value))
        #[10] = parse_unsigned_qword_type_node,
        elif type_ == 0xA:
            value = struct.unpack_from("<Q", buf, ofs)[0]
            ret.append((type_, value))
        #[11] = parse_float_type_node,
        elif type_ == 0xB:
            value = struct.unpack_from("<f", buf, ofs)[0]
            ret.append((type_, value))
        #[12] = parse_double_type_node,
        elif type_ == 0xC:
            value = struct.unpack_from("<d", buf, ofs)[0]
            ret.append((type_, value))
        #[13] = parse_boolean_type_node,
        elif type_ == 0xD:
            # any non-zero DWORD is true; the previous `> 1` test reported
            #  the ordinary "true" value of 1 as False.
            value = struct.unpack_from("<I", buf, ofs)[0] != 0
            ret.append((type_, value))
        #[14] = parse_binary_type_node,
        elif type_ == 0xE:
            # hexlify() returns bytes on Python 3; decode so the value
            #  renders as plain hex text rather than as "b'...'".
            value = binascii.hexlify(buf[ofs:ofs + size]).decode('ascii')
            ret.append((type_, value))
        #[15] = parse_guid_type_node,
        elif type_ == 0xF:
            # read the GUID at the current substitution (`ofs`), not at the
            #  start of the root node (`offset`).
            # layout: DWORD, WORD, WORD (little endian), then 8 raw bytes.
            guid = struct.unpack_from("<IHH8B", buf, ofs)
            value = ("{:08x}-{:04x}-{:04x}-{:02x}{:02x}-"
                     "{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}").format(*guid)
            ret.append((type_, value))
        #[16] = parse_size_type_node,
        elif type_ == 0x10:
            if size == 0x4:
                value = struct.unpack_from("<I", buf, ofs)[0]
            elif size == 0x8:
                value = struct.unpack_from("<Q", buf, ofs)[0]
            else:
                raise ParseError('unexpected sizetypenode value: ' + hex(size))
            ret.append((type_, value))
        #[17] = parse_filetime_type_node,
        elif type_ == 0x11:
            qword = struct.unpack_from("<Q", buf, ofs)[0]
            try:
                value = datetime.datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600)
            except (ValueError, OverflowError, OSError):
                # utcfromtimestamp reports out-of-range values with any of these
                raise ParseError('invalid timestamp')
            ret.append((type_, value))
        #[18] = parse_systemtime_type_node,
        elif type_ == 0x12:
            # eight unsigned 16-bit fields ("W" is not a valid struct code):
            #  year, month, day-of-week, day, hour, minute, second, millisecond
            parts = struct.unpack_from("<8H", buf, ofs)
            try:
                value = datetime.datetime(parts[0], parts[1],
                                          parts[3],  # skip part 2 (day of week)
                                          parts[4], parts[5],
                                          parts[6],
                                          parts[7] * 1000)  # milliseconds -> microseconds
            except ValueError:
                raise ParseError('invalid timestamp')
            ret.append((type_, value))
        #[19] = parse_sid_type_node, -- SIDTypeNode, 0x13
        elif type_ == 0x13:
            version, num_elements = struct.unpack_from("<BB", buf, ofs)
            id_high, id_low = struct.unpack_from(">IH", buf, ofs + 2)
            value = "S-%d-%d" % (version, (id_high << 16) ^ id_low)
            for i in range(num_elements):
                val = struct.unpack_from("<I", buf, ofs + 8 + (4 * i))
                value += "-%d" % val
            ret.append((type_, value))
        #[20] = parse_hex32_type_node, -- Hex32TypeNoe, 0x14
        #[21] = parse_hex64_type_node, -- Hex64TypeNode, 0x15
        elif type_ == 0x14 or type_ == 0x15:
            # render the little-endian bytes most-significant first.
            # bytearray yields ints on both Python 2 and Python 3.
            raw = bytearray(buf[ofs:ofs + size])
            value = "0x" + "".join("%02x" % c for c in reversed(raw))
            ret.append((type_, value))
        #[33] = parse_bxml_type_node, -- BXmlTypeNode, 0x21
        elif type_ == 0x21:
            subs = extract_root_substitutions(buf, ofs, max_offset)
            ret.extend(subs)
        #[129] = WstringArrayTypeNode, 0x81
        elif type_ == 0x81:
            value = []
            blob = buf[ofs:ofs + size]
            while len(blob) > 0:
                match = re.search(b"((?:[^\x00].)+)", blob)
                if match:
                    frag = match.group()
                    s = frag.decode("utf-16le")
                    s = xml.sax.saxutils.escape(s)
                    value.append(s)
                    blob = blob[len(frag) + 2:]
                    if len(blob) == 0:
                        break
                frag = re.search(b"(\x00*)", blob).group()
                if match is None and len(frag) == 0:
                    # neither a string nor a run of NULLs could be consumed
                    #  (e.g. a trailing odd byte); bail out instead of
                    #  looping forever on the same data.
                    raise ParseError("Error parsing wstring array: no progress")
                if len(frag) % 2 == 0:
                    for _ in range(len(frag) // 2):
                        value.append('')
                else:
                    raise ParseError("Error parsing uneven substring of NULLs")
                blob = blob[len(frag):]
            # drop a trailing empty entry; an empty array has nothing to drop
            if value and value[-1].strip("\x00") == "":
                value = value[:-1]
            ret.append((type_, value))
        else:
            raise ParseError("Unexpected type encountered: " + hex(type_))
        ofs += size
    return ret
# Result of parsing one carved EVTX record.
# Fields, in order: offset, num, timestamp, substitutions.
ExtractedRecord = namedtuple('ExtractedRecord',
                             'offset num timestamp substitutions')
def extract_record(buf, offset):
    """
    Parse the EVTX record found at `offset` into an ExtractedRecord.

    Args:
      buf (buffer): the binary data from which to extract structures.
      offset (int): address of the EVTX record.

    Returns:
      ExtractedRecord: the record's offset, record number, timestamp and
        the substitutions of its root node.

    Raises:
      ValueError: if is_record() rejects the data at `offset`.
      ParseError: for various reasons, including overruns while parsing
        the substitutions.
    """
    if not is_record(buf, offset):
        raise ValueError('not a record')

    # the size, record number and FILETIME follow the 4 bytes at the record start
    header = struct.unpack_from("<IQQ", buf, offset + 0x4)
    record_size, record_num, filetime = header

    # FILETIME counts 100ns ticks since 1601; shift to the Unix epoch
    unix_seconds = float(filetime) * 1e-7 - 11644473600
    timestamp = datetime.datetime.utcfromtimestamp(unix_seconds)

    root_start = offset + 0x18
    record_end = offset + record_size
    try:
        substitutions = extract_root_substitutions(buf, root_start, record_end)
    except struct.error:
        raise ParseError('buffer overrun')

    return ExtractedRecord(offset, record_num, timestamp, substitutions)
================================================
FILE: evtxtract/main.py
================================================
import os
import sys
import logging
import os.path
import argparse
import evtxtract
import evtxtract.carvers
logger = logging.getLogger(__name__)
def output_record(args, r):
    """
    Emit one recovered record, either to its own file or to stdout.

    When `args.split` is set, the record is written to a new file under
    `args.out`, wrapped in an XML declaration and an <evtxtract> element.
    Otherwise the record's XML is written to the stdout file descriptor.
    Any failure is logged as a warning and swallowed so that one bad record
    does not abort the whole run.

    Args:
      args: parsed command line arguments; `split` and `out` are read.
      r: an evtxtract.CompleteRecord or evtxtract.IncompleteRecord; any
        other type is silently ignored.
    """
    xmlhead = '<?xml version="1.0" encoding="UTF-8"?>\n<evtxtract>'
    xmlfoot = '</evtxtract>'

    if isinstance(r, evtxtract.CompleteRecord):
        try:
            if args.split:
                fname = "{}-{}.xml".format(r.eid, r.offset)
                fpath = os.path.join(args.out, fname)
                with open(fpath, "wb") as f:
                    # the file is opened in binary mode, so everything
                    #  written must be bytes (str raises TypeError on py3).
                    f.write(xmlhead.encode('utf-8'))
                    f.write(r.xml.encode('utf-8'))
                    f.write(xmlfoot.encode('utf-8'))
            else:
                # os.write bypasses sys.stdout's buffer; flush first so any
                #  text already print()ed stays ahead of this record.
                sys.stdout.flush()
                os.write(sys.stdout.fileno(), r.xml.encode('utf-8'))
        except Exception as e:
            logger.warning('failed to output record at offset: 0x%x: %s', r.offset, str(e), exc_info=True)
        else:
            sys.stdout.flush()
    elif isinstance(r, evtxtract.IncompleteRecord):
        try:
            if args.split:
                fname = "{}-{}-incomplete.xml".format(r.eid, r.offset)
                fpath = os.path.join(args.out, fname)
                with open(fpath, "wb") as f:
                    f.write(xmlhead.encode('utf-8'))
                    f.write(format_incomplete_record(r).encode('utf-8'))
                    f.write(xmlfoot.encode('utf-8'))
            else:
                sys.stdout.flush()
                os.write(sys.stdout.fileno(), format_incomplete_record(r).encode('utf-8'))
        except Exception as e:
            logger.warning('failed to output record at offset: 0x%x: %s', r.offset, str(e), exc_info=True)
        else:
            sys.stdout.flush()
def format_incomplete_record(record):
    """
    Render an incomplete record as a newline-joined XML-like fragment.

    The fragment lists the record's offset (hex), its event ID, and one
    <Substitution> element per (type, value) pair in `record.substitutions`.
    A value of None is rendered as an empty <Value> element.

    Args:
      record: object with `offset`, `eid` and `substitutions` attributes.

    Returns:
      str: the formatted fragment.
    """
    lines = [
        '<Record>',
        '<Offset>0x%x</Offset>' % (record.offset),
        '<EventID>%d</EventID>' % (record.eid),
        '<Substitutions>',
    ]

    for position, (type_, value) in enumerate(record.substitutions):
        if value is None:
            value_line = ' <Value></Value>'
        else:
            value_line = ' <Value>%s</Value>' % (value)
        lines.extend([
            ' <Substitution index="%d">' % (position),
            ' <Type>%d</Type>' % (type_),
            value_line,
            ' </Substitution>',
        ])

    lines.extend(['</Substitutions>', '</Record>'])
    return '\n'.join(lines)
def main(argv=None):
    """
    Command line entry point: carve EVTX records out of a binary file.

    Recovered records are written to stdout wrapped in an <evtxtract>
    element, or, with -s/-o, to one file per record in the output directory.

    Args:
      argv (list[str]): command line arguments, excluding the program name.
        Defaults to sys.argv[1:].

    Raises:
      SystemExit: on invalid arguments.
      RuntimeError: if the extractor yields an unexpected record type.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(
        description="Reconstruct EVTX event log records from binary data.")
    parser.add_argument("input", type=str,
                        help="Path to binary input file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("-s", "--split", action="store_true",
                        help="split each event into its own file")
    parser.add_argument("-o", "--out", metavar='output-directory', action="store",
                        help="output directory to store split files")
    # honor the argv passed by the caller rather than always reading sys.argv
    args = parser.parse_args(args=argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    # sys.exit is used instead of the `exit` builtin, which is installed by
    #  the `site` module and is not guaranteed to exist (e.g. frozen builds).
    if args.split and not args.out:
        logger.error('Error: the -o argument is required when using -s. please provide an output directory with -o')
        sys.exit(1)

    if args.out and not os.path.isdir(args.out):
        logger.error('Error: {0} is not a directory'.format(args.out))
        sys.exit(1)

    with evtxtract.utils.Mmap(args.input) as mm:
        num_complete = 0
        num_incomplete = 0

        if not args.split:
            print('<?xml version="1.0" encoding="UTF-8"?>')
            print('<evtxtract>')
            # records are written straight to the stdout file descriptor;
            #  flush so the buffered header is emitted before them.
            sys.stdout.flush()

        for r in evtxtract.extract(mm):
            output_record(args, r)

            if isinstance(r, evtxtract.CompleteRecord):
                num_complete += 1
            elif isinstance(r, evtxtract.IncompleteRecord):
                num_incomplete += 1
            else:
                raise RuntimeError('unexpected return type')

        if not args.split:
            print('</evtxtract>')

        logging.info('recovered %d complete records', num_complete)
        logging.info('recovered %d incomplete records', num_incomplete)
# When run as a script, invoke the CLI and pass main()'s return value to
# sys.exit as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
================================================
FILE: evtxtract/templates.py
================================================
import re
import sys
import logging
import six
import Evtx.Evtx
import Evtx.Nodes
import Evtx.Views
import evtxtract.utils
import evtxtract.templates
logger = logging.getLogger(__name__)
class Template(object):
    """
    An event template: an event ID plus template XML whose substitution
    slots appear as text of the form
    `[Normal Substitution(index=N, type=T)]` or
    `[Conditional Substitution(index=N, type=T)]`.
    """
    # captures (mode, index, type) for each substitution placeholder
    substitition_re = re.compile(r"\[(Conditional|Normal) Substitution\(index=(\d+), type=(\d+)\)\]")

    def __init__(self, eid, xml):
        """
        @type eid: int
        @param eid: the event ID this template belongs to.
        @type xml: str
        @param xml: the template XML containing substitution placeholders.
        """
        self.eid = eid
        self.xml = xml

        # lazily computed by _get_placeholders() and get_id()
        self._cached_placeholders = None
        self._cached_id = None

    def get_id(self):
        """
        @rtype: str
        @return: A string that can be parsed into constraints describing what
          types of substitutions this template can accept.
          Each entry is [index|type|mode], with mode "c" for conditional and
          "n" for normal substitutions.
          Short example: 1100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]
        """
        if self._cached_id is not None:
            return self._cached_id

        ret = [str(self.eid)]
        for index, type_, mode in self._get_placeholders():
            if mode:
                mode_str = "c"
            else:
                mode_str = "n"
            ret.append("[%s|%s|%s]" % (index, type_, mode_str))

        self._cached_id = "-".join(ret)
        return self._cached_id

    def _get_placeholders(self):
        """
        Get descriptors for each of the substitutions required by this
          template, sorted by index.

        Tuple schema: (index, type, is_conditional)

        @rtype: list of (int, int, boolean)
        """
        if self._cached_placeholders is not None:
            return self._cached_placeholders

        ret = []
        for mode, index, type_ in Template.substitition_re.findall(self.xml):
            ret.append((int(index), int(type_), mode == "Conditional"))

        self._cached_placeholders = sorted(ret, key=lambda p: p[0])
        return self._cached_placeholders

    def match_substitutions(self, substitutions):
        """
        Checks to see if the provided set of substitutions match the
          placeholder values required by this template.

        Note, this is only a best guess. The number of substitutions
          *may* be greater than the number of available slots. So we
          must only check the slot and substitution types.

        @type substitutions: list of (int, str)
        @param substitutions: Tuple schema (type, value)
        @rtype: boolean
        """
        logger = logging.getLogger("match_substitutions")
        placeholders = self._get_placeholders()
        logger.debug("Substitutions: %s", str(substitutions))
        logger.debug("Constraints: %s", str(placeholders))
        if len(placeholders) > len(substitutions):
            logger.debug("Failing on lens: %d vs %d",
                         len(placeholders), len(substitutions))
            return False

        # a template without placeholders has no index to check (and max()
        #  of an empty sequence raises).
        if placeholders:
            max_index = max(p[0] for p in placeholders)
            # indices are zero-based, so an index equal to the number of
            #  substitutions is already out of range.
            if max_index >= len(substitutions):
                logger.debug("Failing on max index: %d vs %d",
                             max_index,
                             len(substitutions))
                return False

        # it seems that some templates request different values than what are subsequently put in them
        #  specifically, a Hex64 might be put into a SizeType field (EID 4624)
        # this maps from the type described in a template, to possible additional types that a
        #  record can provide for a particular substitution
        overrides = {
            16: set([21])
        }

        for index, type_, is_conditional in placeholders:
            sub_type, sub_value = substitutions[index]
            # a conditional slot accepts a NULL (type 0) substitution
            if is_conditional and sub_type == 0:
                continue
            if sub_type != type_:
                if type_ not in overrides or sub_type not in overrides[type_]:
                    logger.debug("Failing on type comparison, index %d: %d vs %d (mode: %s)",
                                 index, sub_type, type_, is_conditional)
                    return False
                else:
                    logger.debug("Overriding template type %d with substitution type %d", type_, sub_type)
                    continue
        return True

    escape_re = re.compile(r"\\\\(\d)")

    @staticmethod
    def _escape(value):
        """
        Escape the static value to be used in a regular expression
          substitution. This processes any backreferences and
          makes them plain, escaped sequences.

        Retained for backwards compatibility; insert_substitutions() no
          longer calls it, because re.escape() output used as a replacement
          string leaves stray backslashes in the result.

        @type value: str
        @rtype: str
        """
        return Template.escape_re.sub(r"\\\\\\\\\1", re.escape(value))

    def insert_substitutions(self, substitutions):
        """
        Return a copy of the template with the given substitutions inserted.

        Each value replaces every placeholder carrying its position as
          index. Values are inserted verbatim; non-string values are
          converted with str().

        @type substitutions: list of (int, str)
        @param substitutions: an ordered list of (type:int, value:str)
        @rtype: str
        """
        ret = self.xml
        for index, pair in enumerate(substitutions):
            type_, value = pair
            from_pattern = r"\[(Normal|Conditional) Substitution\(index=%d, type=\d+\)\]" % index
            if not isinstance(value, six.string_types):
                value = str(value)
            # use a callable replacement so the value is inserted literally;
            #  a replacement *string* would have its backslashes and group
            #  references interpreted by re.sub.
            ret = re.sub(from_pattern, lambda _match, text=value: text, ret)
        return ret
# Pre-compiled placeholder patterns keyed by substitution index.
# make_replacement() adds further indices on demand.
REPLACEMENT_PATTERNS = {
    i: re.compile(
        r"\[(Normal|Conditional) Substitution\(index=%d, type=\d+\)\]" % i)
    for i in range(35)}


def make_replacement(template, index, substitution):
    """
    Makes a substitution given a template as a string.

    Every placeholder with the given index is replaced by `substitution`,
      which is inserted literally.

    Implementation is a huge hack that depends on the
    brittle template_format() output.

    @type template: str
    @type index: int
    @type substitution: str
    @rtype: str
    """
    if index not in REPLACEMENT_PATTERNS:
        from_pattern = re.compile(
            r"\[(Normal|Conditional) Substitution\(index=%d, type=\d+\)\]" % index)
        REPLACEMENT_PATTERNS[index] = from_pattern
    # a callable replacement keeps backslashes and "\1"-style sequences in
    #  `substitution` from being interpreted as escapes / group references.
    return REPLACEMENT_PATTERNS[index].sub(lambda _match: substitution, template)
def get_complete_template(root, current_index=0):
    """
    Gets the template from a RootNode while resolving any
    nested templates and fixing up their indices.

    Depth first ordering/indexing.

    Implementation is a huge hack that depends on the
    brittle template_format() output.

    @type root: RootNode
    @param root: node whose readable template view and substitutions are used.
    @type current_index: int
    @param current_index: amount added to this node's substitution indices;
      nested calls receive the parent's running offset plus the position of
      the embedded node.
    @rtype: str
    @return: the template text with indices rewritten and sub-templates
      inlined in place of their placeholders.
    """
    template = Evtx.Views.evtx_template_readable_view(root)  # TODO(wb): make sure this is working

    # walk through each substitution.
    # if its a normal node, continue
    # else its a subtemplate, and we count the number of substitutions _it_ has
    #   so that we can later fixup all the indices
    #
    # `replacements` ends up with one entry per substitution: an int (the
    #  new index for that slot) or a str (the rendered sub-template).
    replacements = []
    for index, substitution in enumerate(root.substitutions()):
        # find all sub-templates
        if not isinstance(substitution, Evtx.Nodes.BXmlTypeNode):
            replacements.append(current_index + index)
            continue
        # TODO(wb): hack here accessing ._root
        subtemplate = get_complete_template(substitution._root,
                                             current_index=current_index + index)
        replacements.append(subtemplate)
        # later slots are shifted by the number of placeholders the
        #  sub-template contributed
        current_index += subtemplate.count("Substitution(index=")
    replacements.reverse()

    # now walk through all the indices and fix them up depth-first
    # the list was reversed above, so this visits the original indices from
    #  highest to lowest; `index` recovers the original position.
    for i, replacement in enumerate(replacements):
        index = len(replacements) - i - 1
        if isinstance(replacement, int):
            # fixup index
            from_pattern = "index=%d," % index
            to_pattern = "index=%d," % replacement
            template = template.replace(from_pattern, to_pattern)
        if isinstance(replacement, six.string_types):
            # insert sub-template
            template = make_replacement(template, index, replacement)
    return template
def get_template(record):
    """
    Build the Template for a complete Record.

    The event ID is read from the record's XML view, and the template text
    is built from the record's root node.

    @type record: Record
    @rtype: Template
    """
    eid = evtxtract.utils.get_eid(Evtx.Views.evtx_record_xml_view(record))
    template_xml = get_complete_template(record.root())
    return Template(eid, template_xml)
================================================
FILE: evtxtract/utils.py
================================================
import mmap
import logging
from lxml import etree
logger = logging.getLogger(__name__)
def to_lxml(record_xml):
    """
    Parse an XML string into an Etree element.

    If the string does not contain an XML declaration ("<?xml"), one is
    prepended before parsing.

    @type record_xml: str
    @rtype: etree.Element
    """
    if "<?xml" in record_xml:
        return etree.fromstring(record_xml)

    document = '<?xml version="1.0" standalone="yes" ?>%s' % record_xml
    return etree.fromstring(document)
def get_child(node, tag,
              ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
    """
    Look up the first child of an Etree element by tag name.

    The namespace prefix `ns` is prepended to `tag` to form the name
    passed to `node.find`.

    @type node: etree.Element
    @type tag: str
    @type ns: str
    @rtype: etree.Element or None
    """
    qualified_name = "%s%s" % (ns, tag)
    return node.find(qualified_name)
def get_eid(record_xml):
    """
    Extract the event ID from EVTX record XML.

    The value is the text of the `EventID` child of the `System` child of
    the parsed document root, converted to an integer.

    Args:
      record_xml (str)

    Returns:
      int: the event ID of the record
    """
    event = to_lxml(record_xml)
    system = get_child(event, "System")
    event_id = get_child(system, "EventID")
    return int(event_id.text)
class Mmap(object):
    """
    Convenience class for opening a read-only memory map for a file path.

    Use as a context manager; the `with` target is the mmap object:

        with Mmap(path) as buf:
            ...
    """

    def __init__(self, filename):
        """
        @type filename: str
        @param filename: path of the file to map; opened on __enter__.
        """
        super(Mmap, self).__init__()
        self._filename = filename
        self._f = None
        self._mmap = None

    def __enter__(self):
        """
        Open the file and map its entire contents read-only.

        @rtype: mmap.mmap
        """
        self._f = open(self._filename, "rb")
        try:
            self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so close the
            #  file here (mapping fails for an empty file, for instance).
            self._f.close()
            self._f = None
            raise
        return self._mmap

    def __exit__(self, type, value, traceback):
        """
        Close the mapping and the underlying file.
        """
        if self._mmap is not None:
            self._mmap.close()
            self._mmap = None
        if self._f is not None:
            self._f.close()
            self._f = None
================================================
FILE: evtxtract/version.py
================================================
# Package version string. setup.py reads and exec()s this file to obtain
# __version__.
__version__ = '0.2.4'
================================================
FILE: evtxtract.spec
================================================
# -*- mode: python -*-
# Build spec for the evtxtract executable.
# NOTE(review): Analysis, TOC, PYZ, EXE and COLLECT are not imported here;
#  presumably the build tool that executes this spec (PyInstaller) injects
#  them - confirm.

# not referenced below; the calls pass cipher=None directly
block_cipher = None

# analyze the CLI entry script, excluding the Tk GUI modules
a = Analysis(
    ['evtxtract/main.py'],
    pathex=['evtxtract'],
    binaries=None,
    datas=None,
    hiddenimports=[],
    hookspath=None,
    runtime_hooks=None,
    excludes=["tkinter", "_tkinter", "Tkinter"],
    win_no_prefer_redirects=None,
    win_private_assemblies=None,
    cipher=None)

# drop the listed sqlite/ssl/tcl/tk binaries from the collected set
a.binaries = a.binaries - TOC([
    ('sqlite3.dll', None, None),
    ('tcl85.dll', None, None),
    ('tk85.dll', None, None),
    ('_sqlite3', None, None),
    ('_ssl', None, None),
    ('_tkinter', None, None)])

pyz = PYZ(a.pure, a.zipped_data, cipher=None)

# console executable named 'evtxtract' with the binaries included
#  (exclude_binaries=False)
exe = EXE(pyz,
          a.scripts,
          a.binaries,
          exclude_binaries=False,
          name='evtxtract',
          #icon='resources/icon.ico',
          debug=False,
          strip=None,
          upx=True,
          console=True )

# collect the executable with its binaries, zipfiles and data files
#  under the name 'evtxtract-dat'
coll = COLLECT(exe,
               a.binaries,
               a.zipfiles,
               a.datas,
               strip=None,
               upx=True,
               name='evtxtract-dat')
================================================
FILE: setup.py
================================================
#!/usr/bin/env python
# Packaging script for evtxtract.

import os

import setuptools

# this sets __version__
# # via: http://stackoverflow.com/a/7071358/87207
# # and: http://stackoverflow.com/a/2073599/87207
# exec()ing version.py avoids importing the evtxtract package just to learn
#  its version.
with open(os.path.join("evtxtract", "version.py"), "rb") as f:
    exec(f.read())


setuptools.setup(name="evtxtract",
                 version=__version__,
                 description="EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.",
                 author="Willi Ballenthin",
                 author_email="william.ballenthin@fireeye.com",
                 url="https://github.com/williballenthin/evtxtract",
                 license="Apache 2.0 License",
                 packages=setuptools.find_packages(),
                 # installs an `evtxtract` command that calls evtxtract.main:main
                 entry_points={
                     "console_scripts": [
                         "evtxtract=evtxtract.main:main",
                     ]
                 },
                 # NOTE(review): pytest looks like a test-only dependency;
                 #  consider moving it out of install_requires.
                 install_requires=[
                     'six',
                     'lxml',
                     'pytest',
                     'python-evtx>=0.5.2',
                 ],
                 )
================================================
FILE: tests/.gitignore
================================================
*memoryevtx/file.None*
*.dat
*.vacb
================================================
FILE: tests/fixtures.py
================================================
import os
import pytest
import evtxtract.utils
# directory containing this fixtures module
CD = os.path.dirname(__file__)
# path at which the fixtures below expect the `joshua1.vmem` test image
IMAGE_PATH = os.path.join(CD, 'joshua1.vmem')
@pytest.fixture
def image(request):
    """
    Fixture providing the path to the test image.

    Raises:
      RuntimeError: if the image file does not exist at IMAGE_PATH.
    """
    if os.path.exists(IMAGE_PATH):
        return IMAGE_PATH
    raise RuntimeError('required image %s does not exist. see readme.' % (IMAGE_PATH))
@pytest.fixture
def image_file(request):
    """
    Fixture yielding the test image opened as a binary file object.

    The file is closed once the requesting test finishes.
    """
    # fixture functions must not be called directly (an error in current
    #  pytest); ask pytest to resolve the `image` fixture instead.
    path = request.getfixturevalue('image')
    with open(path, 'rb') as f:
        yield f
@pytest.fixture
def image_mmap(request):
    """
    Fixture yielding a read-only memory map of the test image.

    The mapping is closed once the requesting test finishes.
    """
    # fixture functions must not be called directly (an error in current
    #  pytest); ask pytest to resolve the `image` fixture instead.
    path = request.getfixturevalue('image')
    with evtxtract.utils.Mmap(path) as mm:
        yield mm
================================================
FILE: tests/readmd.txt
================================================
the tests require the image `joshua1.vmem` from:
- referenced: http://jessekornblum.livejournal.com/293291.html
- download: https://dl.dropboxusercontent.com/u/55819714/joshua1.zip
================================================
FILE: tests/test_all.py
================================================
import logging
import evtxtract
import evtxtract.carvers
from fixtures import *
#logging.basicConfig(level=logging.DEBUG)
#logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_find_chunks(image_mmap):
    """
    find_evtx_chunks should report exactly the known chunk offsets.
    """
    # these offsets were empirically collected from the test image
    known_chunk_offsets = {
        0xc7f000,
        0xf0e000,
        0x1374f20,
        0x70cc000,
        0xd727440,
        0xdfe7000,
        0x18851080,
        0x1c31d000,
        0x20b362c0,
        0x276f8000,
        0x2833e000,
        0x28b4e000,
        0x28b68000,
        0x28d5e000,
        0x28ead000,
        0x2986e000,
        0x2998c000,
        0x29a9c000,
        0x2ff30000,
        0x2ffd0000,
        0x3070f000,
        0x30c1f000,
        0x30c8f000,
        0x30dbf000,
        0x30f2f000,
        0x30fff000,
        0x3126f000,
        0x328eac10,
        0x34b75000,
        0x38835000,
        0x39981910,
        0x39cc07a0,
        0x3b91b000,
    }

    carved = set(evtxtract.carvers.find_evtx_chunks(image_mmap))
    assert known_chunk_offsets == carved
def first(s):
    """
    Return the first item produced by iterating `s`, or None if there is none.
    """
    return next(iter(s), None)
def test_extract_records(image_mmap):
    """
    Records extracted from every carved chunk should have exactly the
    known offsets and event IDs.
    """
    # these offsets were empirically collected from the test image
    known_offsets = {
        0xf0e200,
        0x70cc200,
        0x70cca30,
        0x1c31d200,
        0x1c31d858,
        0x20b364c0,
        0x20b36b80,
        0x276f8200,
        0x276f88c0,
        0x29a9c200,
        0x30dbf200,
        0x30dbf8c8,
        0x30dbfb68,
        0x30dbfde8,
        0x34b75200,
        0x34b758a0,
        0x3b91b200,
    }

    # these eids were empirically collected from the test image
    known_eids = {
        1,
        2,
        5,
        21,
        22,
        100,
        306,
        823,
        1001,
        1002,
        1006,
        1009,
        1020,
    }

    seen_offsets = set()
    seen_eids = set()
    for chunk in evtxtract.carvers.find_evtx_chunks(image_mmap):
        for record in evtxtract.carvers.extract_chunk_records(image_mmap, chunk):
            seen_offsets.add(record.offset)
            seen_eids.add(record.eid)

    assert known_offsets == seen_offsets
    assert known_eids == seen_eids
def test_extract_templates(image_mmap):
    """
    Templates extracted from every carved chunk should have exactly the
    known template ids.
    """
    # these template ids were empirically collected from the test image
    known_ids = {
        "1-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]",
        "2-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]",
        "21-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]",
        "22-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]",
        "5-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|1|n]",
        "100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]",
        "306-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "823-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]-[19|1|n]-[20|20|n]-[21|1|n]",
        "1001-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "1002-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "1006-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|13|n]-[19|13|n]",
        "1009-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|8|n]",
        "1020-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]",
    }

    seen_ids = set()
    for chunk in evtxtract.carvers.find_evtx_chunks(image_mmap):
        for carved_template in evtxtract.carvers.extract_chunk_templates(image_mmap, chunk):
            seen_ids.add(carved_template.get_id())

    assert known_ids == seen_ids
def test_find_records(image_mmap):
    """
    find_evtx_records should yield the known first and last record offsets
    and the known number of records.
    """
    offsets = list(evtxtract.carvers.find_evtx_records(image_mmap))

    first_offset = offsets[0]
    assert first_offset == 0x317198

    last_offset = offsets[-1]
    assert last_offset == 0x3D706A88

    assert len(offsets) == 1674
def test_evtxtract(image_mmap):
    """
    evtxtract.extract should recover the known numbers of complete and
    incomplete records, and nothing of any other type.
    """
    complete_records = []
    incomplete_records = []

    for recovered in evtxtract.extract(image_mmap):
        if isinstance(recovered, evtxtract.CompleteRecord):
            complete_records.append(recovered)
            continue
        if isinstance(recovered, evtxtract.IncompleteRecord):
            incomplete_records.append(recovered)
            continue
        raise RuntimeError('unexpected return type')

    assert len(complete_records) == 52
    assert len(incomplete_records) == 1615
gitextract_2o9c9iwa/
├── .gitignore
├── .travis.yml
├── LICENSE.TXT
├── README.md
├── evtxtract/
│ ├── __init__.py
│ ├── carvers.py
│ ├── main.py
│ ├── templates.py
│ ├── utils.py
│ └── version.py
├── evtxtract.spec
├── setup.py
└── tests/
├── .gitignore
├── fixtures.py
├── readmd.txt
└── test_all.py
SYMBOL INDEX (45 symbols across 7 files)
FILE: evtxtract/__init__.py
class CompleteRecord (line 14) | class CompleteRecord(object):
method __init__ (line 17) | def __init__(self, offset, eid, xml):
class IncompleteRecord (line 24) | class IncompleteRecord(object):
method __init__ (line 27) | def __init__(self, offset, eid, substitutions):
function extract (line 34) | def extract(buf):
FILE: evtxtract/carvers.py
class ParseError (line 27) | class ParseError(RuntimeError): pass
function is_chunk_header (line 30) | def is_chunk_header(buf, offset):
function find_evtx_chunks (line 76) | def find_evtx_chunks(buf):
function is_record (line 98) | def is_record(buf, offset):
function find_evtx_records (line 130) | def find_evtx_records(buf):
function extract_chunk_records (line 155) | def extract_chunk_records(buf, offset):
function extract_chunk_templates (line 195) | def extract_chunk_templates(buf, offset):
class MaxOffsetReached (line 242) | class MaxOffsetReached(Exception): pass
function does_root_have_resident_template (line 245) | def does_root_have_resident_template(buf, offset, max_offset):
function extract_root_substitutions (line 316) | def extract_root_substitutions(buf, offset, max_offset):
function extract_record (line 577) | def extract_record(buf, offset):
FILE: evtxtract/main.py
function output_record (line 14) | def output_record(args, r):
function format_incomplete_record (line 51) | def format_incomplete_record(record):
function main (line 72) | def main(argv=None):
FILE: evtxtract/templates.py
class Template (line 17) | class Template(object):
method __init__ (line 20) | def __init__(self, eid, xml):
method get_id (line 27) | def get_id(self):
method _get_placeholders (line 48) | def _get_placeholders(self):
method match_substitutions (line 67) | def match_substitutions(self, substitutions):
method _escape (line 119) | def _escape(value):
method insert_substitutions (line 130) | def insert_substitutions(self, substitutions):
function make_replacement (line 156) | def make_replacement(template, index, substitution):
function get_complete_template (line 174) | def get_complete_template(root, current_index=0):
function get_template (line 220) | def get_template(record):
FILE: evtxtract/utils.py
function to_lxml (line 9) | def to_lxml(record_xml):
function get_child (line 23) | def get_child(node, tag,
function get_eid (line 36) | def get_eid(record_xml):
class Mmap (line 53) | class Mmap(object):
method __init__ (line 58) | def __init__(self, filename):
method __enter__ (line 64) | def __enter__(self):
method __exit__ (line 69) | def __exit__(self, type, value, traceback):
FILE: tests/fixtures.py
function image (line 13) | def image(request):
function image_file (line 21) | def image_file(request):
function image_mmap (line 27) | def image_mmap(request):
FILE: tests/test_all.py
function test_find_chunks (line 14) | def test_find_chunks(image_mmap):
function first (line 55) | def first(s):
function test_extract_records (line 60) | def test_extract_records(image_mmap):
function test_extract_templates (line 110) | def test_extract_templates(image_mmap):
function test_find_records (line 136) | def test_find_records(image_mmap):
function test_evtxtract (line 143) | def test_evtxtract(image_mmap):
Condensed preview — 16 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (70K chars).
[
{
"path": ".gitignore",
"chars": 467,
"preview": "*.py[cod]\n\n# C extensions\n*.so\n\n# Packages\n*.egg\n*.egg-info\ndist\nbuild\neggs\nparts\nbin\nvar\nsdist\ndevelop-eggs\n.installed."
},
{
"path": ".travis.yml",
"chars": 3077,
"preview": "env:\n global:\n - secure: \"j89gGCxDhMdJ9vP/dUhu06XUqYMeqMjxIx8/s8KdVOhE0BxOddU7dIQE5SvcGYMoW+W4NV+7/Pio/eIkY3SU"
},
{
"path": "LICENSE.TXT",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 7096,
"preview": "\nPurpose\n-------\nEVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unalloc"
},
{
"path": "evtxtract/__init__.py",
"chars": 3699,
"preview": "import logging\nimport collections\n\nimport evtxtract.utils\nimport evtxtract.carvers\nimport evtxtract.templates\n\n\nlogger ="
},
{
"path": "evtxtract/carvers.py",
"chars": 18730,
"preview": "import re\nimport struct\nimport logging\nimport binascii\nimport datetime\nimport xml.sax.saxutils\nfrom collections import n"
},
{
"path": "evtxtract/main.py",
"chars": 4499,
"preview": "import os\nimport sys\nimport logging\nimport os.path\nimport argparse\n\nimport evtxtract\nimport evtxtract.carvers\n\n\nlogger ="
},
{
"path": "evtxtract/templates.py",
"chars": 8107,
"preview": "import re\nimport sys\nimport logging\n\nimport six\nimport Evtx.Evtx\nimport Evtx.Nodes\nimport Evtx.Views\n\nimport evtxtract.u"
},
{
"path": "evtxtract/utils.py",
"chars": 1683,
"preview": "import mmap\nimport logging\nfrom lxml import etree\n\n\nlogger = logging.getLogger(__name__)\n\n\ndef to_lxml(record_xml):\n "
},
{
"path": "evtxtract/version.py",
"chars": 22,
"preview": "__version__ = '0.2.4'\n"
},
{
"path": "evtxtract.spec",
"chars": 1036,
"preview": "# -*- mode: python -*-\n\nblock_cipher = None\n\na = Analysis(\n ['evtxtract/main.py'],\n pathex=['evtxtract'],\n bi"
},
{
"path": "setup.py",
"chars": 943,
"preview": "#!/usr/bin/env python\n\nimport os\nimport setuptools\n\n\n# this sets __version__\n# # via: http://stackoverflow.com/a/7071358"
},
{
"path": "tests/.gitignore",
"chars": 36,
"preview": "*memoryevtx/file.None*\n*.dat\n*.vacb\n"
},
{
"path": "tests/fixtures.py",
"chars": 534,
"preview": "import os\n\nimport pytest\n\nimport evtxtract.utils\n\n\nCD = os.path.dirname(__file__)\nIMAGE_PATH = os.path.join(CD, 'joshua1"
},
{
"path": "tests/readmd.txt",
"chars": 185,
"preview": "the tests require the image `joshua1.vmem` from:\n - referenced: http://jessekornblum.livejournal.com/293291.html\n - do"
},
{
"path": "tests/test_all.py",
"chars": 5684,
"preview": "import logging\n\nimport evtxtract\nimport evtxtract.carvers\n\nfrom fixtures import *\n\n\n#logging.basicConfig(level=logging.D"
}
]
About this extraction
This page contains the full source code of the williballenthin/EVTXtract GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 16 files (65.6 KB), approximately 18.3k tokens, and a symbol index with 45 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.