Repository: williballenthin/EVTXtract
Branch: master
Commit: 0895be4c2512
Files: 16
Total size: 65.6 KB
Directory structure:
gitextract_2o9c9iwa/
├── .gitignore
├── .travis.yml
├── LICENSE.TXT
├── README.md
├── evtxtract/
│ ├── __init__.py
│ ├── carvers.py
│ ├── main.py
│ ├── templates.py
│ ├── utils.py
│ └── version.py
├── evtxtract.spec
├── setup.py
└── tests/
├── .gitignore
├── fixtures.py
├── readmd.txt
└── test_all.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.py[cod]
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Editor backups and project-specific artifacts
extract_valid_evtx_records_and_templates.py~
find_evtx_chunks.py~
*~
*.evtx
workspace/*.xml
workspace/*.txt
.idea/*
.idea
*_templates.txt
*_chunks.txt
================================================
FILE: .travis.yml
================================================
env:
global:
- secure: "j89gGCxDhMdJ9vP/dUhu06XUqYMeqMjxIx8/s8KdVOhE0BxOddU7dIQE5SvcGYMoW+W4NV+7/Pio/eIkY3SUXGOLlPLMLwMDmvg9nA6HwrcSs6zPGreCYhqf7RlCNEyHHoWZ6syHjx1cEL2c1FyXLelQ8r5ONAzWsTeDn7ctnFcGzr7EDhKEjC9LGZchjRMYVrWkOruskSwnJYkPCstNqcwLh7qPgAXktTxx0YMPIr0sTbwLTnZRiNCE0egFSoT6QLBggrM3Nv0DbZO7luyFEgozgp99CACDdJMeMsKqgkedk0E+nz2BV26EpqjjyIRJVMiwXZVLexkB8vSw9PhCGY36REwMIhJz6KCzVQMZyoNkSbrMWBpa3LqdfZGyMxBtLlBw/Yvv+pzB+OnbCIjooy1nTfOlyLlk6QzESnzEW/A/DsFVEnNXQjoAOZEZjj6SlaHl+r/Uw6sXAP4FjP8umI++E9+MnuI0T1bevX8ZMdJ6Qz7gPWnIuOGJqkvFqC7MYt3SY4O4O0DS1pHXQGoQOgoRYExY2VRqeJSEnRTvbr8S5uVWOOz5PRb4psM6gnl6eueN8uNNmqj+BDaZp8qu/uLJ31zvAq0q1+rxxaqB5OkVebi65Q7cN6IjnpCTCK5risooGNZRGco2thnRiuysQ3kEQjaWH/f15cfY8YYnuTA="
- secure: "xe37YTz7uegptreK36MVYl8+c9FzMqVsNr+/WMPBiZSmQ2T9mHoEP7QWL3AA1JSi7q2A3qWKNK15fUpFmu+u7+/15lFYUqihPEMZTBmmUYx6/I6bxP6d/sdu+mro13kflDzqTOdmkDU9X/Olympc5kI8qqheH6OTqwgjU0ypl/V53/3mXTgOre4LYvy+p0nkYdFCzipp+stuZyvn6tag57nvdnH8j8OLLR886ZV7KtB7RlOeaVK0NbyZ5XFBHJL1GXwV01lDfMsMokDHgkDfvjKVo63p1rYFScOUx7BBwpnSM1zr1hpHli562wQbs2eS4F8oYWOzXxhcRmvwAUxE/iqQvNyis+PD42xlhhOP1ubKJazqc9/AlKIHLNIvfsAVuFLt1eGI/g5/K+cpNUEs38+CXpqy2vTng4bFF4IabpxKlZxqpuCniKiDs5WExmhp2/fwXpNhdQmlh/WM2Mv4+vD8XyardIWZzmeR9EVFRAe+cGoejM+seGNKyfYxdOnj0fqmT2IORK9UKrmqisa9eBUOOg5kLirbnhfbax2J/FAcvAaMS8c5ZQHMPdswyaOdSpoJPPHULpI3uoPvYDFJcbuuZWlE0tuD/Qm5/4ABOeUfInOPAWtOBKYFl+YYIWUzCMEy6QxzJpSXqqXtNlR5y1l4M+PQzVY463+DXK3XBXE="
- ARTIFACTS_BUCKET=build-artifacts.floss.flare.fireeye.com
language: python
matrix:
  include:
    # version numbers are quoted so they stay strings (an unquoted 3.10
    # would be read as the float 3.1)
    - os: linux
      sudo: required
      python: "2.7"
    - os: linux
      sudo: required
      python: "3.5"
    # travis doesn't have py2.7 available on OSX, so this job uses the
    # generic image and the before_install steps install it themselves
    # ref: https://github.com/travis-ci/travis-ci/issues/2312
    - os: osx
      language: generic
before_install:
# fix erroring OSX job because of rvm issue
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then rvm get stable --auto-dotfiles; fi
# travis doesn't have py2.7 available, so we have to do it ourselves
# ref: https://github.com/travis-ci/travis-ci/issues/2312
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then git clone https://github.com/MacPython/terryfy ../terryfy; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then source ../terryfy/travis_tools.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then get_python_environment macpython 2.7.10; fi
install:
- pip install pyinstaller pep8
- echo "__version__ = '$(git describe --tags)'" > evtxtract/version.py
- pip install -e .
- pyinstaller evtxtract.spec && rm -r './dist/evtxtract-dat/'
script:
- find . -name \*.py -exec pep8 --ignore=E501 {} \;
- pushd ./tests && wget "https://dl.dropboxusercontent.com/u/55819714/joshua1.zip" && unzip joshua1.zip && popd
- py.test tests/ -v
addons:
artifacts:
debug: true
paths:
- $(find . -type f | grep -e '/bin/' -e 'dist/evtxtract' | awk 1 ORS=':')
target_paths: travis/$TRAVIS_OS_NAME/
================================================
FILE: LICENSE.TXT
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
Purpose
-------
EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.
Quick Run
---------
Install EVTXtract via `pip`:
pip install evtxtract
Now the tool is ready to go!
C:/Python27/Scripts/evtxtract.exe Z:/evidence/1/image.dd > Z:/work/1/evtx.xml
Quicker Run
-----------
Download standalone executable nightly builds of EVTXtract here:
- [Linux](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/evtxtract)
- [MacOS](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/osx/dist/evtxtract)
Then you can do:
./evtxtract /path/to/evidence > /path/to/output.xml
Background
----------
EVTX records are XML fragments encoded using a Microsoft-specific binary XML representation.
Despite the convenient format, it is not easy to recover EVTX event log records from a corrupted file or unallocated space.
This is because the complete representation of a record often depends on other records found nearby.
The event log service recognizes similarities among records and refactors commonalities into "templates".
A template is a fixed structure with placeholders that reserve space for variable content.
The on-disk event log record structure is a reference to a template, and a list of substitutions (the variable content that replaces a placeholder in a template).
To decode a record into XML, the event log service resolves the template and replaces its placeholders with the entries of the substitution array.
Therefore, template corruption renders many records unrecoverable within the local 64KB "chunk".
However, the substitution array for the remaining records may still be intact.
If so, it may be possible to produce XML fragments that match the original records if the damaged template can be reconstructed.
For many common events, such as process creation or account logon, empirical testing demonstrates the relevant templates remain mostly constant.
In these cases, recovering event log records boils down to identifying appropriate templates found in other EVTX chunks.
Algorithm
---------
1. Scan for chunk signatures ("ElfChnk")
- check header for sane values (0x80 <= size <= 0x200)
- verify checksums (header, data)
2. Extract records from valid chunks found in (1)
3. Extract templates from valid chunks found in (1)
4. Scan for record signatures
- check header for sane values
- extract timestamp
- attempt to parse substitutions
- attempt to decode substitutions into EID, other fields
5. Reconstruct records by reusing old templates with recovered substitutions
Usage
-----
EVTXtract is a pure Python script.
This means it easily runs on Windows, Linux, and MacOS.
Simply invoke the script, providing the path to a binary image, and EVTXtract writes its results to the standard out stream.
The binary file can be any data: a raw image, memory dump, etc.
Example command line:
C:/Python27/Scripts/evtxtract.exe Z:/evidence/1/image.dd > Z:/work/1/evtx.xml
Below are some example results from the above command.
It shows two records: one complete and one incomplete.
The first record is completely reconstructed,
and is formatted just like it would be in event viewer.
However, EVTXtract was unable to completely reconstruct the second record,
since some critical template data was missing.
So, it's been formatted with as much data as was recovered.
EVTXtract uses a schema that allows you to continue processing despite incomplete data.
823
0
4
49
11
0x80000000000200
1
Microsoft-Windows-PrintService/Admin
JOSHUA
1
Microsoft XPS Document Writer,winspool,Ne00:
0x000000
spoolsv.exe
...
0x317198
1531
4
4
4
0
6
0
6
1531
0
21
0x8000000000000000
17
2013-03-23 02:02:35.679552
0
8
928
8
1040
10
132
4
0
19
S-1-5-18
0
1
Microsoft-Windows-User Profiles Service
15
0001010f-010c-77e3-bf2f-3ef300001200
1
Application
================================================
FILE: evtxtract/__init__.py
================================================
import logging
import collections
import evtxtract.utils
import evtxtract.carvers
import evtxtract.templates
# module-level logger used by extract() to report records it has to skip
logger = logging.getLogger(__name__)
# position of the value inside a (type, value) substitution tuple
VALUE = 1
class CompleteRecord(object):
    '''
    A recovered record for which a full XML rendering is available.

    Attributes (fixed via __slots__):
      offset: offset of the record within the scanned buffer.
      eid: the record's event ID.
      xml: the XML text of the record.
    '''
    __slots__ = ('offset', 'eid', 'xml')

    def __init__(self, offset, eid, xml):
        super(CompleteRecord, self).__init__()
        self.offset, self.eid, self.xml = offset, eid, xml
class IncompleteRecord(object):
    '''
    A recovered record that could not be rendered to XML; only its raw
    substitutions are available.

    Attributes (fixed via __slots__):
      offset: offset of the record within the scanned buffer.
      eid: the record's event ID.
      substitutions: the parsed list of (type, value) substitution tuples.
    '''
    __slots__ = ('offset', 'eid', 'substitutions')

    def __init__(self, offset, eid, substitutions):
        super(IncompleteRecord, self).__init__()
        self.offset, self.eid, self.substitutions = offset, eid, substitutions
def extract(buf):
    '''
    Run the EVTXtract algorithm over the given data and reconstruct EVTX records.

    Args:
      buf (buffer): the binary data from which to extract structures.

    Returns:
      iterable[union[CompleteRecord, IncompleteRecord]]: a generator of either
        CompleteRecord or IncompleteRecord. You'll have to type-switch on these
        classes to decide how to handle them.
    '''
    carvers = evtxtract.carvers

    # full scan of the data (#1): locate the chunks.
    found_chunks = set(carvers.find_evtx_chunks(buf))

    # records that live inside a found chunk are emitted as-is; remember where
    # they were so that the second scan does not report them again.
    seen_offsets = set()
    for chunk in found_chunks:
        for rec in carvers.extract_chunk_records(buf, chunk):
            seen_offsets.add(rec.offset)
            yield CompleteRecord(rec.offset, rec.eid, rec.xml)

    # eid -> {template id -> template}
    by_eid = collections.defaultdict(dict)
    for chunk in found_chunks:
        for tmpl in carvers.extract_chunk_templates(buf, chunk):
            by_eid[tmpl.eid][tmpl.get_id()] = tmpl

    # full scan of the data (#2): locate individual records.
    # this has to be a separate pass because every template must have been
    # collected before reconstruction starts.
    for offset in carvers.find_evtx_records(buf):
        if offset in seen_offsets:
            continue

        try:
            rec = carvers.extract_record(buf, offset)
        except carvers.ParseError as e:
            logger.info('parse error for record at offset: 0x%x: %s', offset, str(e))
            continue
        except ValueError as e:
            logger.info('timestamp parse error for record at offset: 0x%x: %s', offset, str(e))
            continue
        except Exception as e:
            logger.info('unknown parse error for record at offset: 0x%x: %s', offset, str(e))
            continue

        subs = rec.substitutions
        if len(subs) < 4:
            logger.info('too few substitutions for record at offset: 0x%x', offset)
            continue

        # we just know that the EID is substitution index 3
        eid = subs[3][VALUE]

        candidates = set(
            tmpl for tmpl in by_eid.get(eid, {}).values()
            if tmpl.match_substitutions(subs))

        # only an unambiguous match can be rendered; otherwise hand back the
        # raw substitutions.
        if len(candidates) != 1:
            if not candidates:
                logger.info('no matching templates for record at offset: 0x%x', offset)
            else:
                logger.info('too many templates for record at offset: 0x%x', offset)
            yield IncompleteRecord(offset, eid, subs)
            continue

        (tmpl,) = candidates
        yield CompleteRecord(offset, eid, tmpl.insert_substitutions(subs))
================================================
FILE: evtxtract/carvers.py
================================================
import re
import struct
import logging
import binascii
import datetime
import xml.sax.saxutils
from collections import namedtuple
import six
import Evtx.Evtx
import Evtx.Views
import evtxtract.templates
logger = logging.getLogger(__name__)
# TODO: this should be part of python-evtx
# signature bytes compared against the start of a candidate chunk header
EVTX_HEADER_MAGIC = b"ElfChnk"
# signature bytes for an EVTX record (presumably what the record scan looks for)
EVTX_RECORD_MAGIC = b"\x2a\x2a\x00\x00"
# 0x10000 bytes == 64KB
CHUNK_SIZE = 0x10000
# presumably the bounds of a sane chunk header size field (0x80 <= size <= 0x200)
MIN_CHUNK_HEADER_SIZE = 0x80
MAX_CHUNK_HEADER_SIZE = 0x200
class ParseError(RuntimeError):
    """Raised when carved data cannot be parsed as the expected structure."""
def is_chunk_header(buf, offset):
"""
Return True if the offset appears to be an EVTX Chunk header.
Implementation note: Simply checks the magic header and size field for reasonable values.
Args:
buf (buffer): the binary data from which to extract structures.
offset (int): the address of the potential EVTX chunk header.
Returns:
bool: if the offset appears to be an EVTX chunk header.
"""
if len(buf) < offset + 0x2C:
# our accesses below will overflow
return False
magic = struct.unpack_from("<7s", buf, offset)[0]
if magic != EVTX_HEADER_MAGIC:
return False
size = struct.unpack_from(" 100:
return True
ofs += 4 # template_id or size
if max_offset < ofs + 4 + (4 * min(maybe_num_subs or 2, 4)):
return False
for i in range(min(maybe_num_subs or 2, 4)):
byte = struct.unpack_from(" 100:
raise ParseError("Unexpected number of substitutions: %d at %s" %
(num_subs, hex(ofs)))
ofs += 4 # begin sub list
substitutions = []
for _ in range(num_subs):
size, type_ = struct.unpack_from(" max_offset:
raise MaxOffsetReached("Substitutions overran record buffer.")
value = None
#[0] = parse_null_type_node,
if type_ == 0x0:
value = None
ret.append((type_, value))
#[1] = parse_wstring_type_node,
elif type_ == 0x1:
s = buf[ofs:ofs + size]
s = s.decode('utf-16le')
s = xml.sax.saxutils.escape(s)
value = s
ret.append((type_, value))
#[2] = parse_string_type_node,
elif type_ == 0x2:
s = buf[ofs:ofs + size]
s = s.decode('ascii')
s = xml.sax.saxutils.escape(s)
value = s
ret.append((type_, value))
#[3] = parse_signed_byte_type_node,
elif type_ == 0x3:
value = struct.unpack_from(" 1
ret.append((type_, value))
#[14] = parse_binary_type_node,
elif type_ == 0xE:
value = binascii.hexlify(buf[ofs:ofs + size])
ret.append((type_, value))
#[15] = parse_guid_type_node,
elif type_ == 0xF:
_bin = buf[offset:offset + 16]
# Yeah, this is ugly
h = [six.indexbytes(_bin, i) for i in range(len(_bin))]
value = """{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}""".format(
h[3], h[2], h[1], h[0],
h[5], h[4],
h[7], h[6],
h[8], h[9],
h[10], h[11], h[12], h[13], h[14], h[15])
ret.append((type_, value))
#[16] = parse_size_type_node,
elif type_ == 0x10:
if size == 0x4:
value = struct.unpack_from("IH", buf, ofs + 2)
value = "S-%d-%d" % (version, (id_high << 16) ^ id_low)
for i in range(num_elements):
val = struct.unpack_from(" 0:
match = re.search(b"((?:[^\x00].)+)", bin)
if match:
frag = match.group()
s = frag.decode("utf-16")
s = xml.sax.saxutils.escape(s)
value.append(s)
bin = bin[len(frag) + 2:]
if len(bin) == 0:
break
frag = re.search(b"(\x00*)", bin).group()
if len(frag) % 2 == 0:
for _ in range(len(frag) // 2):
value.append('')
else:
raise ParseError("Error parsing uneven substring of NULLs")
bin = bin[len(frag):]
if value[-1].strip("\x00") == "":
value = value[:-1]
ret.append((type_, value))
else:
raise ParseError("Unexpected type encountered: " + hex(type_))
ofs += size
return ret
# the result type documented for extract_record():
#   offset, num, timestamp, and the list of (type, value) substitutions.
ExtractedRecord = namedtuple(
    'ExtractedRecord', ['offset', 'num', 'timestamp', 'substitutions'])
def extract_record(buf, offset):
"""
Parse an EVTX record into a convenient dictionary of fields.
Args:
buf (buffer): the binary data from which to extract structures.
offset (int): address of the EVTX record.
Returns:
ExtractedRecord: the thing you asked for.
Raises:
ParseError: for various reasons, including invalid timestamps and overruns.
"""
if not is_record(buf, offset):
raise ValueError('not a record')
record_size, record_num, qword = struct.unpack_from("')
ret.append('0x%x' % (record.offset))
ret.append('%d' % (record.eid))
ret.append('')
for i, (type_, value) in enumerate(record.substitutions):
ret.append(' ' % (i))
ret.append(' %d' % (type_))
if value is None:
ret.append(' ')
else:
ret.append(' %s' % (value))
ret.append(' ')
ret.append('')
ret.append('')
return '\n'.join(ret)
def main(argv=None):
    """
    Command line entry point: recover EVTX records from a binary file.

    Args:
      argv (list[str]): command line arguments, without the program name.
        Defaults to `sys.argv[1:]`.

    Raises:
      SystemExit: on invalid arguments (raised by argparse), when `-s` is
        given without `-o`, or when the `-o` path is not a directory.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(
        description="Reconstruct EVTX event log records from binary data.")
    parser.add_argument("input", type=str,
                        help="Path to binary input file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("-s", "--split", action="store_true",
                        help="split each event into its own file")
    parser.add_argument("-o", "--out", metavar='output-directory', action="store",
                        help="output directory to store split files")
    # parse the arguments we were handed; parse_args() with no argument would
    # silently ignore `argv` and always read sys.argv.
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    # sys.exit rather than the builtin exit(): the latter is installed by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    if args.split and not args.out:
        logger.error('Error: the -o argument is required when using -s. please provide an output directory with -o')
        sys.exit(1)

    if args.out and not os.path.isdir(args.out):
        logger.error('Error: {0} is not a directory'.format(args.out))
        sys.exit(1)

    with evtxtract.utils.Mmap(args.input) as mm:
        num_complete = 0
        num_incomplete = 0

        # NOTE(review): the literals printed around the record stream are empty
        # in the source under review; presumably they were meant to emit the
        # document header / wrapper element - confirm.
        if not args.split:
            print('')
            print('')

        for r in evtxtract.extract(mm):
            output_record(args, r)

            if isinstance(r, evtxtract.CompleteRecord):
                num_complete += 1
            elif isinstance(r, evtxtract.IncompleteRecord):
                num_incomplete += 1
            else:
                raise RuntimeError('unexpected return type')

        if not args.split:
            print('')

        logger.info('recovered %d complete records', num_complete)
        logger.info('recovered %d incomplete records', num_incomplete)
# run the command line tool when this module is executed as a script;
# main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
================================================
FILE: evtxtract/templates.py
================================================
import re
import sys
import logging
import six
import Evtx.Evtx
import Evtx.Nodes
import Evtx.Views
import evtxtract.utils
import evtxtract.templates
logger = logging.getLogger(__name__)
class Template(object):
    """
    The textual form of an EVTX template for one event ID.

    The text contains placeholders of the form
    `[Normal Substitution(index=N, type=T)]` or
    `[Conditional Substitution(index=N, type=T)]` that get replaced by the
    values of a record's substitution array.
    """
    # matches one placeholder, capturing (mode, index, type).
    substitition_re = re.compile(r"\[(Conditional|Normal) Substitution\(index=(\d+), type=(\d+)\)\]")

    def __init__(self, eid, xml):
        """
        @param eid: the event ID this template belongs to.
        @type xml: str
        @param xml: the template text, including its placeholders.
        """
        self.eid = eid
        self.xml = xml

        # both are computed lazily on first use
        self._cached_placeholders = None
        self._cached_id = None

    def get_id(self):
        """
        @rtype: str
        @return: A string that can be parsed into constraints describing what
          types of substitutions this template can accept.
          Short example: 1100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]
        """
        if self._cached_id is not None:
            return self._cached_id

        ret = [str(self.eid)]
        for index, type_, mode in self._get_placeholders():
            # `mode` is the is_conditional flag: "c"onditional or "n"ormal
            if mode:
                mode_str = "c"
            else:
                mode_str = "n"
            ret.append("[%s|%s|%s]" % (index, type_, mode_str))

        self._cached_id = "-".join(ret)
        return self._cached_id

    def _get_placeholders(self):
        """
        Get descriptors for each of the substitutions required by this
          template, sorted by index.

        Tuple schema: (index, type, is_conditional)

        @rtype: list of (int, int, boolean)
        """
        if self._cached_placeholders is not None:
            return self._cached_placeholders

        ret = []
        for mode, index, type_ in Template.substitition_re.findall(self.xml):
            ret.append((int(index), int(type_), mode == "Conditional"))

        self._cached_placeholders = sorted(ret, key=lambda p: p[0])
        return self._cached_placeholders

    def match_substitutions(self, substitutions):
        """
        Checks to see if the provided set of substitutions match the
          placeholder values required by this template.

        Note, this is only a best guess.  The number of substitutions
          *may* be greater than the number of available slots. So we
          must only check the slot and substitution types.

        A template without any placeholders imposes no constraints, so it
          matches any list of substitutions.

        @type substitutions: list of (int, str)
        @param substitutions: Tuple schema (type, value)
        @rtype: boolean
        """
        logger = logging.getLogger("match_substitutions")
        placeholders = self._get_placeholders()
        logger.debug("Substitutions: %s", str(substitutions))
        logger.debug("Constraints: %s", str(placeholders))

        if len(placeholders) > len(substitutions):
            logger.debug("Failing on lens: %d vs %d",
                         len(placeholders), len(substitutions))
            return False

        if not placeholders:
            # nothing to check (and max() below would raise on an empty list)
            return True

        # substitutions are addressed by index, so the largest placeholder
        # index must be a valid position in the list: strictly less than len.
        max_index = max(placeholders, key=lambda k: k[0])[0]
        if max_index >= len(substitutions):
            logger.debug("Failing on max index: %d vs %d",
                         max_index,
                         len(substitutions))
            return False

        # it seems that some templates request different values than what are subsequently put in them
        #   specifically, a Hex64 might be put into a SizeType field (EID 4624)
        # this maps from the type described in a template, to possible additional types that a
        #   record can provide for a particular substitution
        overrides = {
            16: set([21])
        }

        for index, type_, is_conditional in placeholders:
            sub_type, sub_value = substitutions[index]
            # a conditional slot may be left empty (null type)
            if is_conditional and sub_type == 0:
                continue
            if sub_type != type_:
                if type_ not in overrides or sub_type not in overrides[type_]:
                    logger.debug("Failing on type comparison, index %d: %d vs %d (mode: %s)",
                                 index, sub_type, type_, is_conditional)
                    return False
                else:
                    logger.debug("Overriding template type %d with substitution type %d", type_, sub_type)
                    continue
        return True

    escape_re = re.compile(r"\\\\(\d)")

    @staticmethod
    def _escape(value):
        """
        Escape the static value to be used in a regular expression
          substitution. This processes any backreferences and
          makes them plain, escaped sequences.

        Kept for backward compatibility; insert_substitutions() does not
          use it, because a callable replacement needs no escaping.

        @type value: str
        @rtype: str
        """
        return Template.escape_re.sub(r"\\\\\\\\\1", re.escape(value))

    def insert_substitutions(self, substitutions):
        """
        Return a copy of the template with the given substitutions inserted.

        Values are inserted verbatim. String values are used as-is, anything
          else is converted with str().

        @type substitutions: list of (int, str)
        @param substitutions: an ordered list of (type:int, value:str)
        @rtype: str
        """
        ret = self.xml
        for index, pair in enumerate(substitutions):
            type_, value = pair
            from_pattern = r"\[(Normal|Conditional) Substitution\(index=%d, type=\d+\)\]" % index
            if isinstance(value, six.string_types):
                text = value
            else:
                text = str(value)
            # a callable replacement is inserted literally; a replacement
            # *string* would have its backslashes interpreted by re.sub, and
            # pre-escaping it with re.escape() leaves stray backslashes in
            # the output.
            ret = re.sub(from_pattern, lambda _match, _text=text: _text, ret)
        return ret
# regular expression text for the placeholder with a given index; shared by
#  the precompiled table below and the on-demand fallback in make_replacement.
_REPLACEMENT_PATTERN_FORMAT = r"\[(Normal|Conditional) Substitution\(index=%d, type=\d+\)\]"

# cache of compiled placeholder patterns, keyed by substitution index.
# the first 35 indices are precompiled; others are added on first use.
REPLACEMENT_PATTERNS = {
    i: re.compile(_REPLACEMENT_PATTERN_FORMAT % i)
    for i in range(35)}


def make_replacement(template, index, substitution):
    """
    Makes a substitution given a template as a string.

    Every placeholder with the given index is replaced by `substitution`,
      which is inserted verbatim.

    Implementation is a huge hack that depends on the
    brittle template_format() output.

    @type template: str
    @type index: int
    @type substitution: str
    @rtype: str
    """
    pattern = REPLACEMENT_PATTERNS.get(index)
    if pattern is None:
        pattern = re.compile(_REPLACEMENT_PATTERN_FORMAT % index)
        REPLACEMENT_PATTERNS[index] = pattern
    # use a callable so that backslashes in `substitution` are not interpreted
    #  as regular expression escapes / group references.
    return pattern.sub(lambda _match: substitution, template)
def get_complete_template(root, current_index=0):
    """
    Render the template of a RootNode as a string, with nested
    templates expanded inline and substitution indices renumbered
    so that they stay unique across the expanded result.

    Substitutions are visited depth first. The implementation is a
    hack that relies on the exact text produced by the readable
    template view.

    @type root: RootNode
    @type current_index: int
    @param current_index: offset added to the indices found in this root
    @rtype: str
    """
    xml = Evtx.Views.evtx_template_readable_view(root)  # TODO(wb): make sure this is working

    # one entry per substitution of this root, in order:
    #   int -> the renumbered index of a plain substitution
    #   str -> the fully expanded text of a nested template
    fixups = []
    for position, node in enumerate(root.substitutions()):
        if isinstance(node, Evtx.Nodes.BXmlTypeNode):
            # TODO(wb): hack here accessing ._root
            nested = get_complete_template(node._root,
                                           current_index=current_index + position)
            fixups.append(nested)
            # every placeholder inside the nested template shifts the
            # indices of the substitutions that follow it
            current_index += nested.count("Substitution(index=")
        else:
            fixups.append(current_index + position)

    # apply the fixups from the last substitution to the first
    for position in range(len(fixups) - 1, -1, -1):
        fixup = fixups[position]
        if isinstance(fixup, int):
            # fixup index
            xml = xml.replace("index=%d," % position, "index=%d," % fixup)
        if isinstance(fixup, six.string_types):
            # insert sub-template
            xml = make_replacement(xml, position, fixup)
    return xml
def get_template(record):
    """
    Build the Template that describes a complete Record: its event ID
    paired with the fully expanded template text of its root node.

    @type record: Record
    @rtype: Template
    """
    rendered = Evtx.Views.evtx_record_xml_view(record)
    event_id = evtxtract.utils.get_eid(rendered)
    body = get_complete_template(record.root())
    return Template(event_id, body)
================================================
FILE: evtxtract/utils.py
================================================
import mmap
import logging
from lxml import etree
logger = logging.getLogger(__name__)
def to_lxml(record_xml):
    """
    Convert an XML string to an Etree element.

    @type record_xml: str
    @rtype: etree.Element
    """
    # NOTE(review): the condition and the declaration text below were
    # reconstructed -- the previous line was not valid Python ("if" with no
    # condition or body, followed by a dangling '"%s" % record_xml)').
    # Presumably the intent is to prepend an XML declaration when the input
    # lacks one; confirm the exact declaration against project history.
    if "<?xml" not in record_xml:
        return etree.fromstring(
            "<?xml version=\"1.0\" standalone=\"yes\" ?>%s" % record_xml)
    else:
        return etree.fromstring(record_xml)
def get_child(node, tag,
              ns="{http://schemas.microsoft.com/win/2004/08/events/event}"):
    """
    Look up the first direct child of an Etree element by tag name.

    The namespace prefix is prepended to the tag before searching.

    @type node: etree.Element
    @type tag: str
    @type ns: str
    @param ns: namespace in "{uri}" form; defaults to the Windows event schema
    @rtype: etree.Element or None
    """
    qualified_tag = ns + tag
    return node.find(qualified_tag)
def get_eid(record_xml):
    """
    Extract the event ID from the XML of an EVTX record.

    The value is read from the text of the System/EventID element.

    Args:
      record_xml (str): the record as an XML string

    Returns:
      int: the event ID of the record
    """
    event = to_lxml(record_xml)
    system = get_child(event, "System")
    event_id = get_child(system, "EventID")
    return int(event_id.text)
class Mmap(object):
    """
    Convenience class for opening a read-only memory map for a file path.

    Use as a context manager: entering opens the file and yields the
    mmap object; leaving closes both the map and the file.
    """
    def __init__(self, filename):
        """
        @type filename: str
        @param filename: path of the file to map
        """
        super(Mmap, self).__init__()
        self._filename = filename
        self._f = None
        self._mmap = None

    def __enter__(self):
        """
        Open the file and map its whole content read-only.

        @rtype: mmap.mmap
        @raise Exception: whatever open() or mmap.mmap() raise, e.g. when
          the file is empty; the file handle is closed before re-raising.
        """
        self._f = open(self._filename, "rb")
        try:
            self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception:
            # __exit__ is not invoked when __enter__ fails, so the handle
            # must be released here
            self._f.close()
            self._f = None
            raise
        return self._mmap

    def __exit__(self, type, value, traceback):
        """
        Close the map and the file. Safe to call more than once.
        """
        # compare against None: truth-testing a closed mmap raises
        if self._mmap is not None:
            self._mmap.close()
            self._mmap = None
        if self._f is not None:
            self._f.close()
            self._f = None
================================================
FILE: evtxtract/version.py
================================================
# Package version string. setup.py reads this file and exec()s it to obtain
# __version__, so it should stay a plain assignment.
__version__ = '0.2.4'
================================================
FILE: evtxtract.spec
================================================
# -*- mode: python -*-
# Build specification for the `evtxtract` executable.
# NOTE(review): Analysis, TOC, PYZ, EXE and COLLECT are not imported here --
# presumably PyInstaller injects them when it executes this spec; confirm.

block_cipher = None


# Analyse the command line entry point; the tkinter variants are excluded.
a = Analysis(
    ['evtxtract/main.py'],
    pathex=['evtxtract'],
    binaries=None,
    datas=None,
    hiddenimports=[],
    hookspath=None,
    runtime_hooks=None,
    excludes=["tkinter", "_tkinter", "Tkinter"],
    win_no_prefer_redirects=None,
    win_private_assemblies=None,
    cipher=None)

# Remove the sqlite, ssl and tcl/tk entries from the collected binaries.
a.binaries = a.binaries - TOC([
    ('sqlite3.dll', None, None),
    ('tcl85.dll', None, None),
    ('tk85.dll', None, None),
    ('_sqlite3', None, None),
    ('_ssl', None, None),
    ('_tkinter', None, None)])

pyz = PYZ(a.pure, a.zipped_data, cipher=None)

# Console executable named `evtxtract`; the binaries are passed in and
# exclude_binaries is False.
exe = EXE(pyz,
          a.scripts,
          a.binaries,
          exclude_binaries=False,
          name='evtxtract',
          #icon='resources/icon.ico',
          debug=False,
          strip=None,
          upx=True,
          console=True )

# Collection of the executable together with binaries, zipfiles and datas,
# named `evtxtract-dat`.
coll = COLLECT(exe,
               a.binaries,
               a.zipfiles,
               a.datas,
               strip=None,
               upx=True,
               name='evtxtract-dat')
================================================
FILE: setup.py
================================================
#!/usr/bin/env python
# Packaging script: registers the `evtxtract` distribution with setuptools.
import os
import setuptools

# this sets __version__
# # via: http://stackoverflow.com/a/7071358/87207
# # and: http://stackoverflow.com/a/2073599/87207
# The version file is executed rather than imported, so __version__ ends up
# defined in this script's namespace. The path is relative, so this assumes
# the script runs from the repository root.
with open(os.path.join("evtxtract", "version.py"), "rb") as f:
    exec(f.read())


setuptools.setup(name="evtxtract",
                 version=__version__,
                 description="EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images.",
                 author="Willi Ballenthin",
                 author_email="william.ballenthin@fireeye.com",
                 url="https://github.com/williballenthin/evtxtract",
                 license="Apache 2.0 License",
                 packages=setuptools.find_packages(),
                 # installs an `evtxtract` command that calls evtxtract.main:main
                 entry_points={
                     "console_scripts": [
                         "evtxtract=evtxtract.main:main",
                     ]
                 },
                 # NOTE(review): pytest is listed as a runtime requirement;
                 # it looks like it is only needed by tests/ -- confirm
                 # whether it belongs here.
                 install_requires=[
                     'six',
                     'lxml',
                     'pytest',
                     'python-evtx>=0.5.2',
                 ],
                 )
================================================
FILE: tests/.gitignore
================================================
*memoryevtx/file.None*
*.dat
*.vacb
================================================
FILE: tests/fixtures.py
================================================
import os
import pytest
import evtxtract.utils
# Directory containing this file.
CD = os.path.dirname(__file__)
# Expected location of the memory image used by the fixtures below: it must
# sit next to this file.
IMAGE_PATH = os.path.join(CD, 'joshua1.vmem')
@pytest.fixture
def image(request):
    """
    Fixture providing the path of the test memory image.

    Raises RuntimeError when the image file is not present.
    """
    if os.path.exists(IMAGE_PATH):
        return IMAGE_PATH
    raise RuntimeError('required image %s does not exist. see readme.' % (IMAGE_PATH))
@pytest.fixture
def image_file(request):
    """
    Fixture providing the test image as a file object opened for binary
    reading. The file is closed once the test using it has finished.
    """
    # Resolve the `image` fixture through pytest instead of calling the
    # fixture function: pytest 4+ rejects direct calls to fixture functions.
    path = request.getfixturevalue('image')
    with open(path, 'rb') as f:
        yield f
@pytest.fixture
def image_mmap(request):
    """
    Fixture providing the test image as a read-only memory map. The map is
    closed once the test using it has finished.
    """
    # Resolve the `image` fixture through pytest instead of calling the
    # fixture function: pytest 4+ rejects direct calls to fixture functions.
    path = request.getfixturevalue('image')
    with evtxtract.utils.Mmap(path) as mm:
        yield mm
================================================
FILE: tests/readmd.txt
================================================
the tests require the image `joshua1.vmem` from:
- referenced: http://jessekornblum.livejournal.com/293291.html
- download: https://dl.dropboxusercontent.com/u/55819714/joshua1.zip
================================================
FILE: tests/test_all.py
================================================
import logging
import evtxtract
import evtxtract.carvers
from fixtures import *
#logging.basicConfig(level=logging.DEBUG)
#logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_find_chunks(image_mmap):
    """
    The chunk carver must report exactly the known chunk offsets.
    """
    # offsets gathered empirically from the test image
    known_offsets = {
        0xc7f000,
        0xf0e000,
        0x1374f20,
        0x70cc000,
        0xd727440,
        0xdfe7000,
        0x18851080,
        0x1c31d000,
        0x20b362c0,
        0x276f8000,
        0x2833e000,
        0x28b4e000,
        0x28b68000,
        0x28d5e000,
        0x28ead000,
        0x2986e000,
        0x2998c000,
        0x29a9c000,
        0x2ff30000,
        0x2ffd0000,
        0x3070f000,
        0x30c1f000,
        0x30c8f000,
        0x30dbf000,
        0x30f2f000,
        0x30fff000,
        0x3126f000,
        0x328eac10,
        0x34b75000,
        0x38835000,
        0x39981910,
        0x39cc07a0,
        0x3b91b000,
    }

    carved_offsets = set(evtxtract.carvers.find_evtx_chunks(image_mmap))
    assert known_offsets == carved_offsets
def first(s):
    """
    Return the first item produced by iterating `s`, or None when `s`
    yields nothing.
    """
    return next(iter(s), None)
def test_extract_records(image_mmap):
    """
    Records recovered from the carved chunks must have exactly the known
    offsets and event IDs.
    """
    # offsets gathered empirically from the test image
    known_offsets = {
        0xf0e200,
        0x70cc200,
        0x70cca30,
        0x1c31d200,
        0x1c31d858,
        0x20b364c0,
        0x20b36b80,
        0x276f8200,
        0x276f88c0,
        0x29a9c200,
        0x30dbf200,
        0x30dbf8c8,
        0x30dbfb68,
        0x30dbfde8,
        0x34b75200,
        0x34b758a0,
        0x3b91b200,
    }

    # event IDs gathered empirically from the test image
    known_eids = {1, 2, 5, 21, 22, 100, 306, 823, 1001, 1002, 1006, 1009, 1020}

    seen_offsets = set()
    seen_eids = set()
    for chunk in evtxtract.carvers.find_evtx_chunks(image_mmap):
        for rec in evtxtract.carvers.extract_chunk_records(image_mmap, chunk):
            seen_offsets.add(rec.offset)
            seen_eids.add(rec.eid)

    assert known_offsets == seen_offsets
    assert known_eids == seen_eids
def test_extract_templates(image_mmap):
    """
    Templates recovered from the carved chunks must have exactly the known
    template ids.
    """
    # template ids gathered empirically from the test image
    known_ids = {
        "1-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]",
        "2-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]",
        "21-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]",
        "22-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]",
        "5-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|1|n]",
        "100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]",
        "306-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "823-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]-[19|1|n]-[20|20|n]-[21|1|n]",
        "1001-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "1002-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]",
        "1006-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|13|n]-[19|13|n]",
        "1009-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|8|n]",
        "1020-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]",
    }

    seen_ids = set()
    for chunk in evtxtract.carvers.find_evtx_chunks(image_mmap):
        for tmpl in evtxtract.carvers.extract_chunk_templates(image_mmap, chunk):
            seen_ids.add(tmpl.get_id())

    assert known_ids == seen_ids
def test_find_records(image_mmap):
    """
    The record carver must report the known first offset, last offset and
    total number of records for the test image.
    """
    offsets = list(evtxtract.carvers.find_evtx_records(image_mmap))
    earliest = offsets[0]
    assert earliest == 0x317198
    latest = offsets[-1]
    assert latest == 0x3D706A88
    assert len(offsets) == 1674
def test_evtxtract(image_mmap):
    """
    End-to-end extraction must yield the known numbers of complete and
    incomplete records, and nothing of any other type.
    """
    complete = 0
    incomplete = 0
    for recovered in evtxtract.extract(image_mmap):
        if isinstance(recovered, evtxtract.CompleteRecord):
            complete += 1
            continue
        if isinstance(recovered, evtxtract.IncompleteRecord):
            incomplete += 1
            continue
        raise RuntimeError('unexpected return type')

    assert complete == 52
    assert incomplete == 1615