Repository: williballenthin/EVTXtract Branch: master Commit: 0895be4c2512 Files: 16 Total size: 65.6 KB Directory structure: gitextract_2o9c9iwa/ ├── .gitignore ├── .travis.yml ├── LICENSE.TXT ├── README.md ├── evtxtract/ │ ├── __init__.py │ ├── carvers.py │ ├── main.py │ ├── templates.py │ ├── utils.py │ └── version.py ├── evtxtract.spec ├── setup.py └── tests/ ├── .gitignore ├── fixtures.py ├── readmd.txt └── test_all.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.py[cod] # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject <<<<<<< HEAD extract_valid_evtx_records_and_templates.py~ find_evtx_chunks.py~ *~ *.evtx workspace/*.xml workspace/*.txt .idea/* .idea *_templates.txt *_chunks.txt ================================================ FILE: .travis.yml ================================================ env: global: - secure: "j89gGCxDhMdJ9vP/dUhu06XUqYMeqMjxIx8/s8KdVOhE0BxOddU7dIQE5SvcGYMoW+W4NV+7/Pio/eIkY3SUXGOLlPLMLwMDmvg9nA6HwrcSs6zPGreCYhqf7RlCNEyHHoWZ6syHjx1cEL2c1FyXLelQ8r5ONAzWsTeDn7ctnFcGzr7EDhKEjC9LGZchjRMYVrWkOruskSwnJYkPCstNqcwLh7qPgAXktTxx0YMPIr0sTbwLTnZRiNCE0egFSoT6QLBggrM3Nv0DbZO7luyFEgozgp99CACDdJMeMsKqgkedk0E+nz2BV26EpqjjyIRJVMiwXZVLexkB8vSw9PhCGY36REwMIhJz6KCzVQMZyoNkSbrMWBpa3LqdfZGyMxBtLlBw/Yvv+pzB+OnbCIjooy1nTfOlyLlk6QzESnzEW/A/DsFVEnNXQjoAOZEZjj6SlaHl+r/Uw6sXAP4FjP8umI++E9+MnuI0T1bevX8ZMdJ6Qz7gPWnIuOGJqkvFqC7MYt3SY4O4O0DS1pHXQGoQOgoRYExY2VRqeJSEnRTvbr8S5uVWOOz5PRb4psM6gnl6eueN8uNNmqj+BDaZp8qu/uLJ31zvAq0q1+rxxaqB5OkVebi65Q7cN6IjnpCTCK5risooGNZRGco2thnRiuysQ3kEQjaWH/f15cfY8YYnuTA=" - secure: 
"xe37YTz7uegptreK36MVYl8+c9FzMqVsNr+/WMPBiZSmQ2T9mHoEP7QWL3AA1JSi7q2A3qWKNK15fUpFmu+u7+/15lFYUqihPEMZTBmmUYx6/I6bxP6d/sdu+mro13kflDzqTOdmkDU9X/Olympc5kI8qqheH6OTqwgjU0ypl/V53/3mXTgOre4LYvy+p0nkYdFCzipp+stuZyvn6tag57nvdnH8j8OLLR886ZV7KtB7RlOeaVK0NbyZ5XFBHJL1GXwV01lDfMsMokDHgkDfvjKVo63p1rYFScOUx7BBwpnSM1zr1hpHli562wQbs2eS4F8oYWOzXxhcRmvwAUxE/iqQvNyis+PD42xlhhOP1ubKJazqc9/AlKIHLNIvfsAVuFLt1eGI/g5/K+cpNUEs38+CXpqy2vTng4bFF4IabpxKlZxqpuCniKiDs5WExmhp2/fwXpNhdQmlh/WM2Mv4+vD8XyardIWZzmeR9EVFRAe+cGoejM+seGNKyfYxdOnj0fqmT2IORK9UKrmqisa9eBUOOg5kLirbnhfbax2J/FAcvAaMS8c5ZQHMPdswyaOdSpoJPPHULpI3uoPvYDFJcbuuZWlE0tuD/Qm5/4ABOeUfInOPAWtOBKYFl+YYIWUzCMEy6QxzJpSXqqXtNlR5y1l4M+PQzVY463+DXK3XBXE=" - ARTIFACTS_BUCKET=build-artifacts.floss.flare.fireeye.com language: python matrix: include: - os: linux sudo: required python: 2.7 - os: linux sudo: required python: 3.5 # travis doesn't have py2.7 available, so we have to do it ourselves # ref: https://github.com/travis-ci/travis-ci/issues/2312 - os: osx language: generic before_install: # fix erroring OSX job because of rvm issue - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then rvm get stable --auto-dotfiles; fi # travis doesn't have py2.7 available, so we have to do it ourselves # ref: https://github.com/travis-ci/travis-ci/issues/2312 - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then git clone https://github.com/MacPython/terryfy ../terryfy; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then source ../terryfy/travis_tools.sh; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then get_python_environment macpython 2.7.10; fi install: - pip install pyinstaller pep8 - echo "__version__ = '$(git describe --tags)'" > evtxtract/version.py - pip install -e . - pyinstaller evtxtract.spec && rm -r './dist/evtxtract-dat/' script: - find . -name \*.py -exec pep8 --ignore=E501 {} \; - pushd ./tests && wget "https://dl.dropboxusercontent.com/u/55819714/joshua1.zip" && unzip joshua1.zip && popd - py.test tests/ -v addons: artifacts: debug: true paths: - $(find . 
-type f | grep -e '/bin/' -e 'dist/evtxtract' | awk 1 ORS=':') target_paths: travis/$TRAVIS_OS_NAME/ ================================================ FILE: LICENSE.TXT ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ Purpose ------- EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images. Quick Run --------- Install EVTXtract via `pip`: pip install evtxtract Now the tool is ready to go! C:/Python27/Scripts/evtxtract.exe Z:/evidence/1/image.dd > Z:/work/1/evtx.xml Quicker Run ----------- Download standalone executable nightly builds of EVTXtract here: - [Linux](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/linux/dist/evtxtract) - [MacOS](https://s3.amazonaws.com/build-artifacts.floss.flare.fireeye.com/travis/osx/dist/evtxtract) Then you can do: ./evtxtract /path/to/evidence > /path/to/output.xml Background ---------- EVTX records are XML fragments encoded using a Microsoft-specific binary XML representation. 
Despite the convenient format, it is not easy to recover EVTX event log records from a corrupted file or unallocated space. This is because the complete representation of a record often depends on other records found nearby. The event log service recognizes similarities among records and refactors commonalities into "templates". A template is a fixed structure with placeholders that reserve space for variable content. The on-disk event log record structure is a reference to a template, and a list of substitutions (the variable content that replaces a placeholder in a template). To decode a record into XML, the event log service resolves the template and replaces its placeholders with the entries of the substitution array. Therefore, template corruption renders many records unrecoverable within the local 64KB "chunk". However, the substitution array for the remaining records may still be intact. If so, it may be possible to produce XML fragments that match the original records if the damaged template can be reconstructed. For many common events, such as process creation or account logon, empirical testing demonstrates the relevant templates remain mostly constant. In these cases, recovering event log records boils down to identifying appropriate templates found in other EVTX chunks. Algorithm --------- 1. Scan for chunk signatures ("ElfChnk") - check header for sane values (0x80 <= size <= 0x200) - verify checksums (header, data) 2. Extract records from valid chunks found in (1) 3. Extract templates from valid chunks found in (1) 4. Scan for record signatures - check header for sane values - extract timestamp - attempt to parse substitutions - attempt to decode substitutions into EID, other fields 5. Reconstruct records by reusing old templates with recovered substitutions Usage ----- EVTXtract is a pure Python script. This means it easily runs on Windows, Linux, and MacOS. 
def extract(buf):
    '''
    Run the EVTXtract recovery algorithm over a buffer of raw data.

    Records that live inside intact chunks are emitted first. Afterwards
    every other carved record is paired with the templates harvested from
    those intact chunks; a record is only rebuilt into XML when exactly one
    template accepts its substitutions.

    Args:
      buf (buffer): the binary data from which to extract structures.

    Returns:
      iterable[union[CompleteRecord, IncompleteRecord]]: a generator that
        yields CompleteRecord for fully reconstructed records and
        IncompleteRecord (offset, eid, raw substitutions) for the rest.
        Callers must type-switch on the class to handle each kind.
    '''
    carvers = evtxtract.carvers

    # full scan of the data (#1): locate the intact chunks.
    chunk_offsets = set(carvers.find_evtx_chunks(buf))

    # records inside intact chunks are already complete; remember where they
    # are so that the record scan below does not report them a second time.
    known_offsets = set()
    for chunk_offset in chunk_offsets:
        for chunk_record in carvers.extract_chunk_records(buf, chunk_offset):
            known_offsets.add(chunk_record.offset)
            yield CompleteRecord(chunk_record.offset,
                                 chunk_record.eid,
                                 chunk_record.xml)

    # eid -> {template id -> template}
    templates_by_eid = collections.defaultdict(dict)
    for chunk_offset in chunk_offsets:
        for tmpl in carvers.extract_chunk_templates(buf, chunk_offset):
            templates_by_eid[tmpl.eid][tmpl.get_id()] = tmpl

    # full scan of the data (#2). this pass has to come after the loops
    # above because it needs the complete template collection.
    for offset in carvers.find_evtx_records(buf):
        if offset in known_offsets:
            continue

        try:
            carved = carvers.extract_record(buf, offset)
        except carvers.ParseError as e:
            logger.info('parse error for record at offset: 0x%x: %s',
                        offset, str(e))
            continue
        except ValueError as e:
            logger.info('timestamp parse error for record at offset: 0x%x: %s',
                        offset, str(e))
            continue
        except Exception as e:
            logger.info('unknown parse error for record at offset: 0x%x: %s',
                        offset, str(e))
            continue

        subs = carved.substitutions
        if len(subs) < 4:
            logger.info('too few substitutions for record at offset: 0x%x',
                        offset)
            continue

        # we just know that the EID is substitution index 3
        eid = subs[3][VALUE]

        candidates = {tmpl
                      for tmpl in templates_by_eid.get(eid, {}).values()
                      if tmpl.match_substitutions(subs)}

        if len(candidates) != 1:
            # zero or several candidates: hand back the raw substitutions
            # rather than guessing at a template.
            if not candidates:
                logger.info('no matching templates for record at offset: 0x%x',
                            offset)
            else:
                logger.info('too many templates for record at offset: 0x%x',
                            offset)
            yield IncompleteRecord(offset, eid, subs)
            continue

        (tmpl,) = candidates
        yield CompleteRecord(offset, eid, tmpl.insert_substitutions(subs))
""" if len(buf) < offset + 0x2C: # our accesses below will overflow return False magic = struct.unpack_from("<7s", buf, offset)[0] if magic != EVTX_HEADER_MAGIC: return False size = struct.unpack_from(" 100: return True ofs += 4 # template_id or size if max_offset < ofs + 4 + (4 * min(maybe_num_subs or 2, 4)): return False for i in range(min(maybe_num_subs or 2, 4)): byte = struct.unpack_from(" 100: raise ParseError("Unexpected number of substitutions: %d at %s" % (num_subs, hex(ofs))) ofs += 4 # begin sub list substitutions = [] for _ in range(num_subs): size, type_ = struct.unpack_from(" max_offset: raise MaxOffsetReached("Substitutions overran record buffer.") value = None #[0] = parse_null_type_node, if type_ == 0x0: value = None ret.append((type_, value)) #[1] = parse_wstring_type_node, elif type_ == 0x1: s = buf[ofs:ofs + size] s = s.decode('utf-16le') s = xml.sax.saxutils.escape(s) value = s ret.append((type_, value)) #[2] = parse_string_type_node, elif type_ == 0x2: s = buf[ofs:ofs + size] s = s.decode('ascii') s = xml.sax.saxutils.escape(s) value = s ret.append((type_, value)) #[3] = parse_signed_byte_type_node, elif type_ == 0x3: value = struct.unpack_from(" 1 ret.append((type_, value)) #[14] = parse_binary_type_node, elif type_ == 0xE: value = binascii.hexlify(buf[ofs:ofs + size]) ret.append((type_, value)) #[15] = parse_guid_type_node, elif type_ == 0xF: _bin = buf[offset:offset + 16] # Yeah, this is ugly h = [six.indexbytes(_bin, i) for i in range(len(_bin))] value = """{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}""".format( h[3], h[2], h[1], h[0], h[5], h[4], h[7], h[6], h[8], h[9], h[10], h[11], h[12], h[13], h[14], h[15]) ret.append((type_, value)) #[16] = parse_size_type_node, elif type_ == 0x10: if size == 0x4: value = struct.unpack_from("IH", buf, ofs + 2) value = "S-%d-%d" % (version, (id_high << 16) ^ id_low) for i in range(num_elements): val = struct.unpack_from(" 0: match = 
re.search(b"((?:[^\x00].)+)", bin) if match: frag = match.group() s = frag.decode("utf-16") s = xml.sax.saxutils.escape(s) value.append(s) bin = bin[len(frag) + 2:] if len(bin) == 0: break frag = re.search(b"(\x00*)", bin).group() if len(frag) % 2 == 0: for _ in range(len(frag) // 2): value.append('') else: raise ParseError("Error parsing uneven substring of NULLs") bin = bin[len(frag):] if value[-1].strip("\x00") == "": value = value[:-1] ret.append((type_, value)) else: raise ParseError("Unexpected type encountered: " + hex(type_)) ofs += size return ret ExtractedRecord = namedtuple( 'ExtractedRecord', ['offset', 'num', 'timestamp', 'substitutions']) def extract_record(buf, offset): """ Parse an EVTX record into a convenient dictionary of fields. Args: buf (buffer): the binary data from which to extract structures. offset (int): address of the EVTX record. Returns: ExtractedRecord: the thing you asked for. Raises: ParseError: for various reasons, including invalid timestamps and overruns. 
""" if not is_record(buf, offset): raise ValueError('not a record') record_size, record_num, qword = struct.unpack_from("') ret.append('0x%x' % (record.offset)) ret.append('%d' % (record.eid)) ret.append('') for i, (type_, value) in enumerate(record.substitutions): ret.append(' ' % (i)) ret.append(' %d' % (type_)) if value is None: ret.append(' ') else: ret.append(' %s' % (value)) ret.append(' ') ret.append('') ret.append('') return '\n'.join(ret) def main(argv=None): if argv is None: argv = sys.argv[1:] parser = argparse.ArgumentParser( description="Reconstruct EVTX event log records from binary data.") parser.add_argument("input", type=str, help="Path to binary input file") parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging") parser.add_argument("-q", "--quiet", action="store_true", help="Disable all output but errors") parser.add_argument("-s", "--split", action="store_true", help="split each event into its own file") parser.add_argument("-o", "--out", metavar='output-directory', action="store", help="output directory to store split files") args = parser.parse_args() if args.verbose: logging.basicConfig(level=logging.DEBUG) elif args.quiet: logging.basicConfig(level=logging.ERROR) else: logging.basicConfig(level=logging.INFO) if args.split and not args.out: logger.error('Error: the -o argument is required when using -s. 
class Template(object):
    """
    The readable XML skeleton of an EVTX event, in which variable content is
    represented by placeholders of the form
    ``[Normal Substitution(index=N, type=T)]`` or
    ``[Conditional Substitution(index=N, type=T)]``.

    A template knows which event ID it belongs to, can describe the
    substitutions it expects (`get_id`), can check whether a carved
    substitution array fits it (`match_substitutions`) and can render the
    final XML (`insert_substitutions`).
    """

    # groups: (mode, index, type). raw string, because the pattern consists
    # almost entirely of regex escapes.
    substitition_re = re.compile(
        r"\[(Conditional|Normal) Substitution\(index=(\d+), type=(\d+)\)\]")

    # the same set of types as `six.string_types` on Python 2 and Python 3,
    # spelled out so the check does not depend on anything but builtins.
    _text_types = (str, type(u""))

    def __init__(self, eid, xml):
        """
        @type eid: int
        @param eid: the event ID this template belongs to.
        @type xml: str
        @param xml: the template text, placeholders included.
        """
        self.eid = eid
        self.xml = xml
        self._cached_placeholders = None
        self._cached_id = None

    def get_id(self):
        """
        @rtype: str
        @return: A string that can be parsed into constraints describing what
          types of subsitutions this template can accept.
          Short example: 1100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]
        """
        if self._cached_id is not None:
            return self._cached_id

        parts = [str(self.eid)]
        for index, type_, is_conditional in self._get_placeholders():
            parts.append("[%s|%s|%s]" % (index, type_,
                                         "c" if is_conditional else "n"))
        self._cached_id = "-".join(parts)
        return self._cached_id

    def _get_placeholders(self):
        """
        Get descriptors for each of the substitutions required by this
        template, sorted by substitution index. The result is cached.

        Tuple schema: (index, type, is_conditional)

        @rtype: list of (int, int, boolean)
        """
        if self._cached_placeholders is not None:
            return self._cached_placeholders

        found = [(int(index), int(type_), mode == "Conditional")
                 for mode, index, type_
                 in Template.substitition_re.findall(self.xml)]
        self._cached_placeholders = sorted(found, key=lambda p: p[0])
        return self._cached_placeholders

    def match_substitutions(self, substitutions):
        """
        Checks to see if the provided set of substitutions match the
        placeholder values required by this template.

        Note, this is only a best guess. The number of substitutions *may* be
        greater than the number of available slots. So we must only check
        the slot and substitution types.

        A template without any placeholder puts no constraint on the
        substitutions and therefore matches.

        @type substitutions: list of (int, str)
        @param substitutions: Tuple schema (type, value)
        @rtype: boolean
        """
        logger = logging.getLogger("match_substitutions")
        placeholders = self._get_placeholders()
        logger.debug("Substitutions: %s", str(substitutions))
        logger.debug("Constraints: %s", str(placeholders))

        if len(placeholders) > len(substitutions):
            logger.debug("Failing on lens: %d vs %d",
                         len(placeholders), len(substitutions))
            return False

        if placeholders:
            # placeholders are sorted by index, so the last one is the max.
            # every index is used to subscript `substitutions` below, hence
            # it has to be strictly smaller than the list length.
            max_index = placeholders[-1][0]
            if max_index >= len(substitutions):
                logger.debug("Failing on max index: %d vs %d",
                             max_index, len(substitutions))
                return False

        # it seems that some templates request different values than what are
        # subsequently put in them. specifically, a Hex64 might be put into a
        # SizeType field (EID 4624). this maps from the type described in a
        # template to the additional types a record may provide for it.
        overrides = {
            16: set([21])
        }

        for index, type_, is_conditional in placeholders:
            sub_type, _ = substitutions[index]

            # a conditional slot may be left empty (NULL type).
            if is_conditional and sub_type == 0:
                continue

            if sub_type == type_:
                continue

            if sub_type in overrides.get(type_, ()):
                logger.debug("Overriding template type %d with substitution type %d",
                             type_, sub_type)
                continue

            logger.debug("Failing on type comparison, index %d: %d vs %d (mode: %s)",
                         index, sub_type, type_, is_conditional)
            return False

        return True

    escape_re = re.compile(r"\\\\(\d)")

    @staticmethod
    def _escape(value):
        """
        Escape the static value to be used in a regular expression
        subsititution. This processes any backreferences and makes them
        plain, escaped sequences.

        No longer used by `insert_substitutions`, which inserts values
        literally; kept so that existing callers keep working.

        @type value: str
        @rtype: str
        """
        return Template.escape_re.sub(r"\\\\\\\\\1", re.escape(value))

    def insert_substitutions(self, substitutions):
        """
        Return a copy of the template with the given substitutions inserted.

        Each placeholder whose index is covered by `substitutions` is
        replaced by the corresponding value; placeholders with a larger
        index are left in place. Values are inserted literally: they are
        never interpreted as regular expression replacement templates, so
        backslashes, group references and punctuation come out unchanged.
        The template is rewritten in a single pass, so placeholder-looking
        text inside a value is not expanded again.

        @type substitutions: list of (int, str)
        @param substitutions: an ordered list of (type:int, value:str)
        @rtype: str
        """
        texts = [value if isinstance(value, Template._text_types) else str(value)
                 for _, value in substitutions]

        def fill(match):
            index = int(match.group(2))
            if index < len(texts):
                return texts[index]
            # no substitution available for this slot
            return match.group(0)

        # a callable replacement is used verbatim by re.sub, unlike a string
        # replacement, which is parsed for escapes and backreferences.
        return Template.substitition_re.sub(fill, self.xml)
def get_template(record):
    """
    Build a Template from a complete Record.

    The event ID is read from the record's rendered XML, and the template
      body is produced by `get_complete_template` from the record's root
      node.

    @type record: Record
    @rtype: Template
    """
    eid = evtxtract.utils.get_eid(Evtx.Views.evtx_record_xml_view(record))
    complete_xml = get_complete_template(record.root())
    return Template(eid, complete_xml)
class Mmap(object):
    """
    Convenience class for opening a read-only memory map for a file path.

    Use as a context manager: entering opens the file and yields the
      `mmap.mmap` object; exiting closes both the map and the file.
    """
    def __init__(self, filename):
        """
        @type filename: str
        @param filename: path of the file to map.
        """
        super(Mmap, self).__init__()
        self._filename = filename
        self._f = None
        self._mmap = None

    def __enter__(self):
        """
        Open the file and map it read-only.

        @rtype: mmap.mmap
        """
        self._f = open(self._filename, "rb")
        try:
            self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ)
        except Exception:
            # __exit__ is not invoked when __enter__ raises (e.g. mapping an
            #  empty file), so release the file handle here to avoid a leak.
            self._f.close()
            self._f = None
            raise
        return self._mmap

    def __exit__(self, type, value, traceback):
        """
        Close the map and the underlying file, if they are open.
        Exceptions from the `with` body are not suppressed.
        """
        if self._mmap is not None:
            self._mmap.close()
            self._mmap = None
        if self._f is not None:
            self._f.close()
            self._f = None
#!/usr/bin/env python
import os

import setuptools

# Executing version.py defines __version__ in this script's namespace.
#
# via: http://stackoverflow.com/a/7071358/87207
# and: http://stackoverflow.com/a/2073599/87207
version_path = os.path.join("evtxtract", "version.py")
with open(version_path, "rb") as f:
    exec(f.read())

description = "EVTXtract recovers and reconstructs fragments of EVTX log files from raw binary data, including unallocated space and memory images."

# packages required at install time
requirements = [
    'six',
    'lxml',
    'pytest',
    'python-evtx>=0.5.2',
]

# command line entry points installed with the package
entry_points = {
    "console_scripts": [
        "evtxtract=evtxtract.main:main",
    ]
}

setuptools.setup(
    name="evtxtract",
    version=__version__,
    description=description,
    author="Willi Ballenthin",
    author_email="william.ballenthin@fireeye.com",
    url="https://github.com/williballenthin/evtxtract",
    license="Apache 2.0 License",
    packages=setuptools.find_packages(),
    entry_points=entry_points,
    install_requires=requirements,
)
@pytest.fixture
def image_file(image):
    """
    Fixture yielding the test image opened as a binary file object.

    The path comes from the `image` fixture, requested by name so that
      pytest resolves it; calling a fixture function directly is rejected
      by pytest 4 and later. The file is closed when the test finishes.

    @type image: str
    @param image: path provided by the `image` fixture.
    """
    with open(image, 'rb') as f:
        yield f


@pytest.fixture
def image_mmap(image):
    """
    Fixture yielding the test image as a read-only memory map.

    The path comes from the `image` fixture, requested by name so that
      pytest resolves it; calling a fixture function directly is rejected
      by pytest 4 and later. The map is closed when the test finishes.

    @type image: str
    @param image: path provided by the `image` fixture.
    """
    with evtxtract.utils.Mmap(image) as mm:
        yield mm
evtxtract.carvers.find_evtx_chunks(image_mmap): for recovered_record in evtxtract.carvers.extract_chunk_records(image_mmap, chunk_offset): found_offsets.add(recovered_record.offset) found_eids.add(recovered_record.eid) assert expected_offsets == found_offsets assert expected_eids == found_eids def test_extract_templates(image_mmap): # these template ids were empirically collected from the test image expected_ids = set([ "1-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]", "2-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]", "21-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]", "22-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|1|n]", "5-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|1|n]", "100-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]", "306-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]", "823-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|1|n]-[19|1|n]-[20|20|n]-[21|1|n]", "1001-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]", 
"1002-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]", "1006-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|8|n]-[18|13|n]-[19|13|n]", "1009-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]-[18|8|n]-[19|8|n]", "1020-[0|4|c]-[1|4|c]-[2|6|c]-[3|6|c]-[4|6|c]-[5|21|c]-[6|17|c]-[7|15|c]-[8|8|c]-[9|8|c]-[10|10|c]-[11|4|c]-[12|19|c]-[13|15|c]-[14|1|c]-[15|15|c]-[16|1|c]-[17|1|n]", ]) found_ids = set([]) for chunk_offset in evtxtract.carvers.find_evtx_chunks(image_mmap): for template in evtxtract.carvers.extract_chunk_templates(image_mmap, chunk_offset): found_ids.add(template.get_id()) assert expected_ids == found_ids def test_find_records(image_mmap): records = list(evtxtract.carvers.find_evtx_records(image_mmap)) assert records[0] == 0x317198 assert records[-1] == 0x3D706A88 assert len(records) == 1674 def test_evtxtract(image_mmap): num_complete = 0 num_incomplete = 0 for r in evtxtract.extract(image_mmap): if isinstance(r, evtxtract.CompleteRecord): num_complete += 1 elif isinstance(r, evtxtract.IncompleteRecord): num_incomplete += 1 else: raise RuntimeError('unexpected return type') assert num_complete == 52 assert num_incomplete == 1615