Repository: KoreLogicSecurity/mastiff Branch: master Commit: 04d569e4fa59 Files: 97 Total size: 283.3 KB Directory structure: gitextract_tt3ov715/ ├── .gitattributes ├── .gitignore ├── MANIFEST.in ├── Makefile ├── PKG-INFO ├── README ├── README.CREDITS ├── README.INSTALL ├── README.LICENSE ├── README.PLUGINS ├── mas.py ├── mastiff/ │ ├── __init__.py │ ├── conf.py │ ├── core.py │ ├── filetype.py │ ├── plugins/ │ │ ├── __init__.py │ │ ├── analysis/ │ │ │ ├── EXE/ │ │ │ │ ├── EXE-peinfo.py │ │ │ │ ├── EXE-peinfo.yapsy-plugin │ │ │ │ ├── EXE-resources.py │ │ │ │ ├── EXE-resources.yapsy-plugin │ │ │ │ ├── EXE-sig.py │ │ │ │ ├── EXE-sig.yapsy-plugin │ │ │ │ ├── EXE-singlestring.py │ │ │ │ ├── EXE-singlestring.yapsy-plugin │ │ │ │ └── __init__.py │ │ │ ├── GEN/ │ │ │ │ ├── GEN-fileinfo.py │ │ │ │ ├── GEN-fileinfo.yapsy-plugin │ │ │ │ ├── GEN-fuzzy.py │ │ │ │ ├── GEN-fuzzy.yapsy-plugin │ │ │ │ ├── GEN-hex.py │ │ │ │ ├── GEN-hex.yapsy-plugin │ │ │ │ ├── GEN-mastiff-online.py │ │ │ │ ├── GEN-mastiff-online.yapsy-plugin │ │ │ │ ├── GEN-metascan.py │ │ │ │ ├── GEN-metascan.yapsy-plugin │ │ │ │ ├── GEN-strings.py │ │ │ │ ├── GEN-strings.yapsy-plugin │ │ │ │ ├── GEN-virustotal.py │ │ │ │ ├── GEN-virustotal.yapsy-plugin │ │ │ │ ├── GEN-yara.py │ │ │ │ ├── GEN-yara.yapsy-plugin │ │ │ │ └── __init__.py │ │ │ ├── Office/ │ │ │ │ ├── Office-metadata.py │ │ │ │ ├── Office-metadata.yapsy-plugin │ │ │ │ ├── Office-pyOLEScanner.py │ │ │ │ ├── Office-pyOLEScanner.yapsy-plugin │ │ │ │ └── __init__.py │ │ │ ├── PDF/ │ │ │ │ ├── PDF-metadata.py │ │ │ │ ├── PDF-metadata.yapsy-plugin │ │ │ │ ├── PDF-pdfid.py │ │ │ │ ├── PDF-pdfid.yapsy-plugin │ │ │ │ ├── PDF-pdfparser.py │ │ │ │ ├── PDF-pdfparser.yapsy-plugin │ │ │ │ └── __init__.py │ │ │ ├── ZIP/ │ │ │ │ ├── ZIP-extract.py │ │ │ │ ├── ZIP-extract.yapsy-plugin │ │ │ │ ├── ZIP-zipinfo.py │ │ │ │ ├── ZIP-zipinfo.yapsy-plugin │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── category/ │ │ │ ├── EXE.yapsy-plugin │ │ │ ├── PDF.yapsy-plugin │ │ 
│ ├── __init__.py │ │ │ ├── categories.py │ │ │ ├── exe.py │ │ │ ├── generic.py │ │ │ ├── generic.yapsy-plugin │ │ │ ├── office.py │ │ │ ├── office.yapsy-plugin │ │ │ ├── pdf.py │ │ │ ├── zip.py │ │ │ └── zip.yapsy-plugin │ │ └── output/ │ │ ├── OUTPUT-raw.py │ │ ├── OUTPUT-raw.yapsy-plugin │ │ ├── OUTPUT-text.py │ │ ├── OUTPUT-text.yapsy-plugin │ │ └── __init__.py │ ├── queue.py │ └── sqlite.py ├── mastiff.conf ├── pylint.rc ├── setup.cfg ├── setup.py ├── skeleton/ │ ├── OUTPUT-skel.py │ ├── OUTPUT-skel.yapsy-plugin │ ├── analysis-ext-skel.py │ ├── analysis-ext-skel.yapsy-plugin │ ├── analysis-skel.py │ ├── analysis-skel.yapsy-plugin │ ├── category-skel.py │ ├── category-skel.yapsy-plugin │ └── output-skel.yapsy-plugin ├── tests/ │ ├── import-test.sh │ ├── mastiff-test.sh │ └── test.doc └── utils/ ├── version2string └── version_helper ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ * ident ================================================ FILE: .gitignore ================================================ ================================================ FILE: MANIFEST.in ================================================ include *.py include *.yapsy-plugin include docs/*.pdf include pylint.rc include Makefile include README include README.CREDITS include README.INSTALL include README.LICENSE include README.PLUGINS include mastiff.conf include skeleton/*.py include skeleton/*.yapsy-plugin include tests/* include utils/* exclude README.RELENG recursive-exclude docs *.odt recursive-include mastiff *.py *.yapsy-plugin ================================================ FILE: Makefile ================================================ # $Id: 77c80f02785dfc5ef2f764bfe7f487dc0c165278 $ # # Makefile for installation of mastiff. 
# all: build build:: @ python setup.py build check test: @ bash tests/import-test.sh `pwd` @ bash tests/mastiff-test.sh @ rm -rf work/ check-clean test-clean: clean @ rm -f tests/test-*.txt clean: @ rm -f `find . -name "*.pyc" -o -name "*~"` @ rm -rf dist build mastiff.egg-info @ rm -f tests/*.txt clean-all: check-clean dev-clean dev: @ python setup.py develop dev-clean: clean @ python setup.py develop --uninstall @ rm -f /usr/local/bin/mas.py dist sdist:: @ python setup.py sdist install: build @ python setup.py install lint: @ find . -name "*.py" -exec pylint --rcfile=pylint.rc {} \; sign: dist @ version_number=`egrep '^version = 0x' mastiff/__init__.py | awk '{print $$3}'` ; \ version_string=`utils/version2string -t tar -v $${version_number}` ; \ dist_file="dist/mastiff-$${version_string}.tar.gz" ; \ gpg --default-key 64615D14 -s -b $${dist_file} ================================================ FILE: PKG-INFO ================================================ Metadata-Version: 1.0 Name: mastiff Version: 0.8.0.ds0 Summary: MASTIFF is a static analysis automation framework. Home-page: http://www.korelogic.com Author: Tyler Hudak Author-email: mastiff-project@korelogic.com License: Apache License V2.0 Description: MASTIFF is a static analysis framework that automates the process of extracting key characteristics from a number of different file formats. To ensure the framework remains flexible and extensible, a community-driven set of plug-ins is used to perform file analysis and data extraction. While originally designed to support malware, intrusion, and forensic analysis, the framework is well-suited to support a broader range of analytic needs. In a nutshell, MASTIFF allows analysts to focus on analysis rather than figuring out how to parse files. 
Platform: Linux ================================================ FILE: README ================================================ REVISION $Id: 17f09461545f9d0409f9480a417c3831ae34539d $ OVERVIEW MASTIFF is a static analysis framework that automates the process of extracting key characteristics from a number of different file formats. To ensure the framework remains flexible and extensible, a community-driven set of plug-ins is used to perform file analysis and data extraction. While originally designed to support malware, intrusion, and forensic analysis, the framework is well-suited to support a broader range of analytic needs. In a nutshell, MASTIFF allows analysts to focus on analysis rather than figuring out how to parse files. The MASTIFF Project is hosted at: https://git.korelogic.com/mastiff.git/ DOCUMENTATION General documentation is located in the docs directory. See the README.INSTALL file for instructions on how to build, test, and install the framework. LICENSE The terms and conditions under which this software is released are set forth in README.LICENSE. ================================================ FILE: README.CREDITS ================================================ REVISION $Id: 02e5406c2bbd4202e46796589395a4611897b806 $ CREDITS Tyler Hudak (author, maintainer) Klayton Monroe (contributor, maintainer) SPONSORS DARPA Cyber Fast Track Program (2012) KoreLogic (2012-present) ================================================ FILE: README.INSTALL ================================================ REVISION $Id: daec28262cb37c5a4952618675b33e234e48773d $ OVERVIEW MASTIFF is a static analysis framework that automates the process of extracting key characteristics from a number of different file formats. To ensure the framework remains flexible and extensible, a community-driven set of plug-ins is used to perform file analysis and data extraction. 
While originally designed to support malware, intrusion, and forensic analysis, the framework is well-suited to support a broader range of analytic needs. In a nutshell, MASTIFF allows analysts to focus on analysis rather than figuring out how to parse files. The MASTIFF Project is hosted at: https://git.korelogic.com/mastiff.git/ TECHNICAL REQUIREMENTS The following software must be installed for MASTIFF to work properly. - Python 2.6.6 or greater - Yapsy 1.10 or greater (http://yapsy.sourceforge.net/) - Python sqlite3 (http://docs.python.org/library/sqlite3) - Python setuptools (http://pypi.python.org/pypi/setuptools/) - Yara, libyara and yara-python (http://code.google.com/p/yara-project) A Python libmagic library is also required. MASTIFF supports two different libmagic libraries: - libmagic Python extensions (ftp://ftp.astron.com/pub/file/) This may be installed through the source code above or is the library installed as python-magic in most Linux code repositories. - Python-magic (https://github.com/ahupp/python-magic/) This may be installed through the source code above or via Python pip. PREREQUISITES INSTALLATION The Python setuptools and magic libraries will need to be installed on your own. For Debian/Ubuntu-based distributions, this can be accomplished with: $ sudo aptitude install python-setuptools $ sudo aptitude install python-magic On Gentoo-based distributions, there is no Python magic package. However, adding the python USE flag to the sys-apps/file package will create the correct Python libraries. Setuptools can be installed as follows: $ sudo emerge -av setuptools Yapsy will automatically download and install when the make program is run, or you can download and install it on your own. Yapsy is also located in the Gentoo Portage repository. $ sudo emerge -av yapsy Note that the plug-ins utilized by MASTIFF may have their own prerequisites. 
TESTING MASTIFF comes with a test suite that can be used to determine if all prerequisites have been properly installed and MASTIFF is able to analyze files correctly. To run these tests, run: $ make test Two sets of tests will run. - Python imports for all MASTIFF core files and plug-ins will be checked to ensure they can be imported. Any that cannot will be displayed. - MASTIFF will examine 4 different files to ensure there are no issues. All output will go into the tests/ directory. INSTALLATION If you wish to only test out MASTIFF, skip to the Development Testing section. MASTIFF utilizes the Python setuptools code for installation of the package. The easiest way to install the package is: $ sudo make install This will install the package into the appropriate Python site-packages directory for your system. It will also install mas.py, the main MASTIFF wrapper script, into /usr/local/bin. If you do not have Yapsy installed, it will attempt to download and install it for you. If you install using this method, the only way to uninstall is to manually delete files. After installing MASTIFF, modify the mastiff.conf configuration file to ensure the options for plug-ins are correctly set for your analysis system. DEVELOPMENT TESTING If instead you wish to only test it for development purposes, run the following command: $ sudo make dev This will install placeholders into the Python dist-packages that point to this directory. Any modifications made to the code will automatically be reflected when running the software. Additionally, mas.py will be placed in /usr/local/bin. To uninstall the dev environment, run: $ sudo make dev-clean This will remove all placeholders as well as /usr/local/bin/mas.py. PLUG-IN REQUIREMENTS At the current release, the plug-ins utilized by MASTIFF require a number of additional libraries or programs to be installed.
- ssdeep (http://ssdeep.sourceforge.net/) - pydeep (https://github.com/kbandla/pydeep) - Yara, libyara and yara-python (http://code.google.com/p/yara-project) - simplejson (https://github.com/simplejson/simplejson) - Didier Stevens' pdf-parser.py (http://blog.didierstevens.com/programs/pdf-tools/) - Didier Stevens' pdfid.py (http://blog.didierstevens.com/programs/pdf-tools/) - exiftool (http://www.sno.phy.queensu.ca/~phil/exiftool/) - pefile library (http://code.google.com/p/pefile/) NOTE: Do NOT install pefile from the Debian/Ubuntu repository! Install from source! - disitool.py (http://blog.didierstevens.com/programs/disitool/) - openssl binary (http://www.openssl.org/) - Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py (https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip) - distorm (http://code.google.com/p/distorm/) Some of these programs may be available from your distribution's software repository, and some may need to be installed from source. After these programs have been installed, be sure to check the MASTIFF configuration file and update all configuration options to point to the correct locations. RUNNING MASTIFF The best way to run MASTIFF is to use the mas.py program. This script has been written to provide you with the maximum number of options for using MASTIFF. This script will be installed to /usr/local/bin when you install the package. mas.py can be run by only giving it a file or directory to analyze as an argument. $ mas.py /path/to/file2analyze If MASTIFF is given a directory, it will enumerate all files within that directory, and every subdirectory, and analyze them. Although the only required argument is the filename or directory to be analyzed, the following table lists available options. -c CONFIG_FILE, --conf=CONFIG_FILE Use an alternate config file. The default is './mastiff.conf'. -h, --help Show the help message and exit.
-l PLUGIN_TYPE, --list=PLUGIN_TYPE List all available plug-ins of the specified type and exit. Type must be one of 'analysis', 'cat', or 'output'. -o OVERRIDE, --option=OVERRIDE Override a config file option. Configuration options should be specified as 'Section.Key=Value' and should be quoted if any whitespace is present. Multiple overrides can be specified by using multiple '-o' options. -p PLUGIN_NAME, --plugin=PLUGIN_NAME Only run the specified analysis plug-in. Name must be quoted if it contains whitespace. -q, --quiet Only log errors. -t FTYPE, --type=FTYPE Force file to be analyzed with plug-ins from the specified category (e.g., EXE, PDF, etc.). Run with '-l cat' to list all available category plug-ins. -V, --verbose Print verbose logs. -v, --version Show program's version number and exit. Queue Options: --append-queue Append file or directory to job queue and exit. --clear-queue Clear job queue and exit. --ignore-queue Ignore the job queue and just process file. --list-queue List the contents of the job queue and exit. --resume-queue Continue processing the queue. ================================================ FILE: README.LICENSE ================================================ REVISION $Id: f19abdb0df9b2aadb274fb66a8f813edb7f508a0 $ OVERVIEW This document contains licensing information for The MASTIFF Project, which was established by Tyler Hudak of KoreLogic, Inc. in 2012. Unless specifically excluded, all files in this project fall under the terms and conditions of the Apache License, Version 2.0 as stated below. Excluded files or components that fall under other licenses are detailed below as well. THE APACHE LICENSE VERSION 2.0 (MASTIFF) Copyright 2012-2013 The MASTIFF Project All rights reserved. 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
THE NEW BSD LICENSE (WebJob) This project includes software developed for The WebJob Project, which is distributed under the following terms and conditions: Copyright 2006-2013 The WebJob Project All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the names of the copyright holders nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.PLUGINS ================================================ REVISION $Id: 9a263fb024741bc9fa6fafd3b146d260e9db4d26 $ SKELETON PLUG-INS The project's skeleton directory contains three types of skeleton plug-ins that can be used to start coding your own plug-ins for the framework. 
Just choose the skeleton code for the type of plug-in you would like to develop, modify a few lines, and start coding. Note that these files are intended to serve as examples and helpful hints on how to get started, not as definitive ways to create plug-ins. The three types of skeleton plug-ins are: - category-skel: A skeleton category plug-in to define a new file type. - analysis-skel: A skeleton analysis plug-in to define a new type of analysis. This code is for a Generic plug-in, but can be easily modified for any file-type category. - analysis-ext-skel: A skeleton analysis plug-in to define a new type of analysis that calls an external program. This type of plug-in is excellent for acting as a wrapper script around another program. ================================================ FILE: mas.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ MASTIFF - MAlicious Static Inspection File Framework This program implements the code necessary to statically analyze files within a plugin-based framework. """ __version__ = "$Id: e3288d64e94fb2c155552a6922e77e347081d77f $" import sys import logging import os import os.path #import signal if sys.version_info < (2, 6, 6): sys.stderr.write("Mastiff requires python version 2.6.6") sys.exit(1) from optparse import OptionParser, OptionGroup import mastiff.core as Mastiff from mastiff import get_release_string import mastiff.queue as queue def add_to_queue(job_queue, fname): """ Add file and/or directory to job queue.
""" log = logging.getLogger('Mastiff.queue') # check to see if we are dealing with a directory or a file and handle correctly if os.path.isdir(fname) is True: # This is a directory - walk it and add all its files log.info('Adding directory %s to queue.' % fname) for root, _, files in os.walk(fname): for new_file in [ os.path.abspath(root + os.sep + f) for f in files]: log.debug('Adding %s to job queue.' % new_file ) job_queue.append(new_file) elif os.path.isfile(fname) is True: # dealing with a file - just add it to the queue log.debug('Adding file %s to job queue.' % fname) job_queue.append(fname) else: log.error('Submission is neither file or directory. Exiting.') sys.exit(1) def analyze_file(fname, opts, loglevel): """ Analyze a file with MASTIFF. """ log = logging.getLogger('Mastiff.analyze') log.info("Starting analysis on %s", fname) my_analysis = Mastiff.Mastiff(opts.config_file, loglevel=loglevel, override=opts.override) if opts.ftype is not None: log.info('Forcing file type to include "%s"', opts.ftype) my_analysis.set_filetype(fname=fname, ftype=opts.ftype) my_analysis.analyze(fname, opts.plugin_name) def main(): """Parse options and analyze file.""" usage = "usage: %prog [options] FILE|DIRECTORY" parser = OptionParser( add_help_option = False, version = "%prog " + get_release_string(), usage = usage) parser.remove_option("--version") parser.add_option( "--conf", "-c", action = "store", default = "./mastiff.conf", dest = "config_file", help = "Use an alternate config file. The default is './mastiff.conf'.", type = "string") parser.add_option( "--help", "-h", action = "help", help = "Show the help message and exit.") parser.add_option( "--list", "-l", action = "store", dest = "list_plugins", help = "List all available plug-ins of the specified type and exit. 
Type must be one of 'analysis', 'cat', or 'output'.", metavar = "PLUGIN_TYPE") parser.add_option( "--option", "-o", action="append", default = None, dest = "override", help = "Override a config file option. Configuration options should be specified as 'Section.Key=Value' and should be quoted if any whitespace is present. Multiple overrides can be specified by using multiple '-o' options.") parser.add_option( "--plugin", "-p", action = "store", default = None, dest = "plugin_name", help = "Only run the specified analysis plug-in. Name must be quoted if it contains whitespace.") parser.add_option( "--quiet", "-q", action = "store_true", default = False, dest = "quiet", help = "Only log errors.") parser.add_option( "--type", "-t", action = "store", default = None, dest = "ftype", help = "Force file to be analyzed with plug-ins from the specified category (e.g., EXE, PDF, etc.). Run with '-l cat' to list all available category plug-ins.", type = "string") parser.add_option( "--verbose", "-V", action = "store_true", dest = "verbose", default = False, help = "Print verbose logs.") parser.add_option( "--version", "-v", action = "version", help = "Show program's version number and exit.") queue_group = OptionGroup(parser, "Queue Options") queue_group.add_option( "--append-queue", "", action = "store_true", dest = "append_queue", default = False, help = "Append file or directory to job queue and exit.") queue_group.add_option( "--clear-queue", "", action = "store_true", dest = "clear_queue", default = False, help = "Clear job queue and exit.") queue_group.add_option( "--ignore-queue", "", action = "store_true", dest = "ignore_queue", default = False, help = "Ignore the job queue and just process file.") queue_group.add_option( "--list-queue", "", action = "store_true", dest = "list_queue", default = False, help = "List the contents of the job queue and exit.") queue_group.add_option( "--resume-queue", action = "store_true", default = False, dest = "resume_queue", help = 
"Continue processing the queue.") parser.add_option_group(queue_group) (opts, args) = parser.parse_args() if (args is None or len(args) < 1) and opts.list_plugins is None \ and opts.clear_queue is False and opts.resume_queue is False \ and opts.list_queue is False: parser.print_help() sys.exit(1) if opts.verbose == True: loglevel = logging.DEBUG elif opts.quiet == True: loglevel = logging.ERROR else: loglevel = logging.INFO format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s' logging.basicConfig(format=format_) log = logging.getLogger("Mastiff") log.setLevel(loglevel) # check to see if we are running as root if os.geteuid() == 0: log.warning('You are running MASTIFF as ROOT! This may be DANGEROUS!') if opts.list_plugins is not None: plugs = Mastiff.Mastiff(opts.config_file) plugs.list_plugins(opts.list_plugins) sys.exit(0) # set up job queue job_queue = queue.MastiffQueue(opts.config_file) # process job queue specific options if opts.clear_queue is True: log.info('Clearing job queue and exiting.') job_queue.clear_queue() sys.exit(0) elif opts.list_queue is True: if len(job_queue) == 0: log.info("MASTIFF job queue is empty.") else: log.info("MASTIFF job queue has %d entries." % len(job_queue)) print "\nFile Name\n---------\n%s" % (job_queue) sys.exit(0) if len(args) > 0: fname = args[0] else: fname = None if opts.ignore_queue is True: log.info('Ignoring job queue.') analyze_file(fname, opts, loglevel) sys.exit(0) # add file or directory to queue if fname is not None: add_to_queue(job_queue, fname) if opts.append_queue is True: sys.exit(0) # Start analysis on the files in the queue until it is empty while len(job_queue) > 0: fname = job_queue.popleft() analyze_file(fname, opts, loglevel) log.info('There are %d jobs in the queue.' 
def get_release_string():
    """
    Render the packed module-level ``version`` integer as a dotted string.

    Bit layout of ``version`` (most significant first):
        bits 28-31  major
        bits 20-27  minor
        bits 12-19  patch
        bits 10-11  release state (0=ds, 1=rc, 2=sr, 3=xs)
        bits  0-9   build number

    Returns 'major.minor.patch' for a state-2 ("sr") release with build 0,
    otherwise 'major.minor.patch.<state><build>'.
    """
    # Index in this tuple is the two-bit state value.
    state_labels = ("ds", "rc", "sr", "xs")

    build = version & 0x03ff
    state = (version >> 10) & 0x03
    patch = (version >> 12) & 0xff
    minor = (version >> 20) & 0xff
    major = (version >> 28) & 0x0f

    dotted = '%d.%d.%d' % (major, minor, patch)
    if state == 2 and build == 0:
        return dotted
    return '%s.%s%d' % (dotted, state_labels[state], build)
class Conf:
    """
    Parse and maintain the Mastiff configuration.

    Built-in defaults are loaded first, then /etc/mastiff/mastiff.conf,
    ~/.mastiff.conf and finally the file given by the caller; later files
    override earlier ones. Command-line overrides are applied last.
    """

    def __init__(self, config_file=None, override=None):
        """
        Read the configuration and apply any overrides.

        config_file: optional path to a config file, read after the default
                     locations. May be None, in which case only the default
                     locations are consulted.
        override:    optional iterable of 'Section.Key=Value' strings.

        Exits the process with status 1 if no configuration file could be read.
        """
        log = logging.getLogger('Mastiff.Conf')

        # os.path.abspath(None) raises, so only normalise a path that was given.
        if config_file is None:
            self.config_file = None
        else:
            self.config_file = os.path.abspath(config_file)

        self.config = ConfigParser.ConfigParser()
        self.set_defaults()

        # read from the default file locations and the file given
        # file given will be read last and will over-write any
        # previously read-in config files
        locations = ['/etc/mastiff/mastiff.conf',
                     os.path.expanduser('~/.mastiff.conf')]
        if config_file is not None:
            locations.append(config_file)

        files_read = self.config.read(locations)
        if not files_read:
            log.error("Could not read any configuration files. Exiting.")
            sys.exit(1)

        if self.config.getboolean('Misc', 'verbose') == True:
            log.setLevel(logging.DEBUG)
        log.debug("Read config from %s", str(files_read))

        if override is not None:
            for opt in override:
                self.override_option(opt)

    def set_defaults(self):
        """
        Set default variables. If set later in a config file, these will
        be overwritten.

        Note: This is being done instead of a default config file to
        reduce the number of files needed.
        """
        self.config.add_section('Dir')
        self.set_var('Dir', 'log_dir', '/var/log/mastiff')
        #self.set_var('Dir', 'plugin_dir', '/usr/local/mastiff/plugins')
        self.config.add_section('Misc')
        self.set_var('Misc', 'verbose', 'off')

    def get_var(self, section, var):
        """Return the value of 'var' in 'section', or None (and log) if absent."""
        try:
            return self.config.get(section, var)
        except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
            log = logging.getLogger('Mastiff.Conf.GetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return None

    def get_bvar(self, section, var):
        """Return 'var' in 'section' as a boolean, or False (and log) if absent."""
        try:
            return self.config.getboolean(section, var)
        except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
            log = logging.getLogger('Mastiff.Conf.GetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return False

    def get_section(self, section):
        """Return a dict of the items in 'section', or None (and log) if absent."""
        try:
            options = self.config.items(section)
        except ConfigParser.NoSectionError:
            log = logging.getLogger('Mastiff.Conf.GetSection')
            log.error('Could not get section "%s".', section)
            return None
        return dict(options)

    def set_var(self, section, var, value):
        """
        Set 'var' in 'section' to 'value'.

        Returns True on success, or None (and logs) when the section does
        not exist. ConfigParser.set() itself returns None, so its result
        cannot be used to tell success from failure.
        """
        try:
            self.config.set(section, var, value)
        except ConfigParser.NoSectionError:
            log = logging.getLogger('Mastiff.Conf.SetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return None
        return True

    def override_option(self, override):
        """
        Override an option from the config file.

        override: string of the form 'Section.Key=Value'. Only the first
                  '=' separates key from value, so values may contain '='.

        Note that if the option does not exist, it will be added. The
        section must already exist.

        Returns True if the option was set, False otherwise.
        """
        log = logging.getLogger('Mastiff.Conf.override')

        target, separator, value = override.partition('=')
        section = target.split('.')
        if not separator or len(section) != 2:
            log.error('Invalid override option: %s' % override)
            return False

        log.info('Overriding option: %s.%s=%s' % (section[0], section[1], value))
        if self.set_var(section[0], section[1], value) is None:
            return False
        return True

    def list_config(self):
        """Print all variables read in."""
        # Single-argument print(...) behaves identically as a Python 2 statement.
        print("Configuration Options:")
        for section in self.config.sections():
            print("%s" % (section))
            for (name, value) in self.config.items(section):
                print("\t%s:\t%s" % (name, value))
        return

    def dump_config(self):
        """
        Dump a copy of the config into the Mastiff log dir.

        Failures to open or write the dump file are logged, not raised.
        """
        log = logging.getLogger('Mastiff.Conf.Dump')
        out_dir = self.get_var('Dir', 'log_dir')

        try:
            with open(os.path.join(out_dir, 'mastiff-run.config'), 'w') as dump_file:
                self.config.write(dump_file)
        except (IOError, OSError, ConfigParser.Error) as err:
            log.error('Unable to dump config file: %s', err)
This is also stored in the configuration file. db: Sqlite3 Connection class to the database file. cat_list: List that contains all of the category plug-ins to be used during analysis. activated_plugins: List that contains all of the plug-ins that have been activated. The order of the plug-ins in this list is the order they will run. cat_manager: Yapsy PluginManager class that manages the category plug-ins. plugin_manager: Yapsy PluginManager class that manages the analysis plug-ins. Mastiff member functions: __init__(self, config_file=None, fname=None, loglevel=logging.INFO, override=None) The initialization function of the class. This function will initialize all of the member variables, set up logging, read in and store the configuration file, and find and load all plug-ins. init_file(self, fname) This function validates the filename being analyzed to ensure it exists and can be accessed, sets up the directory that all output will be logged into, and adds initial file information into the database. set_filetype(self, fname=None, ftype=None) Calls the file-type identification helper functions in mastiff/filetype.py, and loops through all of the category plug-ins to determine which ones will analyze the file. validate(self, name, plugin) Validates an analysis plug-in to ensure that it contains the correct functions. activate_plugins(self, single_plugin=None) Loops through all analysis plug-ins for category classes relevant to the file type being examined and ensures they are valid. If validated, the analysis plug-in is activated. This function also ensures that any pre-requisite plug-ins have been activated. analyze(self, fname=None, single_plugin=None) Ensures the file type of the file is set up and loops through all activated analysis plug-ins and calls their analyze() function. list_plugins(self, ctype='analysis') Helper function that loops through all available plug-ins and prints out their name, path and description.
class Mastiff:
    """
    Primary class for the static analysis inspection framework.

    Owns the configuration, the database handle, and three Yapsy plugin
    managers (category, analysis, output). A single instance analyzes one
    file: init_file() hashes it and sets up its log directory,
    set_filetype() picks the categories, activate_plugins() selects the
    analysis plug-ins and analyze() runs them.
    """

    @staticmethod
    def _split_dir_list(raw):
        """
        Turn a comma-separated directory option into a list of paths.

        Each entry is stripped of surrounding whitespace and has '~'
        expanded. Empty entries are dropped. A value of None (option not
        configured) yields an empty list instead of the path 'None'.
        """
        if raw is None:
            return []
        dirs = []
        for entry in str(raw).split(','):
            entry = entry.strip()
            if entry:
                dirs.append(os.path.expanduser(entry))
        return dirs

    def __init__(self, config_file=None, fname=None, loglevel=logging.INFO, override=None):
        """
        Initialize variables, logging, configuration, plug-ins and database.

        config_file: path to the configuration file handed to Conf.
        fname:       optional file to analyze; if given, init_file() is called.
        loglevel:    logging level for the 'Mastiff' logger and its file handler.
        override:    optional list of 'Section.Key=Value' config overrides.

        Exits the process with status 1 if the base log directory cannot
        be created.
        """
        # configure logging for Mastiff module
        format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
        logging.basicConfig(format=format_)
        log = logging.getLogger("Mastiff")
        log.setLevel(loglevel)
        # drop handlers left over from a previous Mastiff instance
        if log.handlers:
            log.handlers = []

        # read in config file
        self.config = Conf.Conf(config_file, override=override)

        # make sure base logging dir exists
        log_dir = self.config.get_var('Dir','log_dir')
        log_dir = os.path.abspath(os.path.expanduser(log_dir))
        if not os.path.isdir(log_dir):
            try:
                os.makedirs(log_dir)
            except OSError as err:
                log.error('Could not make %s: %s. Exiting.', log_dir, err)
                sys.exit(1)
        self.config.set_var('Dir', 'base_dir', log_dir)

        # set up file to log output to
        fh = logging.FileHandler(log_dir + os.sep + 'mastiff.log' )
        fh.setFormatter(logging.Formatter(format_))
        log.addHandler(fh)
        fh.setLevel(loglevel)

        # verbose logging set in the config and not command line?
        if self.config.get_bvar('Misc','verbose') == True and \
           loglevel != logging.ERROR:
            log.setLevel(logging.DEBUG)
            fh.setLevel(logging.DEBUG)

        # get path to category plugins
        self.cat_paths = [ os.path.dirname(Cats.__file__) ]
        self.output_paths = [ os.path.dirname(masOutput.__file__) ]

        # built-in analysis plug-ins first, then any configured directories
        self.plugin_paths = [ os.path.dirname(analysis.__file__)]
        self.plugin_paths.extend(
            self._split_dir_list(self.config.get_var('Dir','plugin_dir')))

        # do the same for output plugins
        self.output_paths.extend(
            self._split_dir_list(self.config.get_var('Dir','output_plugin_dir')))

        self.filetype = dict()
        self.file_name = None
        self.hashes = None
        self.cat_list = list()
        self.activated_plugins = list()

        # Build the managers
        self.cat_manager = PluginManager()
        self.plugin_manager = PluginManager()
        self.output_manager = PluginManager()

        # Find and load all category plugins
        cat_filter = dict()
        self.cat_manager.setPluginPlaces(self.cat_paths)
        self.cat_manager.collectPlugins()

        # Import all of the modules for the categories so we can access
        # their classes.
        for pluginInfo in self.cat_manager.getAllPlugins():
            log.debug('Found category: %s', pluginInfo.name)
            try:
                mod_name = "mastiff.plugins.category.%s" % \
                           os.path.basename(pluginInfo.path)
                cat_mod = __import__(mod_name,
                                     fromlist=["mastiff.plugins.category"])
            except ImportError as err:
                log.error("Unable to import category %s: %s",
                          pluginInfo.name, err)
                self.cat_manager.deactivatePluginByName(pluginInfo.name)
                continue
            else:
                # We were able to import it, activate it
                self.cat_manager.activatePluginByName(pluginInfo.name)
                log.debug("Activated category: %s", pluginInfo.name)

            # Cat is imported, add class to the category filter
            # cat_filter will be a dict in the form:
            # { cat_name: cat_class }
            # and contains all the category plugins that have been activated
            cat_class = getattr(cat_mod,
                                pluginInfo.plugin_object.__class__.__name__)
            cat_filter.update({pluginInfo.plugin_object.cat_name: cat_class})

        #log.debug("Category Filters: %s", cat_filter)

        # Now collect and load all analysis plugins
        self.plugin_manager.setPluginPlaces(self.plugin_paths)
        self.plugin_manager.setCategoriesFilter( cat_filter )
        self.plugin_manager.collectPlugins()

        # Finally collect all output plugins
        self.output_manager.setPluginPlaces(self.output_paths)
        self.output_manager.collectPlugins()

        # set up database
        self.db = DB.open_db_conf(self.config)
        DB.create_mastiff_tables(self.db)

        # set up the output object
        self.output = dict()

        # init the filename if we have it
        if fname is not None:
            self.init_file(fname)

    def __del__(self):
        """
        Class destructor.
        """
        # Close down all logging file handles so we don't have any open file descriptors
        log = logging.getLogger("Mastiff")
        handles = list(log.handlers)
        for file_handle in handles:
            log.removeHandler(file_handle)
            file_handle.close()

    def init_file(self, fname):
        """
        Validate the filename to ensure it can be accessed and set
        up class variables.

        This function is called when a filename is given or can be
        called directly.

        Returns the (md5, sha1, sha256) tuple of the file, or None if
        fname is None or the file cannot be read. Exits the process with
        status 1 if the per-file log directory cannot be created.
        """
        log = logging.getLogger("Mastiff.Init_File")

        if fname is None:
            return None

        try:
            with open(fname, 'rb') as my_file:
                data = my_file.read()
        except IOError as err:
            log.error("Could not open file: %s", err)
            return None

        self.file_name = fname

        # create tuple of md5, sha1 and sha256 hashes
        self.hashes = hashlib.md5(data).hexdigest(), \
                      hashlib.sha1(data).hexdigest(), \
                      hashlib.sha256(data).hexdigest()
        self.config.set_var('Misc', 'hashes', self.hashes)
        self.output[self.hashes] = dict()

        # update log_dir: per-file output goes into <log_dir>/<md5>
        log_dir = os.path.abspath(os.path.expanduser(self.config.get_var('Dir','log_dir'))) + \
                  os.sep + \
                  self.hashes[0]
        self.config.set_var('Dir', 'log_dir', log_dir)

        # create log dir
        if not os.path.exists(log_dir):
            try:
                os.makedirs(log_dir)
            except OSError as err:
                log.error('Could not make %s: %s. Exiting.', log_dir, err)
                sys.exit(1)

        # lets set up the individual log file
        # we may miss out on a couple prior logs, but thats OK
        log = logging.getLogger('Mastiff')
        fh = logging.FileHandler(log_dir + os.sep + 'mastiff.log' )
        format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
        fh.setFormatter(logging.Formatter(format_))
        log.addHandler(fh)
        fh.setLevel(logging.INFO)

        log = logging.getLogger("Mastiff.Init_File")
        log.info('Analyzing %s.', self.file_name)
        log.info("Log Directory: %s", log_dir)

        # copy file to the log directory
        if self.config.get_bvar('Misc', 'copy') is True:
            try:
                copyfile(self.file_name, log_dir + os.sep + os.path.basename(self.file_name) + '.VIR')
            except IOError as err:
                log.error('Unable to copy file: %s', err)
            else:
                # only claim success when the copy actually happened
                log.debug('Copied file to log directory.')
        else:
            log.debug('Configuration set to not copy file.')

        # add entry to database if it exists
        if self.db is not None:
            log.debug('Adding entry to database.')
            DB.insert_mastiff_item(self.db, self.hashes)

        return self.hashes

    def activate_plugins(self, single_plugin=None):
        """
        Activate all plugins that are in the categories we selected.
        If single_plugin is given, only activate that plug-in.
        Note: File Information plug-in is ALWAYS run.

        Plug-ins that declare a prereq are activated only after the named
        prerequisite plug-in has been activated. Output plug-ins are
        activated last.
        """
        # Bound before the loops so it exists even when cat_list is empty.
        log = logging.getLogger('Mastiff.Plugins.Activate')
        has_prereq = list()

        for cats in self.cat_list:
            log.debug('Activating plugins for category %s.', cats)
            self.output[self.hashes][cats] = dict()

            for plugin in self.plugin_manager.getPluginsOfCategory(cats):
                # check if we are running a single plugin - file information always gets run
                if single_plugin is not None and single_plugin != plugin.name and plugin.name != 'File Information':
                    continue

                plugin.plugin_object.set_name(plugin.name)
                log.debug('Validating plugin "%s"', plugin.name)

                # if the plugin validates, try to activate it
                if self.validate(plugin.name, plugin.plugin_object) == True:
                    if plugin.plugin_object.prereq is not None:
                        # this plugin has a pre-req, can't activate yet
                        has_prereq.append([cats, plugin])
                    else:
                        log.debug('Activating "%s".', plugin.name)
                        self.plugin_manager.activatePluginByName(plugin.name, cats)
                        self.activated_plugins.append(plugin)
                else:
                    log.debug("Removing plugin %s %s.", plugin.name, cats)
                    self.plugin_manager.deactivatePluginByName(plugin.name, cats)

        # now try to activate any plug-ins that have pre-reqs
        # keep sweeping until a full pass activates nothing new
        flag = True
        while flag is True:
            flag = False
            # iterate over a snapshot: entries are removed from has_prereq below
            for plugins in list(has_prereq):
                # check to see if the pre-req in in the activated list
                inact = [p for p in self.activated_plugins if p.name == plugins[1].plugin_object.prereq]
                if len(inact) > 0:
                    # our pre-req has been activated, we can activate ourself
                    log.debug('Activating "%s". Pre-req fulfilled.', plugins[1].name)
                    self.plugin_manager.activatePluginByName(plugins[1].name, plugins[0])
                    self.activated_plugins.append(plugins[1])
                    has_prereq.remove(plugins)
                    flag = True

        # list out any plugins that were not activated due to missing pre-reqs
        for plugins in has_prereq:
            log.debug("Plugin %s not activated due to missing pre-req \"%s.\"" % \
                      (plugins[1].name, plugins[1].plugin_object.prereq ))

        # finally activate the output plugins
        for plugin in self.output_manager.getAllPlugins():
            plugin.plugin_object.set_name(plugin.name)
            log.debug('Activating Output Plug-in "{}"'.format(plugin.name))
            self.output_manager.activatePluginByName(plugin.name)
            #self.activated_plugins.append(plugin)

    def list_plugins(self, ctype='analysis'):
        """
        Print out a list of plug-ins.

        ctype: 'analysis', 'cat' or 'output'. Any other value prints
               "Unknown plugin type."
        """
        # Single-argument print(...) behaves identically as a Python 2 statement.
        if ctype == 'analysis':
            # analysis plug-ins
            print("Analysis Plug-in list:\n")
            print("%-25s\t%-15s\t%-25s\n%-50s" % \
                  ("Name", "Category", "Description", "Path"))
            print('-' * 80)
            for plugin in sorted(self.plugin_manager.getAllPlugins(),
                                 key=attrgetter('plugin_object.cat_name', 'name')):
                print("%-25s\t%-15s\t%-12s\n%-80s\n" % \
                      (plugin.name, plugin.plugin_object.cat_name, \
                       plugin.description, plugin.path))
        elif ctype == 'cat':
            print("Category Plug-in list:\n")
            print("%-25s\t%-15s\t%-s" % ("Name", "FType", "Description"))
            print('-' * 80)
            # category plug-ins
            for plugin in sorted(self.cat_manager.getAllPlugins(),
                                 key=attrgetter('name')):
                print("%-25s\t%-15s\t%-s" % \
                      (plugin.name, plugin.plugin_object.cat_name,
                       plugin.description))
        elif ctype == 'output':
            print("Output Plug-in list:\n")
            print("%-25s\t%-s\n%s" % ("Name", "Description", "Path"))
            print('-' * 80)
            # output plug-ins
            for plugin in sorted(self.output_manager.getAllPlugins(),
                                 key=attrgetter('name')):
                print("%-25s\t%-s\n%-80s\n" % \
                      (plugin.name, plugin.description, plugin.path))
        else:
            print("Unknown plugin type.")

    def set_filetype(self, fname=None, ftype=None):
        """
        Calls the filetype functions and loops through the category
        plug-ins to see which ones will handle this file.

        fname: file to analyze, used only if no file has been initialized yet.
        ftype: optional category name to force onto the category list.

        Returns the filetype dictionary ('magic' and 'trid' entries).
        Exits the process with status 1 if no file is available.
        """
        log = logging.getLogger('Mastiff.FileType')

        if fname is None and self.file_name is None:
            log.error("No file to analyze has been specified. Exiting.")
            sys.exit(1)
        elif fname is not None and self.file_name is None:
            if self.init_file(fname) is None:
                log.error("ERROR accessing file. Exiting.")
                sys.exit(1)

        if self.cat_list:
            # if self.cat_list is already set, assume that we've already
            # gone through this function
            return self.filetype

        if ftype is not None:
            # we are forcing a file type to run
            log.info('Forcing category plug-in "%s" to be added.', ftype)
            self.cat_list.append(ftype)

        # Grab the magic file type of the file. This is done here so as not
        # to do it in every category plug-in.
        self.filetype['magic'] = FileType.get_magic(self.file_name)

        # Grab the TrID type
        # get_section() returns None when the section is missing, and the
        # keys are optional, so do not index blindly.
        trid_opts = self.config.get_section('File ID') or dict()
        self.filetype['trid'] = list()
        if trid_opts.get('trid'):
            self.filetype['trid'] = FileType.get_trid(self.file_name,
                                                      trid_opts['trid'],
                                                      trid_opts.get('trid_db', ''))

        # Cycle through all of the categories and see if they should be added
        # to the list of categories to be run.
        for pluginInfo in self.cat_manager.getAllPlugins():
            cat_name = pluginInfo.plugin_object.is_my_filetype(self.filetype,
                                                               self.file_name)
            log.debug('Checking cat %s for filetype.', pluginInfo.name)
            if cat_name is not None:
                # cat_list contains analysis plugin categories to be used
                self.cat_list.append(cat_name)
                log.debug('Adding %s to plugin selection list.', cat_name)

        # add file type to the DB
        if self.db is not None:
            DB.insert_mastiff_item(self.db, self.hashes, self.cat_list)

        return self.filetype

    def validate(self, name, plugin):
        """
        Return False if a plugin does not have the correct functions.

        name:   plug-in name, used only in log messages.
        plugin: plug-in object; must expose callable activate, deactivate
                and analyze attributes.
        """
        log = logging.getLogger('Mastiff.Plugins.Validate')

        for required in ('activate', 'deactivate', 'analyze'):
            # a missing attribute and a non-callable one are both invalid
            if not callable(getattr(plugin, required, None)):
                log.error("%s missing %s function.", name, required)
                return False

        return True

    def analyze(self, fname=None, single_plugin=None):
        """
        Perform analysis on a given filename.

        fname:         file to analyze, used only if no file has been
                       initialized yet.
        single_plugin: if given, only this analysis plug-in (plus
                       'File Information') is run.

        Returns False if the file cannot be accessed; otherwise returns
        None after all plug-ins have run. Exits the process with status 1
        if no file was specified or no file type could be determined.
        """
        log = logging.getLogger('Mastiff.Analysis')

        if fname is None and self.file_name is None:
            log.error("No filename specified. Exiting.")
            sys.exit(1)
        elif fname is not None and self.file_name is None:
            # first time seeing the file, initialize it
            if self.init_file(fname) is None:
                log.error("ERROR accessing file. Exiting.")
                return False

        # set the file_type
        self.set_filetype()
        log.info('File categories are %s.', self.cat_list)

        if not self.filetype:
            log.error("The file type has not been set. Exiting.")
            sys.exit(1)

        # activate the plugins
        self.activate_plugins(single_plugin)

        for plugin in self.activated_plugins:
            # skip if plugin is not activated
            if plugin.is_activated == False:
                continue

            log.debug('Calling plugin "%s".', plugin.name)

            # set the output results to be an attribute of the plugin so it can analyze it
            setattr(plugin.plugin_object, 'results', self.output[self.hashes])

            # analyze the plugin - if plugin is compliant with universal output
            # its output will be returned
            plug_out = plugin.plugin_object.analyze(self.config, self.file_name)

            if plug_out is not False and plug_out is not None and isinstance(plug_out, masOutput.page):
                # add the plugin output to its own entry
                self.output[self.hashes][plugin.plugin_object.cat_name][plugin.plugin_object.name] = plug_out

        # go through output plugins and output the data
        for plugin in self.output_manager.getAllPlugins():
            plugin.plugin_object.output(self.config, self.output)

        self.config.dump_config()
        log.info('Finished analysis for %s.', self.file_name)

# end class mastiff
def get_magic(file_name):
    """
    Determine the file type of a given file based on its magic result.

    Two libmagic bindings are supported: the one shipped with file(1)
    (magic.open / load / file) is tried first; if the module lacks that
    API (AttributeError), ahupp's python-magic (magic.from_file) is used.

    Returns the magic description string, or None if it could not be
    determined or no usable binding is installed.
    """
    log = logging.getLogger('Mastiff.FileType.Magic')

    try:
        # try to use magic from the file source code
        magic_ = magic.open(magic.MAGIC_NONE)
        magic_.load()
        try:
            file_type = magic_.file(file_name)
        except Exception:
            log.error('Could not determine magic file type.')
            return None
        finally:
            # release the libmagic handle on success and on failure
            magic_.close()
    except AttributeError:
        # Now we are trying ahupps magic library
        try:
            file_type = magic.from_file(file_name)
        except AttributeError:
            log.error('No valid magic libraries installed.')
            return None
        except magic.MagicException:
            # the exception class lives in the magic module; it is not a builtin
            log.error('Cound not determing magic file type.')
            return None

    log.debug('Magic file type is "%s"', file_type)

    return file_type
def yara_typecheck(filename, yara_rule):
    """
    Check for file type based on yara rule.

    filename:  path of the file to scan.
    yara_rule: yara rule source text, or None.

    Returns True if at least one rule matches, False otherwise (including
    when the rule is None, fails to compile, or the scan raises a yara
    error).
    """
    log = logging.getLogger('Mastiff.FileType.Yara')

    if yara_rule is None:
        return False

    try:
        rules = yara.compile(source=yara_rule)
    except yara.SyntaxError as err:
        log.error('Rule Error: %s', err)
        return False
    except Exception:
        log.error("Error attempting to perform Yara filetype.")
        return False

    try:
        # bound the scan time so a pathological file cannot stall analysis
        matches = rules.match(filename, timeout=10)
    except yara.Error as err:
        log.error('Yara error: %s', err)
        return False

    if len(matches) > 0:
        log.debug('File Type matches rule %s', matches[0].rule)
        return True

    return False
def post_multipart(host, method, selector, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.

    host:     host (optionally host:port) to connect to.
    method:   URL scheme; a value starting with 'https' selects TLS,
              anything else uses plain HTTP.
    selector: request path sent in the POST line.
    fields:   sequence of (name, value) elements for regular form fields.
    files:    sequence of (name, filename, value) elements for data to be
              uploaded as files.

    Return the server's response page.
    """
    content_type, body = encode_multipart_formdata(fields, files)

    # The legacy httplib.HTTP class has no getresponse(); HTTPConnection
    # offers the same request-building calls plus getresponse().
    if method.startswith('https'):
        h = httplib.HTTPSConnection(host)
    else:
        h = httplib.HTTPConnection(host)

    try:
        h.putrequest('POST', selector)
        h.putheader("User-Agent", 'MASTIFF Statis Analysis Framework')
        h.putheader('Content-Type', content_type)
        h.putheader('Content-Length', str(len(body)))
        h.endheaders()
        h.send(body)
        myresponse = h.getresponse().read()
    finally:
        # do not leave the socket open, whether or not the request succeeded
        h.close()

    return myresponse
def printable_str(string):
    """
    Return a copy of 'string' that is safe to print.

    Characters with code points 32 through 126 are kept as they are;
    every other character is replaced by a literal backslash-x escape
    with at least two lowercase hex digits.
    """
    def _render(char):
        code = ord(char)
        if 32 <= code <= 126:
            return char
        return r'\x%02x' % code

    return ''.join(_render(char) for char in string)
def analyze(self, config, filename):
    """
    Analyze the file.

    config:   configuration object; its get_var('Dir', 'log_dir') gives
              the directory the report files are written to.
    filename: path of the executable to parse.

    Returns True when both the quick and the full report were written,
    False if the plug-in is not activated, the file cannot be parsed as
    a PE, or either report fails.
    """
    # sanity check to make sure we can run
    if self.is_activated == False:
        return False

    log = logging.getLogger('Mastiff.Plugins.' + self.name)
    log.info('Starting execution.')

    try:
        pe = pefile.PE(filename)
    except Exception as err:
        # a bare except would also swallow KeyboardInterrupt and SystemExit
        log.error('Unable to parse PE file: %s' % err)
        return False

    outdir = config.get_var('Dir','log_dir')
    # the full report is only attempted when the quick report succeeded
    if not self.output_file_quick(outdir, pe) or not self.output_file_full(outdir, pe):
        return False

    return True
""" section_string = '' section_flags = pefile.retrieve_flags(pefile.SECTION_CHARACTERISTICS, 'IMAGE_SCN_') section_string += '\nNumber of Sections: %d\n' % pe.FILE_HEADER.NumberOfSections section_string += '{0:15} {1:8} {2:40}\n'.format('Section Name', 'Entropy', 'Flags') section_string += '-'*65 + '\n' for section in pe.sections: # thanks to the pefile example code for this flags = [] for flag in section_flags: if getattr(section, flag[0]): flags.append(flag[0]) # the following line was taken from Didier Steven's pecheck.py code section_string += '{0:15} {1:<8.5} {2:40}\n'.format(''.join(filter(lambda c:c != '\0', str(section.Name))), \ section.get_entropy(), ', '.join(flags)) section_string += '\n' return section_string def output_file_quick(self, outdir, pe): """Output short, useful information on file.""" log = logging.getLogger('Mastiff.Plugins.' + self.name + '.quick') try: outfile = open(outdir + os.sep + 'peinfo-quick.txt', 'w') outfile.write('PE Header Information\n\n') outfile.write('Quick Info:\n\n') try: outfile.write('TimeDateStamp: %s\n' % time.asctime(time.gmtime(pe.FILE_HEADER.TimeDateStamp))) except ValueError: outfile.write('TimeDataStamp: Invalid Time %x\n' % (pe.FILE_HEADER.TimeDateStamp)) outfile.write('Subsystem: %s\n' % pefile.SUBSYSTEM_TYPE[pe.OPTIONAL_HEADER.Subsystem]) outfile.write(self._dump_section_headers(pe)) # any parsing warnings (often related to packers outfile.write('\nParser Warnings:\n') for warning in pe.get_warnings(): outfile.write('- ' + warning + '\n') # file info - thx to Ero Carrera for sample code # http://blog.dkbza.org/2007/02/pefile-parsing-version-information-from.html outfile.write('\nFile Information:\n') if hasattr(pe, "FileInfo"): for fileinfo in pe.FileInfo: if fileinfo.Key == 'StringFileInfo': for string_entry in fileinfo.StringTable: for entry in string_entry.entries.items(): outfile.write("{0:20}:\t{1:40}\n".format(printable_str(entry[0]), \ printable_str(entry[1]))) if fileinfo.Key == 'VarFileInfo': try: 
for var in fileinfo.Var: outfile.write("{0:20}:\t{1:40}\n".format(printable_str(var.entry.items()[0][0]), printable_str(var.entry.items()[0][1]))) except: # there are times when a VarFileInfo structure may be present, but empty pass else: outfile.write('No file information present.\n') # imports outfile.write('\nImports:\n') if hasattr(pe, "DIRECTORY_ENTRY_IMPORT"): outfile.write('{0:20}\t{1:30}\t{2:10}\n'.format('DLL', 'API', 'Address')) outfile.write('-'*70 + '\n') for entry in pe.DIRECTORY_ENTRY_IMPORT: for imp in entry.imports: outfile.write('{0:20}\t{1:30}\t{2:10}\n'.format(entry.dll, imp.name, hex(imp.address))) else: outfile.write('No imports.\n') # exports outfile.write('\nExports:\n') if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"): outfile.write('{0:20}\t{1:10}\t{2:10}\n'.format('Name', 'Address', 'Ordinal')) outfile.write('-'*50 + '\n') for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols: outfile.write('{0:20}\t{1:10}\t{2:10}\n'.format(exp.name, \ hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),\ exp.ordinal)) else: outfile.write('No Exports.\n') outfile.close() except IOError, err: log.error('Cannot write to peinfo.txt: %s' % err) return False except pefile.PEFormatError, err: log.error('Unable to parse PE file: %s' % err) return False return True def output_file_full(self, outdir, pe): """Output full information on file.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.full') try: outfile = open(outdir + os.sep + 'peinfo-full.txt', 'w') outfile.write('\nFull Information Dump:\n') outfile.write(self._dump_section_headers(pe)) outfile.write(pe.dump_info()) outfile.close() except IOError, err: log.error('Cannot write to peinfo.txt: %s' % err) return False except: log.error('Unable to parse PE file.') return False return True ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-peinfo.yapsy-plugin ================================================ [Core] Name = PE Info Module = EXE-peinfo [Documentation] Description = Dump information on the PE header and structure of an executable. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-resources.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ PE Resources Plug-in Plugin Type: EXE Purpose: This plug-in obtains information on any resources contained within the Windows EXE and extracts them. More information on how resources are stored can be found in the Microsoft PE and COFF Specification document. http://msdn.microsoft.com/library/windows/hardware/gg463125 Thanks to Ero Carrera for creating the pefile library, whose code helped understand how to process resources. Output: resources.txt - File containing a list of all resources in the EXE and any associated information. log_dir/resource - Directory containing any extracted resource. 
Pre-requisites: - pefile library (http://code.google.com/p/pefile/) """ __version__ = "$Id: 519a2014141003f89b18bb5c3de571729a952f8e $" import logging import os import time try: import pefile except ImportError, err: print ("Unable to import pefile: %s" % err) import mastiff.plugins.category.exe as exe class EXE_Resources(exe.EXECat): """EXE Resources plugin code.""" def __init__(self): """Initialize the plugin.""" exe.EXECat.__init__(self) self.resources = list() self.pe = None self.output = dict() def analyze_dir(self, directory, prefix='', _type='', timedate=0): """ Analyze a resource directory and obtain all of its items.""" log = logging.getLogger('Mastiff.Plugins.' + self.name + '.analyze') # save the timedate stamp timedate = directory.struct.TimeDateStamp for top_item in directory.entries: if hasattr(top_item, 'data'): # at the language level that contains all of our information resource = dict() resource['Id'] = prefix resource['Type'] = _type # store the offset as the offset within the file, not the RVA! 
try: resource['Offset'] = self.pe.get_offset_from_rva(top_item.data.struct.OffsetToData) resource['Size'] = top_item.data.struct.Size resource['Lang'] = [ pefile.LANG.get(top_item.data.lang, '*unknown*'), \ pefile.get_sublang_name_for_lang( top_item.data.lang, top_item.data.sublang ) ] resource['TimeDate'] = timedate except pefile.PEFormatError, err: log.error('Error grabbing resource \"%s\" info: %s' % (prefix, err)) return False self.resources.append(resource) log.debug('Adding resource item %s' % resource['Id']) elif hasattr(top_item, 'directory'): if top_item.name is not None: # in a name level if len(prefix) == 0: newprefix = prefix + str(top_item.name) else: newprefix = ', '.join([prefix, str(top_item.name)]) else: # if name is blank, we are in a Type level if len(prefix) == 0: newprefix = 'ID ' + str(top_item.id) _type = pefile.RESOURCE_TYPE.get(top_item.id) else: newprefix = ', '.join([prefix, 'ID ' + str(top_item.id)]) # we aren't at the end, recurse self.analyze_dir(top_item.directory, prefix=newprefix, _type=_type) def extract_resources(self, log_dir, filename): """ Extract any resources from the file and put them in the resources dir. """ log = logging.getLogger('Mastiff.Plugins.' + self.name + '.extract') if len(self.resources) == 0: # no resources return False # create the dir if it doesn't exist log_dir = log_dir + os.sep + 'resources' if not os.path.exists(log_dir): try: os.makedirs(log_dir) except IOError, err: log.error('Unable to create dir %s: %s' % (log_dir, err)) return False try: my_file = open(filename, 'rb') except IOError, err: log.error('Unable to open file.') return False file_size = os.path.getsize(filename) # cycle through resources and extract them for res_item in self.resources: # check to make sure we won't go past the EOF if (res_item['Offset'] + res_item['Size']) > file_size: log.error('File is smaller than resource location. 
Could be a packed file.') continue my_file.seek(res_item['Offset']) data = my_file.read(res_item['Size']) out_name = res_item['Id'].replace('ID ', '_').replace(', ', '_').lstrip('_') if res_item['Type'] is not None and len(res_item['Type']) > 0: out_name += '_' + res_item['Type'] with open(log_dir + os.sep + out_name, 'wb') as out_file: log.debug('Writing %s to %s.' % (res_item['Id'], out_name)) out_file.write(data) out_file.close() my_file.close() return True def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') try: self.pe = pefile.PE(filename) except pefile.PEFormatError, err: log.error('Unable to parse PE file: %s' % err) return False if not hasattr(self.pe, 'DIRECTORY_ENTRY_RESOURCE'): log.info('No resources for this file.') return False # parse the directory structure self.analyze_dir(self.pe.DIRECTORY_ENTRY_RESOURCE) self.output['metadata'] = { } self.output['data'] = dict() if len(self.resources) == 0: log.info('No resources could be found.') else: # output data to file and extract resources self.gen_output(config.get_var('Dir','log_dir')) self.output_file(config.get_var('Dir','log_dir')) self.extract_resources(config.get_var('Dir','log_dir'), filename) return self.output def gen_output(self, outdir): """ Generate the output to send back. 
""" self.output['data']['resources'] = list() self.output['data']['resources'].append([ 'Name/ID', 'Type', 'File Offset', 'Size', 'Language', 'Time Date Stamp']) for item in sorted(self.resources, key=lambda mydict: mydict['Offset']): lang = ', '.join(item['Lang']).replace('SUBLANG_', '').replace('LANG_', '') my_time = time.asctime(time.gmtime(item['TimeDate'])) self.output['data']['resources'].append([ item['Id'], item['Type'], hex(item['Offset']), hex(item['Size']), lang, my_time ]) return True def output_file(self, outdir): """Print output from analysis to a file.""" log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output') try: outfile = open(outdir + os.sep + 'resources.txt', 'w') outfile.write('Resource Information\n\n') except IOError, err: log.error('Could not open resources.txt: %s' % err) return False outstr = '{0:20} {1:15} {2:15} {3:8} {4:<30} {5:<25}\n'.format( \ 'Name/ID', 'Type', 'File Offset', 'Size', 'Language', 'Time Date Stamp') outfile.write(outstr) outfile.write('-' * len(outstr) + '\n') for item in sorted(self.resources, key=lambda mydict: mydict['Offset']): lang = ', '.join(item['Lang']).replace('SUBLANG_', '').replace('LANG_', '') my_time = time.asctime(time.gmtime(item['TimeDate'])) outstr = '{0:20} {1:15} {2:<15} {3:<8} {4:30} {5:<25}\n'.format(item['Id'], item['Type'], hex(item['Offset']), hex(item['Size']), lang, my_time) outfile.write(outstr) return True ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-resources.yapsy-plugin ================================================ [Core] Name = Resources Module = EXE-resources [Documentation] Description = Obtain information on and extract PE resources. 
Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-sig.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ PE Digital Signature Plugin Type: EXE Purpose: This plug-in extracts any digital signatures from a PE executable and converts it to both DER and text format. Extraction is performed using the disitool.py tool from Didier Stevens. Many thanks to him for permission to use it. Conversion to text is performed using the openssl program. Validation of the signature is not yet done. Pre-requisites: - pefile library (http://code.google.com/p/pefile/) - disitool.py (http://blog.didierstevens.com/programs/disitool/) - openssl binary (http://www.openssl.org/) Configuration file: [Digital Signatures] # Options to extract the digital signatures # # disitool - path to disitool.py script. # openssl - path to openssl binary disitool = /usr/local/bin/disitool.py openssl = /usr/bin/openssl Output: sig.der - DER version of Authenticode signature. sig.txt - Text representation of signature. TODO: - Validate the signature. 
""" __version__ = "$Id: c0be897e44fd598577a3739b7b978b52a0e8c997 $" import logging import os import subprocess import sys from cStringIO import StringIO import pefile # Change the following line to import the category class you for the files # you wish to perform analysis on import mastiff.plugins.category.exe as exe class EXESig(exe.EXECat): """PE digital signature analysis plugin.""" def __init__(self): """Initialize the plugin.""" exe.EXECat.__init__(self) def activate(self): """Activate the plugin.""" exe.EXECat.activate(self) def deactivate(self): """Deactivate the plugin.""" exe.EXECat.deactivate(self) def dump_sig_to_text(self, log_dir, openssl): """ Convert a DER signature to its text format and writes it out.""" log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_sig') der_file = log_dir + os.sep + 'sig.der' # check to see if file exists if os.path.exists(der_file) == False: log.error('Cannot find DER file: %s' % der_file) return False elif openssl is None or os.path.exists(openssl) is False: log.error('Cannot open openssl binary: %s' % openssl) return False cmd = [openssl, 'pkcs7', '-inform', 'DER', '-print_certs', '-text', '-in', der_file] run = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running openssl: %s' % error) return False if output is not None: with open(log_dir + os.sep + 'sig.txt', 'w') as out_file: log.debug('Signature converted to text.') out_file.write(output) out_file.close() return True def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' 
+ self.name) log.info('Starting execution.') # get my config options sig_opts = config.get_section(self.name) # import disitool disitool_path = config.get_var(self.name, 'disitool') if disitool_path is None: log.error('disitool.py path is empty.') return False elif os.path.exists(disitool_path) == False: log.error('disitool.py does not exist: %s' % disitool_path) return False sys.path.append(os.path.dirname(disitool_path)) try: try: reload(disitool) except: import disitool except ImportError, err: log.error('Unable to import disitool: %s' % err) return False # extract sig # turn off stdout bc disitool.ExtractDigitalSignature is noisy try: old_stdout = sys.stdout sys.stdout = StringIO() sig = disitool.ExtractDigitalSignature(str(filename), \ config.get_var('Dir','log_dir') + os.sep + 'sig.der') sys.stdout = old_stdout except pefile.PEFormatError, err: log.error('Unable to extract signature: %s' %err) return False if sig is None: log.info("No signature on the file.") else: log.info("Signature extracted.") if sig_opts['openssl'] is None: log.error('openssl binary not present. Not converting signature.') else: # convert the sig to text self.dump_sig_to_text(config.get_var('Dir','log_dir'), config.get_var(self.name, 'openssl')) return True ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-sig.yapsy-plugin ================================================ [Core] Name = Digital Signatures Module = EXE-sig [Documentation] Description = Extract PE digital signatures. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-singlestring.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. 
This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Single-byte string plug-in Plugin Type: EXE Purpose: Attackers have begun to obfuscate embedded strings by moving a single byte at a time into a character array. In assembler, it looks like: mov mem, 0x68 mov mem+4, 0x69 mov mem+8, 0x21 ... Using a strings program, these strings will not be found. This script looks for any strings embedded in this way and prints them out. It does this by looking through the file for C6 opcodes, which are the start of the "mov mem/reg, imm" instruction. It will then decode it, grab the value and create a string from it. Requirements: - distorm3 (http://code.google.com/p/distorm/) Output: None """ __version__ = "$Id: 6322146c8d971464c6f726ebdba3a3d7a2540028 $" import logging import re import os try: from distorm3 import Decode, Decode32Bits except ImportError, err: print "EXE-SingleString: Could not import distorm3: %s" % error import mastiff.plugins.category.exe as exe # Change the class name and the base class class SingleString(exe.EXECat): """Extract single-byte strings from an executable.""" def __init__(self): """Initialize the plugin.""" exe.EXECat.__init__(self) self.length = 3 self.raw = False def activate(self): """Activate the plugin.""" exe.EXECat.activate(self) def deactivate(self): """Deactivate the plugin.""" exe.EXECat.deactivate(self) def findMov(self, filename): """ look through the file for any c6 opcode (mov reg/mem, imm) when it finds one, decode it and put it into a dictionary """ #log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.findMov') f = open(filename,'rb') offset = 0 instructs = {} mybyte = f.read(1) while mybyte: if mybyte == "\xc6": # found a mov op - decode and record it f.seek(offset) mybyte = f.read(16) # p will come back as list of (offset, size, instruction, hexdump) p = Decode(offset, mybyte, Decode32Bits) # break up the mnemonic ma = re.match('(MOV) ([\S\s]+), ([x0-9a-fA-F]+)', p[0][2]) if ma is not None: instructs[offset] = [ma.group(1), ma.group(2), ma.group(3), p[0][1]] # mnemonic, size #log.debug( "MOV instructions detected: %x %s %d" % (offset,p[0][2],p[0][1]) ) f.seek(offset+1) mybyte = f.read(1) offset = offset + 1 f.close() return instructs def decodeBytes(self, instructs): """ Take in a dict of instructions - parse through each instruction and grab the strings """ #log = logging.getLogger('Mastiff.Plugins.' + self.name + '.decodeBytes') curString = "" curOffset = 0 strList = [] usedBytes = [] for off in sorted(instructs.keys()): if off not in usedBytes: # set up the new offset if needed if curOffset == 0: curOffset = off while off in instructs: usedBytes.append(off) hexVal = int(instructs[off][2], 16) opLen = instructs[off][3] # is hexVal out of range? 
if hexVal < 32 or hexVal > 126 and (hexVal != 10 or hexVal != 13 or hexVal != 9): # end of string #log.debug("%x non-string char - new string: %d: %s" % (curOffset, hexVal,curString)) strList.append([curOffset, curString]) curOffset = off + opLen curString = "" else: #add to string if not self.raw and hexVal == 10: # line feed curString = curString + "\\r" elif not self.raw and hexVal == 13: # return curString = curString + "\\n" elif not self.raw and hexVal == 9: # tab curString = curString + "\\t" else: curString = curString + chr(hexVal) off = off + opLen strList.append([curOffset, curString]) curOffset = 0 curString = "" usedBytes.append(off) return strList def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') self.length = config.get_var(self.name, 'length') if self.length is None: self.length = 3 self.raw = config.get_bvar(self.name, 'raw') # find the bytes in the file instructs = self.findMov(filename) # now lets get the strings strlist = self.decodeBytes(instructs) self.output_file(config.get_var('Dir','log_dir'), strlist) return True def output_file(self, outdir, strlist): """Print output from analysis to a file.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.output_file') # if the string is of the right len, print it outstr = "" for string in strlist: if len(string[1]) >= int(self.length): outstr = outstr + '0x%x: %s\n' % (string[0], string[1]) if len(outstr) > 0: try: outfile = open(outdir + os.sep + 'single-string.txt', 'w') except IOError, err: log.debug("Cannot open single-string.txt: %s" % err) return False outfile.write(outstr) outfile.close() else: log.debug('No single-byte strings found.') return True ================================================ FILE: mastiff/plugins/analysis/EXE/EXE-singlestring.yapsy-plugin ================================================ [Core] Name = Single-Byte Strings Module = EXE-singlestring [Documentation] Description = Extract single-byte strings. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/EXE/__init__.py ================================================ ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-fileinfo.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ File Info plugin Plugin Type: Generic Purpose: This plug-in obtains the file information, such as the name and file size and stores it into the database. Database: A new table named files will be added to the database. This table contains the following fields: id - Primary Key sid - The id # of the file in the mastiff table. filename - The filename, including path, of the file being analyzed. size - The file size in bytes. 
firstseen - GMT date of when it was first seen (in UNIX timestamp). lastseen - GMT date of when it was last seen (in UNIX timestamp). times - Number of times this file has been analyzed. Output: Data is only sent to the database. No files are created. """ __version__ = "$Id: bc5c3cee7ede3183312b586a2e800bddc31bca1e $" import os import time import logging import sqlite3 import mastiff.plugins.category.generic as gen import mastiff.sqlite as DB class GenFileInfo(gen.GenericCat): """File Information plugin code.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.page_data.meta['filename'] = 'file_info' def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') data = dict() data['filename'] = filename data['size'] = os.stat(filename).st_size data['time'] = time.time() data['hashes'] = config.get_var('Misc', 'hashes') self.gen_output(config, data) self.output_db(config, data) return self.page_data def gen_output(self, config, data): """ Add the output into the local page structure. """ info_table = self.page_data.addTable('File Information') info_table.addheader([('name', str), ('info', str)], printHeader=False) info_table.addrow(['File Name', data['filename']]) info_table.addrow(['Size', data['size']]) info_table.addrow(['Time Analyzed', data['time']]) hash_table = self.page_data.addTable('File Hashes') hash_table.addheader([('Algorithm', str), ('Hash', str)]) hash_table.addrow(['MD5', data['hashes'][0]]) hash_table.addrow(['SHA1', data['hashes'][1]]) hash_table.addrow(['SHA256', data['hashes'][2]]) def output_db(self, config, data): """Print output from analysis to a file.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name) db = DB.open_db_conf(config) if db is None: return False db.text_factory = str # If the 'files' table does now exist, add it if DB.check_table(db, 'files') == False: log.debug('Adding table files') fields = [ 'id INTEGER PRIMARY KEY', 'sid INTEGER', 'filename TEXT', 'size INTEGER', 'firstseen INTEGER', 'lastseen INTEGER', 'times INTEGER'] if DB.add_table(db, 'files', fields) is None: return False db.commit() cur = db.cursor() sqlid = DB.get_id(db, data['hashes']) if sqlid is None: log.error('%s hashes do not exist in the database', data['filename']) return False # see if the filename already exists in the db try: cur.execute('SELECT id, times FROM files WHERE filename=? AND sid=?', (data['filename'], sqlid, )) except sqlite3.Error, err: log.error('Could not query filename table: %s', err) return None results = cur.fetchone() if results is not None: # filename is already in there. just update the lastseen item log.debug('%s is already in the database for hashes. Updating times.', data['filename']) try: cur.execute('UPDATE files SET lastseen=?, times=? 
WHERE id=?', (int(data['time']), results[1]+1, results[0], )) db.commit() except sqlite3.OperationalError, err: log.error('Could not update times: %s', err) return False return True # file info is not in the database, add it try: cur.execute('INSERT INTO files (sid, filename, size, firstseen, lastseen, times) \ VALUES (?, ?, ?, ?, ?, ?)', (sqlid, data['filename'], data['size'], int(data['time']), int(data['time']), 1, )) db.commit() except sqlite3.Error, err: log.error('Could not insert filename into files: %s', err) return False return True ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-fileinfo.yapsy-plugin ================================================ [Core] Name = File Information Module = GEN-fileinfo [Documentation] Description = File Information Retrieval Plug-in Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-fuzzy.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Fuzzy Hashing plug-in Plugin Type: Generic Purpose: This plug-in generates the fuzzy hash of the given file. Also compares the fuzzy hashes against all of hashes already generated in the database. Requirements: - ssdeep (http://ssdeep.sourceforge.net/) - pydeep (https://github.com/kbandla/pydeep) Output: - fuzzy.txt - File listing the fuzzy hash of the file and any files that match. - The 'fuzzy' field will get added to the files table in the DB to store the fuzzy hash. 
""" __version__ = "$Id: 1e313a680096a1bea3ff4e5ed5f497a2ca29cd57 $" import logging try: import pydeep except ImportError, error: print 'Gen-fuzzy: Could not import pydeep: %s'.format(error) import mastiff.sqlite as DB import sqlite3 import mastiff.plugins.category.generic as gen class GenFuzzy(gen.GenericCat): """Fuzzy hashing plugin.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.page_data.meta['filename'] = 'fuzzy' # we will be adding to the file information hashes, so make sure it runs before us self.prereq = 'File Information' def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') log.info('Generating fuzzy hash.') try: my_fuzzy = pydeep.hash_file(filename) except pydeep.error, err: log.error('Could not generate fuzzy hash: %s', err) return False if self.output_db(config, my_fuzzy) is False: return False fuzz_results = list() if config.get_bvar(self.name, 'compare') is True: fuzz_results = self.compare_hashes(config, my_fuzzy) self.output_file(config, my_fuzzy, fuzz_results) return self.page_data def compare_hashes(self, config, my_fuzzy): """ Compare the current hash to all of the fuzzy hashes already collected. """ log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.compare') db = DB.open_db_conf(config) conn = db.cursor() log.info('Comparing fuzzy hashes.') fuzz_results = list() my_md5 = config.get_var('Misc', 'hashes')[0] query = 'SELECT md5, fuzzy FROM mastiff WHERE fuzzy NOT NULL' try: # compare current hash for all fuzzy hashes for results in conn.execute(query): percent = pydeep.compare(my_fuzzy, results[1]) if percent > 0 and my_md5 != results[0]: fuzz_results.append([results[0], percent]) except sqlite3.OperationalError, err: log.error('Could not grab other fuzzy hashes: %s', err) return None except pydeep.error, err: log.error('pydeep error: %s', err) return None return fuzz_results def output_file(self, config, my_fuzzy, fuzz_results): """ Writes output to a file. """ log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file') if self.results['Generic']['File Information'] is None: # File Information is not present, cannot continue log.error('Missing File Information plug-in output. Aborting.') return False # add fuzzy hashes to the hashes already generated if self.results['Generic']['File Information'] is not None: # adding a new data onto an existing table my_table = self.results['Generic']['File Information']['File Hashes'] my_table.addrow(['Fuzzy Hash', my_fuzzy]) fuzz_table = self.page_data.addTable('Similar Fuzzy Hashes') if fuzz_results is not None and len(fuzz_results) > 0: fuzz_table.addheader([('MD5', str), ('Percent', str)]) for (md5, percent) in fuzz_results: fuzz_table.addrow([md5, percent]) elif config.get_bvar(self.name, 'compare') is True: # This only gets printed if we actually compared fuzz_table.addheader([('Data', str)], printHeader=False) fuzz_table.addrow(['No other fuzzy hashes were related to this file.']) return True def output_db(self, config, my_fuzzy): """ Add fuzzy hash to the DB.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.DB_output') # open up the DB and extend the mastiff table to include fuzzy hashes db = DB.open_db_conf(config) # there is a possibility the mastiff table is not available yet # check for that and add it if DB.check_table(db, 'files') == False: log.debug('Adding table "files"') fields = [ 'id INTEGER PRIMARY KEY', 'sid INTEGER', 'filename TEXT', 'size INTEGER', 'firstseen INTEGER', 'lastseen INTEGER', 'times INTEGER'] if DB.add_table(db, 'files', fields) is None: return False db.commit() if not DB.add_column(db, 'mastiff', 'fuzzy TEXT DEFAULT NULL'): log.error('Unable to add column.') return False conn = db.cursor() # update our hash sqlid = DB.get_id(db, config.get_var('Misc', 'Hashes')) query = 'UPDATE mastiff SET fuzzy=? WHERE id=?' try: conn.execute(query, (my_fuzzy, sqlid, )) db.commit() except sqlite3.OperationalError, err: log.error('Unable to add fuzzy hash: %s', err) return False db.close() return True ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-fuzzy.yapsy-plugin ================================================ [Core] Name = Fuzzy Hashing Module = GEN-fuzzy [Documentation] Description = Fuzzy Hashing Plug-in Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-hex.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Hex Dump plugin Plugin Type: Generic Purpose: This plug-in creates a hex view of the file being analyzed. 
class GEN_Hex(gen.GenericCat):
    """Hex Plug-in Code.

    Writes a hex dump of the analyzed file to hexdump.txt in the log
    directory.  Every output line has the form

        <8-digit hex offset>: <up to 16 hex bytes>  |<ASCII column>|
    """

    # Number of input bytes rendered on one line of the dump.
    BYTES_PER_LINE = 16

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)

    def activate(self):
        """Activate the plugin."""
        gen.GenericCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        gen.GenericCat.deactivate(self)

    def analyze(self, config, filename):
        """Create the hex dump of filename.

        Returns False when the plug-in is not activated, when the file
        cannot be opened or read, or when the dump cannot be written.
        Returns True when the plug-in is disabled in the configuration or
        the dump was written.
        """
        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # make sure we are enabled
        if config.get_bvar(self.name, 'enabled') is False:
            log.info('Disabled. Exiting.')
            return True

        try:
            in_file = open(filename, 'rb')
        except IOError as err:
            log.error('Unable to open file: %s' % err)
            return False

        # Collect the lines and join once; repeated string concatenation is
        # quadratic for large inputs.
        lines = []
        offset = 0
        try:
            while True:
                chars = in_file.read(self.BYTES_PER_LINE)
                if not chars:
                    # End of file.  Reading until EOF (instead of trusting a
                    # size taken from os.stat) guarantees termination even if
                    # the file is shorter than stat reported.
                    break

                # bytearray yields integers on both Python 2 and Python 3
                data = bytearray(chars)
                hex_part = ''.join('%02x ' % value for value in data)
                alpha_part = ''.join(self.is_ascii(chr(value)) for value in data)
                # pad a short final line so the ASCII column lines up
                padding = ' ' * ((self.BYTES_PER_LINE - len(data)) * 3)

                lines.append('%08x: %s%s |%s|\n'
                             % (offset, hex_part, padding, alpha_part))
                offset += len(data)
        except IOError as err:
            log.error('Cannot read data from file: %s' % err)
            return False
        finally:
            in_file.close()

        return self.output_file(config.get_var('Dir', 'log_dir'), ''.join(lines))

    def is_ascii(self, letter):
        """Return letter if it is a printable ASCII character (code points
        32 through 126), a period otherwise."""
        if 31 < ord(letter) < 127:
            return letter
        return '.'

    def output_file(self, outdir, data):
        """Write data to hexdump.txt inside outdir.

        Returns True on success, False if the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        try:
            # 'with' closes the handle even when write() fails
            with open(outdir + os.sep + 'hexdump.txt', 'w') as outfile:
                outfile.write(data)
        except IOError as err:
            log.error('Could not write hexdump.txt: %s' % err)
            return False

        return True
class GenMastiffOnline(gen.GenericCat):
    """MASTIFF Online plugin code.

    Uploads the analyzed file to MASTIFF Online when the terms of service
    have been accepted and submission is enabled, then records the
    server's reply on this plug-in's output page.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        self.page_data.meta['filename'] = 'MASTIFF-online'

    def activate(self):
        """Activate the plugin."""
        gen.GenericCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        gen.GenericCat.deactivate(self)

    def analyze(self, config, filename):
        """Submit filename to MASTIFF Online.

        Returns False when the plug-in is not activated; in every other
        case self.page_data is returned, which only gains a table when the
        upload succeeded and the reply could be parsed.
        """
        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get terms of service acceptance
        tos = config.get_bvar(self.name, 'accept_terms_of_service')
        if tos is None or tos is False:
            log.info('Terms of service not accepted. Accept to enable MASTIFF Online submission.')
            return self.page_data

        submit = config.get_bvar(self.name, 'submit')
        if submit is False:
            log.info('Not configured to send to MASTIFF Online.')
            return self.page_data

        # send data to MASTIFF Online server
        host = 'mastiff-online.korelogic.com'
        method = 'https'
        selector = "/cgi/dispatcher.cgi/UploadMOSample"
        fields = [('accept_terms_of_service', 'true')]

        # read the sample with a context manager so the handle is closed
        try:
            with open(filename, "rb") as sample:
                file_to_send = sample.read()
        except IOError as err:
            log.error('Unable to read {}: {}'.format(filename, err))
            return self.page_data

        files = [("upload", os.path.basename(filename), file_to_send)]
        log.debug('Sending sample to MASTIFF Online.')
        try:
            response = plugins.post_multipart(host, method, selector, fields, files)
        except IOError as err:
            # socket.error is a subclass of IOError since Python 2.6
            log.error('Unable to send file: {}'.format(err))
            return self.page_data

        # what gets returned isn't technically JSON, so we have to
        # manipulate it a little bit
        myjson = None
        try:
            myjson = json.loads(response[60:-14].replace('\'', '\"'))
        except json.scanner.JSONDecodeError as err:
            log.error('Error processing response: {}'.format(err))
        except Exception:
            # deliberately broad, but KeyboardInterrupt/SystemExit still propagate
            e = sys.exc_info()[0]
            log.error('Error processing incoming response: {}.'.format(e))

        if myjson is not None:
            self.gen_output(myjson)

        return self.page_data

    def gen_output(self, myjson):
        """Place the results into a Mastiff Output Page.

        myjson is the decoded server reply; it must provide the keys
        'sample_uploaded_on', 'sample_state' and 'sample_hash_md5'.
        Returns True when the table was written, False when the reply does
        not have the expected fields.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        required = ('sample_uploaded_on', 'sample_state', 'sample_hash_md5')
        missing = [key for key in required if key not in myjson]
        if missing:
            log.error('Unexpected response, missing: {}'.format(', '.join(missing)))
            return False

        mytable = self.page_data.addTable('MASTIFF Online')
        mytable.addheader([('name', str), ('data', str)], printHeader=False)
        mytable.addrow(['Sample Uploaded On', myjson['sample_uploaded_on']])

        # translate the two known states; show anything else verbatim
        state_labels = {'todo': 'In queue', 'done': 'Completed'}
        state = myjson['sample_state']
        mytable.addrow(['Status', state_labels.get(state, state)])

        mytable.addrow(['URL', 'https://mastiff-online.korelogic.com/index.html?sample_hash_md5=' + myjson['sample_hash_md5']])

        return True
class GenMetascan(gen.GenericCat):
    """MetaScan Online plugin code.

    Looks up the SHA256 of the analyzed file on Metascan Online.  When the
    hash is unknown the file is optionally submitted; otherwise the scan
    results are written to metascan-online.txt in the log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        self.api_key = None
        gen.GenericCat.__init__(self)

    def retrieve(self, sha256):
        """Retrieve results for this hash from Metascan Online.

        Returns the decoded JSON response, or None on any connection or
        decoding error.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.retrieve')

        url = "https://hashlookup.metascan-online.com/v2/hash/" + sha256
        headers = {'apikey': self.api_key}

        # set up request
        log.debug('Submitting request to Metascan Online.')
        try:
            req = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(req, timeout=30)
        except urllib2.HTTPError as err:
            log.error('Unable to contact URL: %s', err)
            return None
        except urllib2.URLError as err:
            log.error('Unable to open connection: %s', err)
            return None
        except socket.timeout as err:
            log.error('Timeout when contacting URL: %s', err)
            return None
        except Exception:
            log.error('Unknown Error when opening connection.')
            return None

        raw = response.read()
        try:
            response_dict = simplejson.loads(raw)
        except simplejson.decoder.JSONDecodeError:
            log.error('Error in Metascan Online JSON response. Are you submitting too fast?')
            return None

        log.debug('Response received.')
        return response_dict

    def submit(self, config, filename):
        """Submit a file to Metascan Online for analysis.

        Returns True when the file was uploaded, False when the report file
        cannot be opened or submission is disabled, and None when the
        upload itself failed.  The report file is closed on every path.

        Note: This function will likely fail if a proxy is used.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.submit')

        outdir = config.get_var('Dir', 'log_dir')
        out_path = outdir + os.sep + 'metascan-online.txt'
        try:
            mo_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        try:
            # make sure we are allowed to submit
            if config.get_bvar(self.name, 'submit') == False:
                log.info('Submission disabled. Not sending file.')
                mo_file.write('File does not exist on Metascan Online.\n')
                mo_file.write('Submission is disabled, not sending file.\n')
                return False

            log.info('File had not been analyzed by Metascan Online.')
            log.info('Sending file to Metascan Online.')

            # send file to Metascan Online
            url = "https://scan.metascan-online.com/v2/file"
            headers = {'apikey': self.api_key,
                       'filename': os.path.basename(filename)}

            try:
                req = urllib2.Request(url, headers=headers)
                with open(filename, "rb") as sample:
                    file_to_send = sample.read()
                response = urllib2.urlopen(req, data=file_to_send, timeout=30)
                reply = simplejson.loads(response.read())
            except urllib2.HTTPError as err:
                log.error('Unable to contact URL: %s', err)
                return None
            except urllib2.URLError as err:
                log.error('Unable to open connection: %s', err)
                return None
            except socket.timeout as err:
                log.error('Timeout when contacting URL: %s', err)
                return None
            except Exception:
                log.error('Unknown Error when sending file.')
                return None

            # write to file
            mo_file.write('File uploaded and processing.\n')
            mo_file.write('Link: https://www.metascan-online.com/en/scanresult/file/%s\n' % reply['data_id'])
        finally:
            # covers the early returns above, which used to leak the handle
            mo_file.close()

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        Returns False when the plug-in is not activated, no API key is
        configured, or the lookup failed; True otherwise.
        """
        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        self.api_key = config.get_var(self.name, 'api_key')
        if self.api_key is None or len(self.api_key) == 0:
            log.error('No Metascan Online API Key - exiting.')
            return False

        # index 2 of the stored hashes is used as the SHA256
        sha256 = config.get_var('Misc', 'hashes')[2]

        response = self.retrieve(sha256)
        if response is None:
            # error occurred
            log.error('Did not get a response from Metascan Online. Exiting.')
            return False

        lookup_key = sha256.upper()
        if lookup_key in response and response[lookup_key] == "Not Found":
            # The file has not been submitted
            self.submit(config, filename)
        else:
            # write response to file
            self.output_file(config.get_var('Dir', 'log_dir'), response)

        return True

    def output_file(self, outdir, response):
        """Format the output from Metascan Online into a file.

        response is the decoded lookup result.  Returns True on success,
        False when metascan-online.txt cannot be opened.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')

        out_path = outdir + os.sep + 'metascan-online.txt'
        try:
            mo_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        try:
            scan = response['scan_results']
            row_fmt = '{0:22} {1:24} {2:40}\n'

            out_str = 'Metascan Online Results for %s\n' % response['file_info']['md5']
            out_str += 'Last scan date: %s\n' % scan['start_time']

            result_str = ''
            foundAV = 0
            if scan['scan_all_result_i'] > 0:
                result_str += row_fmt.format('AV', 'Version', 'Results')
                details = scan['scan_details']
                for av_key in sorted(details.keys(), key=lambda s: s.lower()):
                    # scan_result_i should be 1-9 (10 is engine updating)
                    if 10 > details[av_key]['scan_result_i'] > 0:
                        # test for emptiness before encoding so the check
                        # never mixes byte and unicode strings
                        threat_name = details[av_key]['threat_found']
                        if not threat_name:
                            threat_name = u'Unknown Threat'
                        result_str += row_fmt.format(av_key,
                                                     details[av_key]['def_time'],
                                                     threat_name.encode('utf-8'))
                        foundAV += 1

            out_str += 'Total positive results: %d/%d\n' % (foundAV, scan['total_avs'])
            out_str += 'Link to metascan-online.com:\nhttps://www.metascan-online.com/en/scanresult/file/%s\n\n' % response['data_id']

            mo_file.write(out_str)
            mo_file.write(result_str)
        finally:
            mo_file.close()

        return True
""" __version__ = "$Id: 8970ce879282a3479538dd5d159f65ab4ad1092f $" import subprocess import re import logging import os import mastiff.plugins.category.generic as gen class GenStrings(gen.GenericCat): """Extract embedded strings.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.strings = {} self.page_data.meta['filename'] = 'strings' self.prereq = 'File Information' def _insert_strings(self, output, str_type): """Insert output from strings command into self.strings list.""" for line in output.split('\n'): m = re.match('\s*([0-9]+)\s+(.*)', line) if m is not None and m.group(2): self.strings[int(m.group(1))] = [str_type, m.group(2)] def analyze(self, config, filename): """ Run the strings command on the given filename and extract ASCII and UNICODE strings. The formatted output is stored in self.strings. """ # make sure we are activated if self.is_activated == False: return None log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options str_opts = config.get_section(self.name) if not str_opts['strcmd'] or \ not os.path.isfile(str_opts['strcmd']) or \ not os.access(str_opts['strcmd'], os.X_OK): log.error('%s is not accessible. Skipping.') return None if not str_opts['str_opts'] or not str_opts['str_uni_opts']: log.error('Strings options do not exist. Please check config. 
Exiting.') return None # obtain ASCII strings run = subprocess.Popen([str_opts['strcmd']] + \ str_opts['str_opts'].split() + [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: %s' % error) return False self._insert_strings(output,'A') # obtain Unicode strings run = subprocess.Popen([str_opts['strcmd']] + str_opts['str_opts'].split() + str_opts['str_uni_opts'].split() + [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: %s' % error) return False self._insert_strings(output,'U') #self.gen_output(config.get_var('Dir','log_dir')) self.gen_output() log.debug ('Successfully grabbed strings.') return self.page_data def gen_output(self): """Place the results into a Mastiff Output Page.""" log = logging.getLogger('Mastiff.Plugins.' + self.name) # self.page_data was previously initialized # add a table to it str_table = self.page_data.addTable('Embedded Strings') if len(self.strings) == 0: log.warn("No embedded strings detected.") str_table.addheader([('Message', str)], printHeader=False) str_table.addrow(['No embedded strings detected.' 
]) return True str_table.addheader([('Offset', str), ('Type', str), ('String', str)]) for k in sorted(self.strings.iterkeys()): str_table.addrow([ '{:0x}'.format(k), self.strings[k][0], self.strings[k][1] ]) return True ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-strings.yapsy-plugin ================================================ [Core] Name = Embedded Strings Plugin Module = GEN-strings [Documentation] Description = Embedded Strings Plugin Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-virustotal.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ VirusTotal Submission plugin Plugin Type: Generic Purpose: This plug-in determines if the file being analyzed has been analyzed on www.virustotal.com previously. Information on the VT API can be found at: https://www.virustotal.com/documentation/public-api/ Requirements: - A VirusTotal API key is required to be entered into the configuration file. This can be obtained from virustotal.com. - The simplejson module must be present. (https://github.com/simplejson/simplejson) Configuration Options: api_key: Your API key from virustotal.com. Leave this blank to disable the plug-in. submit [on|off]: Whether you want to submit files to VT or not. Output: The results from VirusTotal retrieval or submission will be placed into virustotal.txt. Note: Unless special arrangements are made, VT will not let you send more than 4 queries in a 1 minute timeframe. 
class GenVT(gen.GenericCat):
    """VirusTotal plugin code.

    Looks up the MD5 of the analyzed file on VirusTotal.  When VT has no
    report the file is optionally submitted; otherwise the report is
    written to virustotal.txt in the log directory.
    """

    # Seconds to wait for VirusTotal before giving up; matches the value
    # the Metascan Online plug-in passes to urlopen.
    TIMEOUT = 30

    def __init__(self):
        """Initialize the plugin."""
        self.api_key = None
        gen.GenericCat.__init__(self)

    def retrieve(self, md5):
        """
        Retrieve results for this hash from VT.
        This code based on the code from the VT API documentation.

        Returns the decoded JSON response, or None on any connection or
        decoding error.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.retrieve')

        url = "https://www.virustotal.com/vtapi/v2/file/report"
        # 'resource' is the MD5 hash of the file
        parameters = {'apikey': self.api_key, 'resource': md5}

        # set up request
        log.debug('Submitting request to VT.')
        data = urllib.urlencode(parameters)
        try:
            req = urllib2.Request(url, data)
            response = urllib2.urlopen(req, timeout=self.TIMEOUT)
        except urllib2.HTTPError as err:
            log.error('Unable to contact URL: %s', err)
            return None
        except urllib2.URLError as err:
            log.error('Unable to open connection: %s', err)
            return None
        except socket.timeout as err:
            log.error('Timeout when contacting URL: %s', err)
            return None
        except Exception:
            log.error('Unknown Error when opening connection.')
            return None

        raw = response.read()
        try:
            response_dict = simplejson.loads(raw)
        except simplejson.decoder.JSONDecodeError:
            log.error('Error in VT JSON response. Are you submitting too fast?')
            return None

        log.debug('Response received.')
        return response_dict

    def submit(self, config, filename):
        """
        Submit a file to VT for analysis.
        This code based on the code from the VT API documentation.

        Returns True when VT accepted the file, False otherwise.  The
        report file is closed on every path.

        Note: This function will likely fail if a proxy is used.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.submit')

        outdir = config.get_var('Dir', 'log_dir')
        out_path = outdir + os.sep + 'virustotal.txt'
        try:
            vt_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        try:
            # make sure we are allowed to submit
            if config.get_bvar(self.name, 'submit') == False:
                log.info('Submission disabled. Not sending file.')
                vt_file.write('File does not exist on VirusTotal.\n')
                vt_file.write('Submission is disabled, not sending file.\n')
                return False

            log.info('Sending file to VirusTotal')

            # send file to VT
            host = "www.virustotal.com"
            method = 'https'
            selector = "/vtapi/v2/file/scan"
            fields = [("apikey", config.get_var(self.name, 'api_key'))]
            try:
                with open(filename, "rb") as sample:
                    file_to_send = sample.read()
            except IOError as err:
                log.error('Unable to read %s: %s', filename, err)
                return False
            files = [("file", os.path.basename(filename), file_to_send)]

            try:
                reply = simplejson.loads(plugins.post_multipart(host, method,
                                                                selector,
                                                                fields, files))
            except socket.error as err:
                log.error('Unable to send file: %s' % err)
                return False
            except simplejson.decoder.JSONDecodeError:
                # same failure mode retrieve() already reports
                log.error('Error in VT JSON response. Are you submitting too fast?')
                return False

            # check for success
            if reply['response_code'] != 1:
                # error
                log.error('Could not submit to VT:\n%s', reply['verbose_msg'])
                return False

            # write to file
            vt_file.write(reply['verbose_msg'] + '\n')
            vt_file.write('Link:\n' + reply['permalink'] + '\n')
        finally:
            # covers the early returns above, which used to leak the handle
            vt_file.close()

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        Returns False when the plug-in is not activated, no API key is
        configured, or the lookup failed; True otherwise.
        """
        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        self.api_key = config.get_var(self.name, 'api_key')
        if self.api_key is None or len(self.api_key) == 0:
            log.error('No VirusTotal API Key - exiting.')
            return False

        # index 0 of the stored hashes is used as the MD5
        md5 = config.get_var('Misc', 'hashes')[0]

        response = self.retrieve(md5)
        if response is None:
            # error occurred
            log.error('Did not get a response from VT. Exiting.')
            return False

        # response of 1 means it has been scanned on VT before
        # response of 0 means that is has not
        if response['response_code'] != 1:
            # The file has not been submitted
            self.submit(config, filename)
        else:
            # write response to file
            self.output_file(config.get_var('Dir', 'log_dir'), response)

        return True

    def output_file(self, outdir, response):
        """Format the output from VT into a file.

        response is the decoded VT report.  Returns True on success, False
        when virustotal.txt cannot be opened.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')

        out_path = outdir + os.sep + 'virustotal.txt'
        try:
            vt_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        try:
            vt_file.write('VirusTotal Results for %s\n' % response['md5'])
            vt_file.write('Last scan date: %s\n' % response['scan_date'])
            vt_file.write('Total positive results: %d/%d\n' %
                          (response['positives'], response['total']))
            vt_file.write('Link to virustotal.com:\n%s\n\n' % response['permalink'])

            if response['positives'] > 0:
                row_fmt = '{0:25} {1:15} {2:40}\n'
                vt_file.write(row_fmt.format('AV', 'Version', 'Results'))

                scans = response['scans']
                for av_key in sorted(scans.keys(), key=lambda s: s.lower()):
                    if scans[av_key]['detected'] == True:
                        vt_file.write(row_fmt.format(av_key,
                                                     scans[av_key]['version'],
                                                     scans[av_key]['result']))
        finally:
            vt_file.close()

        return True
This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Yara Plugin Plugin Type: Generic Purpose: This plug-in allows Yara rules to be run against the file being analyzed. Yara rules are specified through a configuration option and all rules will be applied to the file. Requirements: - Yara, libyara and yara-python must be installed. http://code.google.com/p/yara-project Configuration Options: [yara] yara_sigs = Base path to Yara signatures. This path will be recursed to find additional signatures. Files ending in ".yar" or ".yara" will be used. Leave blank to disable the plug-in. Output: yara.txt - Output listing all matches found. This file will not be present if no matches were found. Database: A new table named 'yara' will be created with the following fields: id INTEGER PRIMARY KEY = Primary key sid INTEGER DEFAULT NULL = ID of file being analyzed rule_name TEXT DEFAULT NULL = Name of the Yara rule matched meta TEXT DEFAULT NULL = Yara meta information tag TEXT DEFAULT NULL = Yara tag information rule_file TEXT DEFAULT NULL = Full path to rule file match is from file_offset INTEGER DEFAULT NULL = Offset in analyzed file match was found string_id TEXT DEFAULT NULL = ID of match variable from Yara rule data TEXT DEFAULT NULL = Data Yara rule matched on Only new information will be added to the database: a match is inserted only when an identical row is not already stored. Existing rows are _NOT_ removed or updated by this plug-in. NOTE: Since the Yara output can contain data that is in binary, any potential binary data is converted to hex. Within the string, the binary data will be represented as "backslash-xXX" with the XX being the hex equivalent. Please ensure all of your rules work in Yara before using them in mas.py.
""" __version__ = "$Id: 0f0233e8220e4ca4a6677253006de25ecdb365f6 $" import logging import os import sqlite3 try: import yara except ImportError, error: print "GenYara: Could not import yara: %s" % error import mastiff.sqlite as DB import mastiff.plugins.category.generic as gen import mastiff.plugins as plugins class GenYara(gen.GenericCat): """Yara signature plug-in.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.filename = "" def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') self.filename = filename # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False elif len(plug_opts['yara_sigs']) == 0: log.debug('No yara_sigs parameter. Disabling plug-in.') return False # find all yara signature files sig_files = self.get_sigs(plug_opts['yara_sigs']) if sig_files is None or len(sig_files) == 0: log.debug('No signature files detected. Exiting plug-in.') return True # create sig dict of all files found. # namespace is the file name of the rule sig_dict = dict() for files in sig_files: sig_dict[files] = files # compile rules and run against file try: rules = yara.compile(filepaths=sig_dict) except yara.SyntaxError, err: log.error('Rule error: %s', err) return False # generate matches try: matches = rules.match(self.filename, callback=self._debug_print) except yara.Error, err: log.error('Yara error: %s', err) return False if len(matches) > 0: self.output_file(config.get_var('Dir','log_dir'), matches) self.output_db(config, matches) return True def get_sigs(self, sig_dir): """ Recurse through a directory for Yara signature files. Files should end in ".yar" or "yara". Returns a list of signature files, None on errors. 
""" # sanity check the path log = logging.getLogger('Mastiff.Plugins.' + self.name + '.get_sigs') if not os.path.isdir(os.path.expanduser(sig_dir)) \ or not os.path.exists(os.path.expanduser(sig_dir)): log.error('%s is not a directory or does not exist.' % sig_dir) return None sig_files = list() # walk the directory for items in os.walk(os.path.expanduser(sig_dir)): # find each yara sig file in the dir for files in items[2]: if files.endswith('.yar') or \ files.endswith('.yara'): sig_files.append(items[0] + os.sep + files) return sig_files def _debug_print(self, data): """ Debug printing of Yara matches.""" log = logging.getLogger('Mastiff.Plugins.' + self.name + '.match') if data['matches'] == True: for match in data['strings']: log.debug('Match: %s: %s' % (data['rule'], plugins.bin2hex(match[2]))) return yara.CALLBACK_CONTINUE def output_file(self, outdir, matches): """Prints any Yara matches to a file named yara.txt.""" out_file = open(outdir + os.sep + 'yara.txt', 'w') if len(matches) == 0: out_file.write('No Yara matches.') else: out_file.write('Yara Matches for %s\n' % self.filename) for item in matches: out_file.write('\nRule Name: %s\n' % item.rule) out_file.write('Yara Meta: %s\n' % item.meta) out_file.write('Yara Tags: %s\n' % item.tags) out_file.write('Rule File: %s\n' % item.namespace) out_file.write('Match Info:\n') for y_match in item.strings: out_file.write('\tFile Offset: %d\n' % y_match[0]) out_file.write('\tString ID: %s\n' % y_match[1]) out_file.write('\tData: %s\n\n' % plugins.bin2hex(y_match[2])) out_file.write('*'*79 + '\n') out_file.close() return True def output_db(self, config, matches): """ Output any matches to the database. """ log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.output_db') db = DB.open_db_conf(config) if db is None: return False # add the table 'yara' if it doesn't exist if DB.check_table(db, 'yara') == False: fields = ['id INTEGER PRIMARY KEY', 'sid INTEGER DEFAULT NULL', 'rule_name TEXT DEFAULT NULL', 'meta TEXT DEFAULT NULL', 'tag TEXT DEFAULT NULL', 'rule_file TEXT DEFAULT NULL', 'file_offset INTEGER DEFAULT NULL', 'string_id TEXT DEFAULT NULL', 'data TEXT DEFAULT NULL' ] if not DB.add_table(db, 'yara', fields ): log.error('Unable to add "yara" database table.') return False sqlid = DB.get_id(db, config.get_var('Misc', 'hashes')) sel_query = 'SELECT count(*) FROM yara ' sel_query += 'WHERE sid=? AND rule_name=? AND meta=? AND tag=? AND ' sel_query += 'rule_file=? AND file_offset=? AND string_id=? AND data=? ' query = 'INSERT INTO yara ' query += '(sid, rule_name, meta, tag, rule_file, file_offset, string_id, data) ' query += 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)' cur = db.cursor() # go through all matches and insert into DB if needed try: for item in matches: for y_match in item.strings: match_insert = ( sqlid, item.rule, str(item.meta), \ str(item.tags), item.namespace, \ y_match[0], y_match[1], plugins.bin2hex(y_match[2]), ) # check to see if its already in there cur.execute(sel_query, match_insert) if cur.fetchone()[0] == 0: # not in the db already, add it in log.debug('Adding %s match to database.' 
% (item.rule)) cur.execute(query, match_insert) db.commit() except sqlite3.Error, err: log.error('SQL error when adding item to DB: %s' % err) return False db.close() return True ================================================ FILE: mastiff/plugins/analysis/GEN/GEN-yara.yapsy-plugin ================================================ [Core] Name = yara Module = GEN-yara [Documentation] Description = Yara Signature Plug-in Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/GEN/__init__.py ================================================ ================================================ FILE: mastiff/plugins/analysis/Office/Office-metadata.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Office MetaData Plug-in Plugin Type: PDF Purpose: Extracts any metadata from an Office document using exiftool (http://www.sno.phy.queensu.ca/~phil/exiftool/). Output: metadata.txt - Contains selected pieces of metadata. Requirements: The exiftool binary is required for this plug-in. The binary can be downloaded from http://www.sno.phy.queensu.ca/~phil/exiftool/. 
Configuration Options: [Office Metadata] exiftool = Path to exiftool program """ __version__ = "$Id: 036849ac813bffb3d941d7ec24f8911f0a5f7da0 $" import subprocess import logging import os import mastiff.plugins.category.office as office class OfficeMetadata(office.OfficeCat): """Office Metadata plug-in.""" def __init__(self): """Initialize the plugin.""" office.OfficeCat.__init__(self) self.page_data.meta['filename'] = 'office-metadata' def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # verify external program exists and we can call it if not plug_opts['exiftool'] or \ not os.path.isfile(plug_opts['exiftool']) or \ not os.access(plug_opts['exiftool'], os.X_OK): log.error('%s is not accessible. 
Skipping.', plug_opts['exiftool']) return False # run your external program here run = subprocess.Popen([plug_opts['exiftool']] + \ [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: {}'.format(error)) return False metadata = dict() keywords = [ 'Author', 'Code Page', 'Comments', 'Company', 'Create Date', 'Current User', 'Error', 'File Modification Date/Time', 'File Type', 'Internal Version Number', 'Keywords', 'Last Modified By', 'Last Printed', 'MIME Type', 'Modify Date', 'Security', 'Software', 'Subject', 'Tag PID GUID', 'Template', 'Title', 'Title Of Parts', 'Total Edit Time', 'Warning'] # set up output table new_table = self.page_data.addTable(title='Office Document Metadata') # grab only data we are interested in for line in output.split('\n'): if line.split(' :')[0].rstrip() in keywords: metadata[line.split(':')[0].rstrip()] = line.split(' :')[1].rstrip().lstrip(' ') if len(metadata) == 0: # no data log.warn("No PDF metadata detected.") new_table.addheader([('Message', str)], printHeader=False) new_table.addrow(['No Office metadata detected.' ]) else: # set up output table new_table.addheader([('Data', str), ('Value', str)]) # sort and add to table for key in sorted(metadata.iterkeys()): new_table.addrow([key, metadata[key]]) log.debug ('Successfully ran %s.', self.name) return self.page_data ================================================ FILE: mastiff/plugins/analysis/Office/Office-metadata.yapsy-plugin ================================================ [Core] Name = Office Metadata Module = Office-metadata [Documentation] Description = Extract Office metadata from document. 
Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/Office/Office-pyOLEScanner.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ pyOLEScanner.py Plug-in Plugin Type: Office Purpose: This plugin runs Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py script. pyOLEScanner.py examines an Office document and looks for specific instances of malicious code. Pre-requisites: - pyOLEScanner.py must be downloaded. It can be found at: https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip Output: office-analysis.txt - File containing output from scan. deflated_doc/ - If Office document is an Office 2007 or later document, it will be deflated and extracted into this directory. Configuration Options: [Office Metadata] exiftool = Path to exiftool program NOTE: - An Error such as "('An Error Occurred:', 'no such table: BWList')" in the output file is normal and can be ignored. - For OfficeX files, an error: Starting Deflate Procedure An error occurred during deflating may occur when the script is unable to unzip the archive. """ __version__ = "$Id: 4cff51f78ebe3e9404a8c73b1a0512383d600e1d $" import subprocess import logging import os import sys import mastiff.plugins.category.office as office class OfficepyOLEScanner(office.OfficeCat): """ Wrapper for Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py office analysis plug-in. 
""" def __init__(self): """Initialize the plugin.""" office.OfficeCat.__init__(self) def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # verify external program exists and we can call it if not plug_opts['olecmd'] or \ not os.path.isfile(plug_opts['olecmd']) or \ not os.access(plug_opts['olecmd'], os.X_OK): log.error('%s is not accessible. Skipping.', plug_opts['olecmd']) return False # we need to change dir to log_dir as pyOLEScanner.py places files in # the directory we run in my_dir = os.getcwd() if os.path.isabs(filename) is False: # we need to update the filename to point to the right file filename = my_dir + os.sep + filename os.chdir(config.get_var('Dir','log_dir')) run = subprocess.Popen([sys.executable] + [plug_opts['olecmd']] + \ [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: %s' % error) os.chdir(my_dir) return False # ole2.sqlite is created by pyOLEScanner.py, but is not usable to us # so lets delete it try: if os.path.isfile('ole2.sqlite'): os.remove('ole2.sqlite') log.debug('Deleted ole2.sqlite.') except OSError, err: log.error('Unable to delete ole2.sqlite: %s', err) # change directories back os.chdir(my_dir) self.output_file(config.get_var('Dir','log_dir'), output) log.debug ('Successfully ran %s.', self.name) return True def output_file(self, outdir, data): """Place the data into a file.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name) try: out_file = open(outdir + os.sep + "office-analysis.txt",'w') except IOError, err: log.error('Write error: %s', err) return False out_file.write(data) out_file.close() return True ================================================ FILE: mastiff/plugins/analysis/Office/Office-pyOLEScanner.yapsy-plugin ================================================ [Core] Name = Office pyOLEScanner Module = Office-pyOLEScanner [Documentation] Description = pyOLEScanner plug-in based on Giuseppe 'Evilcry' Bonfa's code. Author = Tyler Hudak/Giuseppe 'Evilcry' Bonfa Version = 1.0 Website = www.korelogic.com / https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/Office/__init__.py ================================================ ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-metadata.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ PDF MetaData Plug-in Plugin Type: PDF Purpose: Extracts any metadata from a PDF using exiftool (http://www.sno.phy.queensu.ca/~phil/exiftool/) Output: metadata.txt - Contains selected pieces of extracted metadata. Requirements: The exiftool binary is required for this plug-in. The binary can be downloaded from http://www.sno.phy.queensu.ca/~phil/exiftool/. TODO: Exiftool will miss some metadata, especially if the Info object is present but not specified. 
Future versions of this plug-in will brute force the metadata, but PDF-parsing code needs to be written (or import pdf-parser.py). Configuration Options: [PDF Metadata] exiftool = Path to exiftool program """ __version__ = "$Id: 0ba78966f263ce6cb3ec0447e392d8c544baa55f $" import subprocess import logging import os import mastiff.plugins.category.pdf as pdf class PDFMetadata(pdf.PDFCat): """PDF Metadata plug-in.""" def __init__(self): """Initialize the plugin.""" pdf.PDFCat.__init__(self) self.page_data.meta['filename'] = 'pdf-metadata' def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # verify external program exists and we can call it if not plug_opts['exiftool'] or \ not os.path.isfile(plug_opts['exiftool']) or \ not os.access(plug_opts['exiftool'], os.X_OK): log.error('%s is not accessible. 
Skipping.', plug_opts['exiftool']) return False # run your external program here run = subprocess.Popen([plug_opts['exiftool']] + \ [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: {}'.format(error)) return False metadata = dict() keywords = [ 'Creator', 'Create Date', 'Title', 'Author', 'Producer', 'Modify Date', 'Creation Date', 'Mod Date', 'Subject', 'Keywords', 'Author', 'Metadata Date', 'Description', 'Creator Tool', 'Document ID', 'Instance ID', 'Warning'] # grab only data we are interested in for line in output.split('\n'): if line.split(' :')[0].rstrip() in keywords: metadata[line.split(':')[0].rstrip()] = line.split(' :')[1].rstrip() new_table = self.page_data.addTable(title='PDF Document Metadata') if len(metadata) == 0: # no data log.warn("No PDF metadata detected.") new_table.addheader([('Message', str)], printHeader=False) new_table.addrow(['No PDF metadata detected.' ]) else: # set up output table new_table.addheader([('Data', str), ('Value', str)]) # sort and add to table for key in sorted(metadata.iterkeys()): new_table.addrow([key, metadata[key]]) log.debug ('Successfully ran %s.', self.name) return self.page_data ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-metadata.yapsy-plugin ================================================ [Core] Name = PDF Metadata Module = PDF-metadata [Documentation] Description = Extract PDF metadata from document. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-pdfid.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. 
This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ pdfid.py Plug-in Plugin Type: PDF Purpose: Run Didier Stevens' pdfid.py script against a PDF and place the results into a file. Output: pdfid.txt - Output of pdfid.py. Requirements: The pdfid.py script must be installed. Configuration Options: [pdfid] pdfid_cmd - Path to the pdfid.py script. Must be executable. pdfid_opts - Options to give to the script. Can be empty. """ __version__ = "$Id: a83e6c90f42bdd7ada3f1393dc749b5b61668c4e $" import subprocess import logging import os import sys import mastiff.plugins.category.pdf as pdf class PDFid(pdf.PDFCat): """Run Didier Stevens pdfid.py""" def __init__(self): """Initialize the plugin.""" pdf.PDFCat.__init__(self) self.page_data.meta['filename'] = 'pdf-id' def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # verify external program exists and we can call it if not plug_opts['pdfid_cmd'] or \ not os.path.isfile(plug_opts['pdfid_cmd']) or \ not os.access(plug_opts['pdfid_cmd'], os.X_OK): log.error('%s is not accessible. 
Skipping.', plug_opts['pdfid_cmd']) return False elif len(plug_opts['pdfid_cmd']) == 0: log.debug('Plug-in disabled.') return False # options cannot be empty - at least have a blank option if 'pdfid_opts' not in plug_opts: plug_opts['pdfid_opts'] = '' elif len(plug_opts['pdfid_opts']) == 0: plug_opts['pdfid_opts'] = '' else: plug_opts['pdfid_opts'] = plug_opts['pdfid_opts'].split() # run pdfid.py here try: run = subprocess.Popen([plug_opts['pdfid_cmd']] + \ list(plug_opts['pdfid_opts']) + \ [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() except: log.error('Error executing pdfid.py: {}'.format(sys.exc_info()[0])) return False if error is not None and len(error) > 0: log.error('Error running program: {}'.format(error)) return False # parse through output if 'PDF Header' in output.split('\n')[1]: # By default, pdfid.py displays the PDF header as the first. This is different enough from the # other data extracted it should be in its own table. 
header_table = self.page_data.addTable(title='PDF Header') header_table.addheader([('Name', str), ('Value', str)], printHeader=False) header_table.addrow(output.split('\n')[1].lstrip().split(': ')) # grab the rest of the data if 'PDF Header' in output.split('\n')[1]: pdf_objects = [ x.lstrip().split() for x in output.split('\n')[2:] ] else: pdf_objects = [ x.lstrip().split() for x in output.split('\n')[1:] ] new_table = self.page_data.addTable(title='PDF Objects') new_table.addheader([('Object___Name', str), ('Count', int)]) [ new_table.addrow([my_obj[0], my_obj[1]]) for my_obj in pdf_objects if my_obj ] log.debug ('Successfully ran %s.', self.name) return self.page_data ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-pdfid.yapsy-plugin ================================================ [Core] Name = pdfid Module = PDF-pdfid [Documentation] Description = Run Didier Stevens' pdfid.py script Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-pdfparser.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ PDF-pdfparser Plugin Type: PDF Purpose: This plug-in uses Didier Stevens pdf-parser.py code to perform two tasks: - Writes an uncompressed copy of the PDF to a file named uncompressed-pdf.txt - Searches the PDF for keywords in objects, specified by the self.interesting_objects list, and writes those objects, and any they reference, to a file in pdf-objects/. 
All rights for pdf-parser.py belong to Didier Stevens. Requirements: - Didier Stevens pdf-parser.py must be installed. (http://blog.didierstevens.com/programs/pdf-tools/) Configuration Options: [pdf-parser] pdf_cmd = Path to pdf-parser.py """ __version__ = "$Id: e784c089c5df767e0b92109f46fd67ec540973a3 $" import os import subprocess import logging import re import mastiff.queue as queue import mastiff.plugins.category.pdf as pdf class PDFparser(pdf.PDFCat): """Plug-in to run Didier Stevens pdf-parser.py script.""" def __init__(self): """Initialize the plugin.""" pdf.PDFCat.__init__(self) # list of objects we want to search for self.interesting_objects = [ 'JavaScript', 'JS', 'OpenAction', 'AA' ] def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # verify external program exists and we can call it if not plug_opts['pdf_cmd'] or \ not os.path.isfile(plug_opts['pdf_cmd']) or \ not os.access(plug_opts['pdf_cmd'], os.X_OK): log.error('%s is not accessible. Skipping.', plug_opts['pdf_cmd']) return False self.uncompress(config, plug_opts, filename) self.get_objects(config, plug_opts, filename) log.debug ('Successfully ran %s.', self.name) return True def output_object(self, plug_opts, pdf_file, obj_num, reasons, log_dir): """ Run pdf-parser to extract a given obj_num and place it into the log_dir directory, in the form obj-#.txt. """ log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.outobj') # create the dir if it doesn't exist log_dir = log_dir + os.sep + 'pdf-objects' if not os.path.exists(log_dir): try: os.makedirs(log_dir) except IOError, err: log.error('Unable to create dir %s: %s' % (log_dir, err)) return False # if we get the obj_num in the form "12 0", remove the gen # if ' ' in obj_num: # contains whitespace obj_num = obj_num.split(' ')[0] filename = log_dir + os.sep + 'obj-' + obj_num + '.txt' # have pdf-parser extract the object for us options = list(['-o ' + obj_num, '-f', '-w']) run = subprocess.Popen([plug_opts['pdf_cmd']] + \ options + \ [ pdf_file ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Unable to extract object %s.' % obj_num) return False # output the file - we don't use the pdf-parser.py -d option as # there are times it errors out when attempting to dump an object with open(filename, 'w') as out_file: out_file.write('Object %s\n' % obj_num) out_file.write('Flagged due to:\n') for why in reasons: out_file.write('\t%s\n' % why) out_file.write('\n') out_file.write(output) return True def get_objects(self, config, plug_opts, filename): """ Search through the PDF for objects associated with malicious activity and extract those into their own file. """ log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.get_objects') log.info('Extracting interesting objects.') #objects = list() objects = dict() for keyword in self.interesting_objects: # let pdf-parser.py grab the object containing our keywords run = subprocess.Popen([plug_opts['pdf_cmd']] + \ ['--search=' + keyword ] + [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() # skip anything that gives us an error if error is not None and len(error) > 0: continue # go through pdf-parser output and grab any objects and # their referenced objects to dump for line in output.split('\n'): obj_match = re.match('obj\s+([0-9]+\s+[0-9]+)', line) ref_match = re.search('Referencing: ([0-9]+\s+[0-9\s,R]+)', line) if obj_match is not None: # obj # # cur_obj = obj_match.group(1) if cur_obj not in objects.keys(): objects[cur_obj] = list() objects[cur_obj].extend(['Keyword: %s' % keyword ]) log.debug('Adding object %s for keyword %s' % (cur_obj, keyword)) elif ref_match is not None: # Referenced by: object list for ref_obj in \ [ x.lstrip()[:-2] for x in ref_match.group(1).split(',')]: if ref_obj not in objects.keys(): # item not created yet objects[ref_obj] = list() if 'Referenced by %s' % cur_obj not in objects[ref_obj]: # make sure we didn't add already objects[ref_obj].extend(['Referenced by %s' % cur_obj ]) log.debug('Adding object %s from reference "%s"' % (ref_obj, cur_obj)) # output collected objects to file for my_obj in objects.keys(): self.output_object(plug_opts, filename, my_obj, objects[my_obj], config.get_var('Dir', 'log_dir')) def uncompress(self, config, plug_opts, filename): """ Uncompress the PDF using pdf-parser.py """ log = logging.getLogger('Mastiff.Plugins.' 
+ self.name + '.uncompress') log.info('Uncompressing PDF.') feedback = config.get_bvar(self.name, 'feedback') if feedback is True: job_queue = queue.MastiffQueue(config.config_file) else: job_queue = None # run pdf-parser with -w (raw) and -f (decompress) opts run = subprocess.Popen([plug_opts['pdf_cmd']] + \ ['-w', '-f' ] + [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Unable to uncompress PDF: %s.' % filename) return False self.output_file(config.get_var('Dir', 'log_dir'), output) if job_queue is not None and feedback is True and not filename.endswith('uncompressed-pdf.txt'): log.info('%s' % filename) log.info('Adding uncompressed PDF to queue.') job_queue.append(config.get_var('Dir', 'log_dir') + os.sep + "uncompressed-pdf.txt") def output_file(self, outdir, data): """Place the data into a file.""" log = logging.getLogger('Mastiff.Plugins.' + self.name) try: out_file = open(outdir + os.sep + "uncompressed-pdf.txt",'w') except IOError, err: log.error('Write error: %s', err) return False out_file.write(data) out_file.close() return True ================================================ FILE: mastiff/plugins/analysis/PDF/PDF-pdfparser.yapsy-plugin ================================================ [Core] Name = pdf-parser Module = PDF-pdfparser [Documentation] Description = Use Didier Stevens pdf-parser.py to uncompress PDF and find interesting objects. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/PDF/__init__.py ================================================ ================================================ FILE: mastiff/plugins/analysis/ZIP/ZIP-extract.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. 
This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Zip archive extract plug-in. Plugin Type: ZIP Purpose: Extract all of the files within the archive into a directory. If the filename contains an absolute path or '..'s, they are removed before extraction occurs. Configuration Options: enabled = [on|off]: Whether you want to submit files to VT or not. Output: Extracts all of the files in the archive to log_dir/zip_contents. """ __version__ = "$Id: ed40be29fdba1a1b71bcb47d5c5933a737f2a4b2 $" import logging import os import zipfile import struct import mastiff.plugins.category.zip as zip import mastiff.queue as queue class ZIP_Extract(zip.ZipCat): """Zip archive extraction plug-in.""" def __init__(self): """Initialize the plugin.""" zip.ZipCat.__init__(self) def activate(self): """Activate the plugin.""" zip.ZipCat.activate(self) def deactivate(self): """Deactivate the plugin.""" zip.ZipCat.deactivate(self) def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') feedback = config.get_bvar(self.name, 'feedback') if feedback is True: job_queue = queue.MastiffQueue(config.config_file) else: job_queue = None # make sure we are enabled if config.get_bvar(self.name, 'enabled') is False: log.info('Disabled. 
Exiting.') return True try: my_zip = zipfile.ZipFile(filename, 'r', allowZip64=True) except (zipfile.BadZipfile, IOError, struct.error), err: log.error('Unable to open zip file: {}'.format(err)) return False log_dir = config.get_var('Dir', 'log_dir') log_dir += os.sep + 'zip_contents' try: os.mkdir(log_dir) except OSError, err: # dir already exists, skip pass # grab password if one exists pwd = config.get_var(self.name, 'password') if pwd is not None and len(pwd) > 0: log.info('Password \"{}\" will be used for this zip.'.format(pwd)) # cycle through files and extract them for file_member in my_zip.namelist(): # if its an absolute directory, remove os.sep if file_member[0:1] == os.sep: log.info('Zip member \"{}\" contains absolute path. Stripping.'.format(file_member)) zipfile_name = os.path.normpath(file_member[1:]) try: zipfile_name = unicode(os.path.normpath(file_member)) except UnicodeDecodeError: zipfile_name = unicode(os.path.normpath(file_member), errors='replace') # warn about the ..'s, normpath above removes them if os.pardir in file_member: log.warning('File contains ..s: {}'.format(file_member)) # we can't just blindly extract in case there are absolute paths or '..'s # so we read in the file, create any directories, and write it out try: log.debug(u'Creating directory {}.'.format(os.path.dirname(zipfile_name))) os.makedirs(log_dir + os.sep + os.path.dirname(zipfile_name)) except OSError, err: log.debug(u'Directory {} already exists.'.format(os.path.dirname(zipfile_name))) if len(os.path.basename(file_member)) == 0: try: log.debug('{} is just a directory. Not creating file.'.format(file_member)) except UnicodeEncodeError: log.debug('{} is just a directory. 
Not creating file.'.format(file_member.encode('utf-8'))) continue log.info(u'Extracting {}.'.format(zipfile_name)) try: in_file = my_zip.open(file_member, 'r', pwd=pwd) data = in_file.read() in_file.close() except RuntimeError, err: log.error('Problem extracting: {}'.format(err.message.encode('utf-8'))) continue except (IOError, zipfile.BadZipfile) as err: log.error('Problem extracting {}.'.format(file_member)) log.error('Possible obfuscation or corruption: {}'.format(err.message)) continue try: outfile = open(log_dir + os.sep + zipfile_name, 'w') outfile.write(data) outfile.close() except IOError, err: log.error('Could not write file: {}'.format(err)) return False # now feed back to mastiff if asked to if job_queue is not None and feedback is True: log.info('Adding {} to queue.'.format(zipfile_name.encode('utf-8'))) job_queue.append(log_dir + os.sep + zipfile_name) my_zip.close() return True ================================================ FILE: mastiff/plugins/analysis/ZIP/ZIP-extract.yapsy-plugin ================================================ [Core] Name = ZipExtract Module = ZIP-extract [Documentation] Description = Extract zip archive contents. Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/ZIP/ZIP-zipinfo.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Zipinfo Analysis Plug-in Plugin Type: ZIP Purpose: This plug-in extracts metadata information stored within a zip archive for the analysis. 
class ZIP_Info(zip.ZipCat):
    """Class to extract zip metadata and place it into a file.

    analyze() reads the archive through the standard zipfile module and fills
    self.page_data with three tables: general archive information, a quick
    directory listing and a full per-entry field dump.
    """

    # Leading padding put in front of every explanatory "- ..." line.
    _PAD = " " * 24

    def __init__(self):
        """Initialize the plugin and name its output page 'zipinfo'."""
        zip.ZipCat.__init__(self)
        self.page_data.meta['filename'] = 'zipinfo'

    def activate(self):
        """Activate the plugin."""
        zip.ZipCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        zip.ZipCat.deactivate(self)

    def analyze(self, config, filename):
        """Analyze the file.

        Input:
           - config: Configuration object handed to every plug-in (not read here).
           - filename: Path of the zip archive to examine.

        Returns self.page_data on success, or False when the plug-in is not
        activated or the archive cannot be opened.
        """
        # sanity check to make sure we can run
        if self.is_activated == False:
            return False

        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # grab the info out of the file
        try:
            my_zip = zipfile.ZipFile(filename, 'r')
            info_list = my_zip.infolist()
        except (zipfile.BadZipfile, IOError, struct.error) as err:
            log.error('Unable to open or process zip file: {}'.format(err))
            return False

        # The archive handle is released even if building a table fails.
        try:
            info_table = self.page_data.addTable(title='Zip Archive Information')
            info_table.addheader([('Data', str), ('Value', str)], printHeader=False)
            info_table.addrow(['File Name', os.path.basename(filename)])

            if my_zip.comment is None or len(my_zip.comment) == 0:
                info_table.addrow(['Comment', 'This file has no comment.'])
            else:
                # ignore any unprintable unicode characters
                info_table.addrow(['Comment',
                                   unicode("%s" % (my_zip.comment), errors='ignore')])

            if len(my_zip.filelist) > 0:
                self.quick_info(info_list)
                self.full_info(info_list)
            else:
                info_table.addrow(['Warning', 'Zip archive has no files.'])
        finally:
            my_zip.close()

        return self.page_data

    @staticmethod
    def _format_date(date_time):
        """Format a ZipInfo.date_time tuple as MM/DD/YYYY HH:MM:SS."""
        return "%02d/%02d/%d %02d:%02d:%02d" % \
               (date_time[1], date_time[2], date_time[0],
                date_time[3], date_time[4], date_time[5])

    @staticmethod
    def _safe_filename(file_info):
        """Return the entry name as unicode.

        Names that cannot be converted with the default codec are decoded as
        UTF-8 with undecodable bytes replaced, so both tables show the same
        text for the same entry.
        """
        try:
            return unicode(file_info.filename)
        except UnicodeDecodeError:
            return unicode(file_info.filename, 'utf-8', 'replace')

    def quick_info(self, info_list):
        """ Obtain quick directory listing of the archive with some information.

        Adds a 'Quick Info' table with one row per entry: modification date,
        uncompressed size and name. Encrypted entries are prefixed with '* '.
        """
        quick_table = self.page_data.addTable('Quick Info')
        quick_table.addheader([('Modification___Date', str),
                               ('File___Size', int),
                               ('File___Name', str)])

        for file_info in info_list:
            filename = self._safe_filename(file_info)

            # if file is encrypted, flag it
            if file_info.flag_bits & 0x1 == 0x1:
                filename = '* ' + filename

            quick_table.addrow([self._format_date(file_info.date_time),
                                file_info.file_size,
                                filename])
        return

    def _version_created(self, version):
        """ Return a string containing the system that created the archive.

        Taken from http://www.pkware.com/documents/casestudies/APPNOTE.TXT

        Any code outside the named entries is reported as "Unknown".
        """
        sys_list = ["MS-DOS, OS/2, FAT/VFAT/FAT32", "Amiga", "OpenVMS", "UNIX",
                    "VM/CMS", "Atari ST", "OS/2 H.P.F.S.", "Macintosh",
                    "Z-System", "CP/M", "Windows NTFS", "MVS (OS/390 - Z/OS)",
                    "VSE", "Acorn Risc", "VFAT", "alternative MVS", "BeOS",
                    "Tandem", "OS/400", "OS X Darwin", "Unknown"]

        # The last entry is the catch-all; never fall back to a real system.
        if not 0 <= version < len(sys_list) - 1:
            return sys_list[-1]
        return sys_list[version]

    def _flag_bits(self, flag_bits, method):
        """ Returns a string containing the explanation of the flag bits.

        Input:
           - flag_bits: General purpose bit flag of the entry.
           - method: Compression method of the entry; bits 1 and 2 are
             interpreted differently depending on it.
        """
        pad = self._PAD
        output = ""

        if flag_bits & 0x1 == 0x1:
            output += pad + "- This file is encrypted.\n"

        if method == 6:
            # Imploding
            if flag_bits & 0x2 == 0x2:
                output += pad + "- 8K sliding dictionary used for compression.\n"
            else:
                output += pad + "- 4K sliding dictionary used for compression.\n"
            if flag_bits & 0x4 == 0x4:
                output += pad + "- 3 Shannon-Fano trees used for sliding dictionary.\n"
            else:
                output += pad + "- 2 Shannon-Fano trees used for sliding dictionary.\n"
        elif method == 8 or method == 9:
            # Deflating
            if flag_bits & 0x6 == 0:
                output += pad + "- Normal (-en)"
            elif flag_bits & 0x6 == 0x2:
                output += pad + "- Maximum (-exx/-ex)"
            elif flag_bits & 0x6 == 0x4:
                output += pad + "- Fast (-ef)"
            elif flag_bits & 0x6 == 0x6:
                output += pad + "- Super Fast (-es)"
            else:
                output += pad + "- UNKNOWN"
            output += " compression option was used.\n"
        elif method == 14:
            # LZMA
            if flag_bits & 0x02 == 0x02:
                output += pad + "- EOS marker indicates end of compressed data stream.\n"

        if flag_bits & 8 == 8:
            output += pad + "- Correct values for CRC-32 and sizes are in data descriptor.\n"

        if flag_bits & 32 == 32:
            output += pad + "- File is compressed patched data.\n"

        if flag_bits & 64 == 64:
            output += pad + "- Strong encryption is used.\n"

        if flag_bits & 2048 == 2048:
            output += pad + "- Filename and comments must be encoded in UTF-8.\n"

        if flag_bits & 8192 == 8192:
            output += pad + "- Central Directory encrypted."

        return output

    def _compression_method(self, method):
        """ Returns a string describing the compression method used.

        Methods 97 and 98 are stored after the contiguous entries of the
        table; every other value above 19 (or below 0) is 'UNKNOWN'.
        """
        methods = ['no compression',
                   'Shrunk',
                   'Reduced with compression factor 1',
                   'Reduced with compression factor 2',
                   'Reduced with compression factor 3',
                   'Reduced with compression factor 4',
                   'Imploded',
                   'Tokenizing compression algorithm',
                   'Deflated',
                   'Enhanced Deflating using Deflate64(tm)',
                   'PKWARE Data Compression Library Imploding (old IBM TERSE)',
                   'Reserved by PKWARE',
                   'BZIP2 algorithm',
                   'Reserved by PKWARE',
                   'LZMA (EFS)',
                   'Reserved by PKWARE',
                   'Reserved by PKWARE',
                   'Reserved by PKWARE',
                   'IBM TERSE (new)',
                   'IBM LZ77 z Architecture (PFS)',
                   'WavPack compressed',
                   'PPMd version I, Rev 1',
                   'UNKNOWN']

        if method == 97:
            method = 20
        elif method == 98:
            method = 21
        elif method > 19 or method < 0:
            method = 22

        return methods[method]

    def _internal_attribs(self, attrib):
        """ Returns a string describing the internal attributes."""
        output = ""

        if attrib & 0x01 == 0x01:
            output += self._PAD + "- File is apparently ASCII or text.\n"

        # NOTE: bit 0x0002 means that a 4 byte variable record length field is
        # present, but this info doesn't seem useful in this case.

        return output

    def _entry_fields(self, file_info):
        """Return the ordered (column name, value) pairs for one archive entry."""
        if file_info.comment is None or len(file_info.comment) == 0:
            comment = 'None'
        else:
            comment = u"%s\n" % file_info.comment

        flag_text = self._flag_bits(file_info.flag_bits, file_info.compress_type)

        attrib_text = self._internal_attribs(file_info.internal_attr)
        if len(attrib_text) > 0:
            internal = "0x%x\n%s" % (file_info.internal_attr, attrib_text)
        else:
            internal = "0x%x" % (file_info.internal_attr)

        # zipfile stores an empty string when the entry carries no extra
        # field, so test for content rather than for None.
        if file_info.extra:
            extra = 'This file entry contains extra data. Not supported yet.'
        else:
            extra = 'No extra data.'

        return [
            ('File___Name', self._safe_filename(file_info)),
            ('Last___Modification___Date', self._format_date(file_info.date_time)),
            ('Compression___Type',
             "%d - %s" % (file_info.compress_type,
                          self._compression_method(file_info.compress_type))),
            ('File___Comment', comment),
            ('Creation___System',
             "%s (%d)" % (self._version_created(file_info.create_system),
                          file_info.create_system)),
            ('PKZIP___creation___version', file_info.create_version),
            ('Version___to___extract', file_info.extract_version),
            ('Flag___bits',
             "0x%x\n%s" % (file_info.flag_bits, flag_text.rstrip('\n'))),
            ('Volume___number', file_info.volume),
            ('Internal___attributes', internal),
            ('External___attributes', "0x%x" % file_info.external_attr),
            ('CRC32', file_info.CRC),
            ('Header___offset', file_info.header_offset),
            ('Compressed___size', file_info.compress_size),
            ('Uncompress___size', file_info.file_size),
            ('Extra___Data', extra),
        ]

    def full_info(self, info_list):
        """ Obtain a full set of information for each file within the archive.

        Adds a vertically printed 'Zip Archive File Info' table with one row
        per entry. An entry whose fields cannot be formatted is logged and
        skipped; the remaining entries are still reported.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.fileinfo')
        full_table = self.page_data.addTable('Zip Archive File Info')

        for file_info in info_list:
            try:
                fields = self._entry_fields(file_info)
            except (TypeError, ValueError, UnicodeError):
                log.error('Error obtaining file information from archive for {}.'.format(
                    self._safe_filename(file_info).encode('utf-8', 'backslashreplace')))
                continue

            # add the header if necessary
            if full_table.header is None:
                full_table.addheader([(name, str) for name, _ in fields],
                                     printVertical=True)

            full_table.addrow([value for _, value in fields])

        return

    def output_file(self, outdir, data):
        """Print output from analysis to a file.

        Writes data, UTF-8 encoded, to 'zipinfo-old.txt' inside outdir.
        Returns True on success and False when the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output')
        out_path = outdir + os.sep + 'zipinfo-old.txt'

        try:
            with codecs.open(out_path, 'w', encoding='utf-8') as outfile:
                outfile.write(data)
        except IOError as err:
            log.error('Could not open {}: {}'.format(out_path, err))
            return False

        return True
Author = Tyler Hudak Version = 1.0 Website = www.korelogic.com License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/analysis/ZIP/__init__.py ================================================ ================================================ FILE: mastiff/plugins/analysis/__init__.py ================================================ ================================================ FILE: mastiff/plugins/category/EXE.yapsy-plugin ================================================ [Core] Name = Windows Executable Category Module = exe [Documentation] Description = Windows Executable Category Plugin Author = Tyler Hudak Website = www.korelogic.com Version = 1.0 License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/category/PDF.yapsy-plugin ================================================ [Core] Name = Adobe PDF Category Module = pdf [Documentation] Description = Adobe PDF Category Plugin Author = Tyler Hudak Website = www.korelogic.com Version = 1.0 License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/category/__init__.py ================================================ ================================================ FILE: mastiff/plugins/category/categories.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ The base category classes for each of the file types analyzed by mastiff. 
class MastiffPlugin(IPlugin):
    """The base plugin class every category class should inherit.

    Besides the yapsy IPlugin behaviour it carries a name, an optional
    prerequisite, an optional YARA rule used for file type checks and an
    output page that analysis results are written into.
    """

    def __init__(self, name=None):
        """Initialize the Mastiff plugin class.

        Input:
           - name: Optional plug-in name; can also be set later via set_name().
        """
        IPlugin.__init__(self)
        self.name = name
        # Both stay None unless a subclass assigns them.
        self.prereq = None
        self.yara_filetype = None
        # Each plug-in gets its own output page; subclasses are expected to
        # replace the placeholder file name.
        self.page_data = output.page()
        self.page_data.meta['filename'] = 'CHANGEME'

    def activate(self):
        """Activate the plugin by delegating to IPlugin.activate()."""
        IPlugin.activate(self)

    def analyze(self, config, filename, output=None):
        # Placeholder: does nothing and returns None in the base class.
        pass

    def deactivate(self):
        """Deactivate plugin."""
        IPlugin.deactivate(self)

    def set_name(self, name=None):
        """
           Yapsy does not provide an easy way to get or set our own
           name, so here's a function to do so.

           Stores name on the instance and returns the stored value.
        """
        self.name = name
        return self.name
class EXECat(categories.MastiffPlugin):
    """Category class for Windows executables."""

    def __init__(self, name=None):
        """Initialize the category."""
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'EXE'
        self.my_types = [ 'PE32 executable',
                          'MS-DOS executable',
                          'Win32 Executable',
                          'Win32 EXE' ]
        self.yara_filetype = """rule isexe {
    strings:
        $MZ = "MZ"
    condition:
        $MZ at 0 and uint32(uint32(0x3C)) == 0x00004550
}"""

    def is_exe(self, filename):
        """ Look to see if the filename has the header format we expect.

        The file must start with 'MZ', and the 32-bit little-endian value at
        0x3c must point (within the first 1024 bytes) at a 'PE' signature.
        Returns True when both checks pass, False otherwise. Errors opening
        the file are not caught here.
        """
        with open(filename, 'rb') as exe_file:
            if exe_file.read(2) != b'MZ':
                return False

            exe_file.seek(0x3c)
            raw_offset = exe_file.read(4)
            if len(raw_offset) != 4:
                # file ends before the offset field - cannot be a PE file
                return False

            offset = struct.unpack('<i', raw_offset)[0]
            if offset < 0 or offset > 1024:
                # seems a bit too far (or is nonsensical) - we'll stop just in case
                return False

            exe_file.seek(offset)
            if exe_file.read(2) != b'PE':
                return False

        return True

    def is_my_filetype(self, id_dict, file_name):
        """Determine if magic string is appropriate for this category.

        Returns self.cat_name when the magic string, the YARA rule or the
        manual header check identifies an executable, otherwise None. When
        id_dict carries no usable 'magic' entry None is returned right away.
        """
        # check magic string first
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic'] ]:
                return self.cat_name
        except (KeyError, TypeError):
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        # perform a manual check
        if self.is_exe(file_name):
            return self.cat_name

        return None
class GenericCat(categories.MastiffPlugin):
    """Category class for any file."""

    def __init__(self, name=None):
        """Initialize the category."""
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'Generic'
        # No magic strings are listed: this category does not filter by type.
        self.my_types = []

    def is_my_filetype(self, id_dict, file_name):
        """Generic plugins are run against every file, so always return
           the cat_name. Neither argument is inspected."""
        return self.cat_name

if __name__ == '__main__':
    # testing code
    genclass = GenericCat()
    print genclass.cat_name
class OfficeCat(categories.MastiffPlugin):
    """Category class for Microsoft Office files."""

    def __init__(self, name=None):
        """Set the category name, the accepted magic strings and the YARA rule."""
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'Office'

        # PPT, DOC, XLS
        known_types = ['CDF V2 Document']
        known_types.extend(['Composite Document File V2',
                            'Microsoft Word',
                            'Microsoft Office Word',
                            'Microsoft Excel',
                            'Microsoft PowerPoint',
                            'Microsoft Office Document'])
        self.my_types = known_types

        self.yara_filetype = """rule isOleDoc {
    condition:
        ( uint32(0x0) == 0xe011cfd0 and uint32(0x4) == 0xe11ab1a1 ) or
        // some old beta versions have this signature
        ( uint32(0x0) == 0x0dfc110e and uint32(0x4) == 0x0e11cfd0 )
}"""

    def is_my_filetype(self, id_dict, file_name):
        """Return cat_name if the magic string or YARA rule match, else None.

        Any failure while reading id_dict['magic'] ends the check with None
        before the YARA rule is tried.
        """
        try:
            magic_hit = any(known in id_dict['magic'] for known in self.my_types)
        except:
            return None

        if magic_hit:
            return self.cat_name

        # fall back to the Yara type check
        yara_hit = FileType.yara_typecheck(file_name, self.yara_filetype)
        return self.cat_name if yara_hit is True else None
================================================ FILE: mastiff/plugins/category/office.yapsy-plugin ================================================ [Core] Name = Microsoft Office Category Module = office [Documentation] Description = Microsoft Office Category Plugin Author = Tyler Hudak Website = www.korelogic.com Version = 1.0 License = Apache License, Version 2.0 ================================================ FILE: mastiff/plugins/category/pdf.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Adobe PDF Category Plugin File Type: Adobe PDF files Purpose: This file contains the code for the category class "pdf", which allows plugins to be created to be run on Adobe PDF files. Output: None __init__(): MANDATORY: Any initialization code the category requires. It must also call the __init__ for the MastiffPlugin class.
class PDFCat(categories.MastiffPlugin):
    """Category class for Adobe PDFs."""

    def __init__(self, name=None):
        """Initialize the category."""
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'PDF'
        self.my_types = [ 'PDF document',
                          'Adobe Portable Document Format' ]
        self.yara_filetype = """rule ispdf {
    strings:
        $PDF = "%PDF-"
    condition:
        $PDF in (0..1024)
}"""

    def is_my_filetype(self, id_dict, file_name):
        """Determine if magic string is appropriate for this category.

        Returns self.cat_name when the magic string, the YARA rule or a
        manual scan of the first 1024 bytes identifies a PDF, otherwise None.
        When id_dict carries no usable 'magic' entry None is returned right
        away.
        """
        # check the magic string for our file type
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic'] ]:
                return self.cat_name
        except (KeyError, TypeError):
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        # the PDF header may be in the first 1024 bytes of the file
        # libmagic and TrID may not pick this up
        # Binary mode: the signature is searched for in raw bytes, which text
        # mode may translate or cut short on some platforms.
        with open(file_name, 'rb') as pdf_file:
            data = pdf_file.read(1024)

        if b'%PDF-' in data:
            return self.cat_name

        return None
class ZipCat(categories.MastiffPlugin):
    """ Category class for Zip documents."""

    def __init__(self, name=None):
        """Set the category name, the accepted magic strings and the YARA rule."""
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'ZIP'
        self.my_types = ['Zip archive', 'ZIP compressed archive']
        self.yara_filetype = """rule iszip {
    condition:
        uint32(0x0) == 0x04034b50
}"""

    def is_my_filetype(self, id_dict, file_name):
        """Return cat_name if the file is recognised as a zip archive, else None.

        Three checks run in order: the zipfile library, the magic string and
        the YARA rule. An exception in the first check, or a TypeError in
        the second, ends the whole test with None.
        """
        # 1. ask the python library
        try:
            library_says_zip = zipfile.is_zipfile(file_name) is True
        except:
            return None
        if library_says_zip:
            return self.cat_name

        # 2. look for one of our types in the magic string
        try:
            magic_hit = any(known in id_dict['magic'] for known in self.my_types)
        except TypeError:
            return None
        if magic_hit:
            return self.cat_name

        # 3. run the Yara type check
        yara_hit = FileType.yara_typecheck(file_name, self.yara_filetype)
        return self.cat_name if yara_hit is True else None
class OUTPUTRaw(masOutput.MastiffOutputPlugin):
    """Raw output plugin.."""

    def __init__(self):
        """Initialize the plugin."""
        masOutput.MastiffOutputPlugin.__init__(self)

    def activate(self):
        """Activate the plugin."""
        masOutput.MastiffOutputPlugin.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        masOutput.MastiffOutputPlugin.deactivate(self)

    def output(self, config, output):
        """Write repr(output) to output_raw.txt in the configured log directory.

        Returns True when the file was written or the plug-in is disabled in
        the configuration, False when the file cannot be opened or written.
        """
        log = logging.getLogger('Mastiff.Plugins.Output.' + self.name)

        if config.get_bvar(self.name, 'enabled') is False:
            log.debug('Disabled. Exiting.')
            return True

        log.info('Writing raw output.')

        try:
            raw_file = open(config.get_var('Dir', 'log_dir')+'/output_raw.txt', 'w')
        except IOError as err:
            log.error('Could not open output_raw.txt file for writing: {}'.format(err))
            return False

        # A failed write is reported like a failed open, and the handle is
        # closed either way.
        try:
            raw_file.write(repr(output))
        except IOError as err:
            log.error('Could not write output_raw.txt: {}'.format(err))
            return False
        finally:
            raw_file.close()

        return True
def renderText(page_format, logdir, filename, datastring):
    """ Places the datastring previously created into the appropriate file or files.

    Input:
       - page_format: 'single' appends to logdir/output_txt.txt and adds a
         separator line; 'multiple' writes logdir/<filename>.txt.
       - logdir: Directory the output file is created in.
       - filename: Base name used for the 'multiple' format.
       - datastring: Unicode text to write; it is encoded as UTF-8.

    Returns True on success and False on an invalid format or I/O error.
    """
    log = logging.getLogger('Mastiff.Plugins.Output.OUTPUTtext.renderText')

    # print out the formatted text for the plug-in
    if page_format == 'single':
        # all data is on one page, open up one file for it
        out_filename = logdir + '/output_txt.txt'
        mode = 'a'
        # add a separator between plug-in output
        datastring += '*'*80 + '\n'
    elif page_format == 'multiple':
        # data should be broken up into individual files.
        # this will be set for each file
        out_filename = logdir + '/' + filename + '.txt'
        mode = 'w'
    else:
        log.error('Invalid format type for output plugin: {}'.format(page_format))
        return False

    try:
        txt_file = open(out_filename, mode)
    except IOError as err:
        log.error('Could not open {} file for writing: {}'.format(out_filename, err))
        return False

    # Close the handle even when the write itself fails.
    try:
        txt_file.write(datastring.encode('utf-8', 'replace'))
    except IOError as err:
        log.error('Could not write to {}: {}'.format(out_filename, err))
        return False
    finally:
        txt_file.close()

    return True

def _extend(data, length=0):
    """ Returns a unicode string that is left justified by the length given.

    None becomes an empty string; values without ljust() are converted with
    str() first; byte strings are decoded as UTF-8 (undecodable bytes are
    replaced).
    """
    if data is None:
        return u""

    try:
        outstr = data.ljust(length)
    except AttributeError:
        outstr = str(data).ljust(length)
    except UnicodeEncodeError:
        outstr = data.decode('utf-8').ljust(length)

    if isinstance(outstr, unicode):
        return outstr
    else:
        return unicode(outstr, 'utf-8', 'replace')

def processPage(plugin, page, page_format):
    """ Processes a page of data and puts it into the correct format.

    Input:
       - plugin: Plug-in name, used for the heading in 'single' format.
       - page: Iterable yielding (title, table, index) for each table.
       - page_format: 'single' adds a heading in front of the tables.

    Returns the formatted unicode text for all tables, ordered by index.
    """
    txtstr = unicode('', 'utf-8')

    if page_format == 'single':
        txtstr += '\n{} Plug-in Results\n\n'.format(plugin)

    # loop through each table in the page
    for tabledata in sorted(page, key=lambda entry: entry[2]):
        (title, mytable, index) = tabledata

        # first we need to go through the table and find the max length for each column
        col_widths = [ len(getattr(col_name, 'name').replace(masOutput.SPACE, ' '))
                       for col_name in mytable.header ]

        # check to see if it should be printed like a horizontal or vertical table
        if mytable.printVertical is False:
            outlist = list()

            for row in mytable:
                # modify the col_widths to set a maximum length of each column to 60 characters
                row_lens = list()
                for col in row[1:]:
                    try:
                        row_lens.append(min(60, len(col)))
                    except TypeError:
                        # if this isn't a str or unicode value, explicitly convert it
                        row_lens.append(min(60, len(str(col))))
                col_widths = map(max, zip(col_widths, row_lens))

            # format the header
            # Data cells below are padded to width + 2, so header names and
            # dashes are joined with two spaces to stay aligned with them.
            if mytable.printHeader is not False:
                txtstr += "  ".join((getattr(val, 'name')).replace(masOutput.SPACE, ' ').ljust(length)
                                    for val, length in zip(mytable.header, col_widths)) + '\n'
                txtstr += '  '.join([ '-'*val for val in col_widths ])

            # format the data
            for row in mytable:
                # combine the row values together and extend them as needed
                outlist.append("".join([ _extend(x[0], x[1]+2)
                                         for x in zip(row[1:], col_widths) ]))

            txtstr += '\n'
            txtstr += "\n".join(outlist)
            txtstr += '\n\n'
        else:
            outlist = list()

            # get max column width + 2
            max_col = max(col_widths) + 2

            # pre-justify header
            newheader = [ getattr(data, 'name').replace(masOutput.SPACE, ' ').ljust(max_col)
                          for data in mytable.header ]

            # this adds a slight speed increase for large output
            myappend = outlist.append

            # go through each row of data and join the header and values together
            for row in mytable:
                myappend("\n".join([ x[0] + _extend(x[1], 0)
                                     for x in zip(newheader, row[1:]) ]))
                myappend("\n\n")

            txtstr += "".join(outlist)
            txtstr += '\n'

    return txtstr

class OUTPUTtext(masOutput.MastiffOutputPlugin):
    """Text output plugin.."""

    def __init__(self):
        """Initialize the plugin."""
        masOutput.MastiffOutputPlugin.__init__(self)

    def activate(self):
        """Activate the plugin."""
        masOutput.MastiffOutputPlugin.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        masOutput.MastiffOutputPlugin.deactivate(self)

    def output(self, config, data):
        """Write the analysis results as text.

        Input:
           - config: Configuration object; 'enabled' and 'format' are read
             from this plug-in's section and 'log_dir' from section 'Dir'.
           - data: Mapping whose first key holds a mapping of category name
             to a mapping of plug-in name to page.

        Returns True when done or disabled, False when there is no data or
        the File Information page is missing.
        """
        log = logging.getLogger('Mastiff.Plugins.Output.' + self.name)

        if config.get_bvar(self.name, 'enabled') is False:
            log.debug('Disabled. Exiting.')
            return True

        log.info('Writing text output.')

        if not data:
            log.error('File Information plug-in data missing. Aborting.')
            return False

        txtstr = unicode('', 'utf-8')
        page_format = config.get_var(self.name, 'format')
        log_dir = config.get_var('Dir', 'log_dir')
        file_data = data[data.keys()[0]]

        # we need to output the File Information plugin first as it contains the
        # summary information on the analyzed file
        try:
            log.debug('Writing file information.')
            info_page = file_data['Generic']['File Information']
            txtstr += processPage('File Information', info_page, page_format)
            renderText(page_format, log_dir, info_page.meta['filename'], txtstr)
            txtstr = unicode('', 'utf-8')
        except KeyError:
            log.error('File Information plug-in data missing. Aborting.')
            return False

        # loop through category data
        for cats, catdata in file_data.iteritems():
            if page_format == 'single':
                catstr = '{} Category Analysis Results'.format(cats)
                log.debug('Writing {} results.'.format(cats))
                txtstr += '{}\n'.format(catstr) + '-'*len(catstr) + '\n'

            # loop through plugin data and generate the output text
            for plugin, pages in catdata.iteritems():
                # already written above
                if cats == 'Generic' and plugin == 'File Information':
                    continue

                # process the page into its output string
                txtstr += processPage(plugin, pages, page_format)

                # render the text into the appropriate location
                renderText(page_format, log_dir, pages.meta['filename'], txtstr)
                txtstr = ''

        return True
BASEHEADER = collections.namedtuple('BASEHEADER', 'name type')
BASEROW = collections.namedtuple('BASEROW', 'ROWINDEX')

# the data types we accept for rows.
# TODO: Extensive testing on time to be able to represent the multitude of time formats
# Maybe have our own class?
DATATYPES = [int, str, float, unicode, time.struct_time]

# characters that spaces should be replaced with
SPACE='___'

class TableError(Exception):
    """ Table Exception class """
    pass

class PageError(Exception):
    """ Page Exception class """
    pass

class table(object):
    """ Base constructor for table of data.

        A table contains a header and rows of data.
        - The header is just a single row that contains the description of the data.
        - You may only add one row of data at a time
    """

    def __init__(self, header=None, data=None, title=None):
        """ Initialize the table.

            self.header: List containing the column names in BASEHEADER named tuple type.
            self.rowdef: Named tuple based on BASEROW. Names are based on header def.
            self.title: String describing the contents of the table.
            self.rows: List of self.rowdef named tuples. Contains the table data.
            self.INDEX: Used for row order. Currently automatically generated.

            Input:
                - header: List containing the data definition.
                - data: List containing the initial row of data to initialize.
                - title: String containing the title for the table.
        """
        self.INDEX = 0
        self.header = None
        self.printHeader = True
        self.printVertical = False
        self.rowdef = None
        self.addheader(header)
        self.title = title
        self.rows = list()

        if data is not None:
            self.addrow(data)

        return

    def __str__(self):
        """ Return a string containing a quickly formatted view of the table. """
        outstring = ''

        if self.title is not None and self.title != '':
            outstring += self.title + '\n'

        if self.header is not None:
            for item in self.header:
                outstring += str(item.name) + '\t'
            outstring += '\n'

        if self.rows is not None and len(self.rows) > 0:
            for rows in sorted(self.rows, key=lambda x: x[0]):
                outstring += '\t'.join([ str(x) for x in rows[1:] ]) + '\n'

        return outstring

    def __repr__(self):
        """ Return a representation showing the title, header and rows. """
        return '<table title={!r} header={!r} rows={!r}>'.format(self.title,
                                                                  self.header,
                                                                  self.rows)

    def __iter__(self):
        """ Generator to go through table rows. Returns the row tuple of the item. """
        for item in self.rows:
            yield item

    def addtitle(self, title=None):
        """ Add a title to the table. A title of None is stored as ''. """
        if title is not None:
            self.title = title
        else:
            self.title = ''

    def addheader(self, header=None, printHeader=True, printVertical=False):
        """ Add a header to the table.

            The header defines the format of the table and should be a list composed
            of the names of the fields in the table, and their type

            After created, the header is used to construct the named tuple for
            all the rows in the table.

            Input:
                - header: List of (name, type) pairs; None leaves the table unchanged.
                - printHeader: Pass False to mark the header as not to be printed.
                - printVertical: Pass True to mark the table for vertical printing.

            Raises TypeError if header is not a list or a type is not in DATATYPES.
            Nothing is stored on the table when an error is raised.
        """
        if header is None:
            return

        if not isinstance(header, list):
            raise TypeError('Headers must be of type list.')

        # Build into locals first so a bad entry leaves the table untouched.
        new_header = list()
        rowdef = tuple()
        for (item, itemtype) in header:
            # make sure itemtype is a valid data type
            if itemtype not in DATATYPES:
                raise TypeError('Data type is not a valid type for MASTIFF output.')
            # keep the declared type with the column instead of discarding it
            new_header.append(BASEHEADER(item, itemtype))
            rowdef = rowdef + (item, )

        self.header = new_header

        if printHeader is False:
            self.printHeader = False

        if printVertical is True:
            self.printVertical = True

        # if we have a rowdef, create the row def tuple
        if len(rowdef) > 0:
            self.rowdef = collections.namedtuple('ROWTUPLE', BASEROW._fields + (rowdef ))

    def addrow(self, row):
        """ Add a row of data to the table. A header must be defined prior to
            adding any rows of data.

            Input:
                - row: Iterable containing row of data to add to the table.
                  (best if list or tuple used) Each item in the iterable will
                  be placed into a separate column in the table.

            Raises TableError if no header is defined or the row length does
            not match the header, TypeError if row has no length. A row of
            None is ignored.
        """
        # make sure we have a header defined
        if self.header is None:
            raise TableError('Header is needed before rows can be added.')

        if self.rows is None:
            self.rows = list()

        # go through the data and add to the table
        if row is not None:
            # The data should be an iterable.
            try:
                if len(row) != len(self.header):
                    raise TableError('Row length ({0}) does not equal header length ({1}).'.format(len(row), len(self.header)))

                # Currently the index (row position in the table) is by the order the data is received
                # TODO: Take in an index
                rowlist = [self.INDEX]
                self.INDEX += 1
                for item in row:
                    rowlist.append(item)

                # create and add named tuple into self.rows
                self.rows.append(self.rowdef._make(rowlist))
            except TypeError:
                raise TypeError('Invalid type given for data.')

class page(object):
    """ A page is a container for multiple tables of data. Tables will be
        listed in the order they are added, unless an index is specified
        when the table is added.
    """

    def __init__(self):
        self.tables = dict()
        self.meta = dict()
        self.meta['filename'] = 'CHANGEME'
        self.counter = 0

    def __getitem__(self, title):
        """ Overload the getitem operator to return a specified table. """
        try:
            return self.tables[title]['table']
        except KeyError:
            raise KeyError('Table {} does not exist.'.format(title))

    def __iter__(self):
        """ Generator to go through the list of tables, sorted by index.
            Yields a list of [ title, table, index ]
        """
        for title, entry in sorted(self.tables.items(), key=lambda kv: kv[1]['index']):
            yield [ title, entry['table'], entry['index'] ]

    def __str__(self):
        outstring = ''
        for mytable in sorted(self.tables.items(), key=lambda kv: kv[1]['index']):
            outstring += str(mytable[1]['table'])
        return outstring

    def __repr__(self):
        """ Return a representation showing the page metadata and its tables. """
        return '<page meta={!r} tables={!r}>'.format(self.meta, self.tables)

    def addTable(self, title, header=None, index=None):
        """ Create a table, store it under title and return it.

            Input:
                - title: Non-empty title; an existing table with the same
                  title is replaced.
                - header: Optional header definition passed on to the table.
                - index: Sort position; defaults to the number of tables
                  added so far.

            Raises PageError if title is None or empty.
        """
        if title is None or title == '':
            raise PageError('New tables must have a title.')

        if index is None:
            index = self.counter

        newTable = table(header=header, title=title)
        self.tables[title] = { 'table': newTable, 'index': index }
        self.counter += 1

        return newTable

class MastiffOutputPlugin(IPlugin):
    """The base plugin class every output plugin should inherit."""

    def __init__(self, name=None):
        """Initialize the Mastiff plugin class."""
        IPlugin.__init__(self)
        self.name = name

    def activate(self):
        """Activate the plugin by delegating to IPlugin.activate()."""
        IPlugin.activate(self)

    def deactivate(self):
        """Deactivate plugin."""
        IPlugin.deactivate(self)

    def output(self, config, data):
        """ Output function. Should be overwritten by plugins.
            The base implementation does nothing and returns False. """
        return False

    def set_name(self, name=None):
        """
           Yapsy does not provide an easy way to get or set our own
           name, so here's a function to do so.
        """
        self.name = name
        return self.name
class MastiffQueue(object):
    """
    Job queue stored in the "queue" table of a SQLite database.

    Jobs are pickled and stored as BLOBs.  append() adds a row with an
    auto-incremented id and popleft()/peek() always take the row with the
    lowest id, so jobs come out in the order they were added (FIFO).
    There are no priorities.
    """

    _create = (
        'CREATE TABLE IF NOT EXISTS queue '
        '('
        '  id INTEGER PRIMARY KEY AUTOINCREMENT,'
        '  file BLOB'
        ')'
        )
    _count = 'SELECT COUNT(*) FROM queue'
    _iterate = 'SELECT id, file FROM queue'
    _append = 'INSERT INTO queue (file) VALUES (?)'
    _write_lock = 'BEGIN IMMEDIATE'
    _popleft_get = (
        'SELECT id, file FROM queue '
        'ORDER BY id LIMIT 1'
        )
    _popleft_del = 'DELETE FROM queue WHERE id = ?'
    _peek = (
        'SELECT file FROM queue '
        'ORDER BY id LIMIT 1'
        )
    _peek_all = (
        'SELECT file FROM queue '
        'ORDER BY id'
        )

    def __init__(self, config):
        """
        Initialize the class.

        config is handed to mastiff.conf.Conf; the database path is built
        from its [Dir] log_dir and [Sqlite] db_file options.  The directory
        is created if missing (the process exits if that fails) and the
        queue table is created if it does not exist yet.
        """
        # Read the config file and find where the DB is
        log = logging.getLogger('Mastiff.Queue.init')

        conf = Conf.Conf(config)
        self.path = os.path.abspath(conf.get_var('Dir', 'log_dir') + os.sep + conf.get_var('Sqlite', 'db_file'))
        log.debug('Setting up queue table at %s', self.path)

        # create the dir if it doesn't exist
        db_dir = os.path.dirname(self.path)
        if not os.path.isdir(db_dir):
            try:
                os.makedirs(db_dir)
            except OSError as err:
                log.error('Could not make %s: %s. Exiting.', db_dir, err)
                sys.exit(1)

        if not os.path.exists(self.path) or not os.path.isfile(self.path):
            # does not exist, create.  The connection is only a probe, so
            # close it right away instead of leaking it.
            try:
                sqlite3.connect(self.path).close()
            except sqlite3.OperationalError as err:
                log.error('Cannot access sqlite DB: %s.', err)

        self._connection_cache = {}
        with self._get_conn() as conn:
            # create the table if required
            conn.execute(self._create)

    def __len__(self):
        """ Allows len(queue) to return the number of items to be processed. """
        with self._get_conn() as conn:
            my_len = next(conn.execute(self._count))[0]
        return my_len

    def __iter__(self):
        """ Yield every queued job (unpickled) without removing it. """
        with self._get_conn() as conn:
            for _my_id, obj_buffer in conn.execute(self._iterate):
                yield loads(bytes(obj_buffer))

    def __str__(self):
        """ Return the queued jobs joined by newlines (jobs must be strings). """
        return '\n'.join(self)

    def _get_conn(self):
        """ Returns the database connection cached for the calling thread. """
        my_id = get_ident()
        if my_id not in self._connection_cache:
            self._connection_cache[my_id] = sqlite3.Connection(self.path, timeout=60)
        return self._connection_cache[my_id]

    def append(self, obj):
        """ Add a job to the queue. """
        # sqlite3.Binary is the BLOB wrapper of the running interpreter
        # (buffer on Python 2, memoryview on Python 3).
        obj_buffer = sqlite3.Binary(dumps(obj, 2))
        with self._get_conn() as conn:
            conn.execute(self._append, (obj_buffer,))

    def popleft(self, sleep_wait=False):
        """
        Pops a job off the queue and returns it.

        Returns the oldest job in the queue, or None if the queue is empty.
        With sleep_wait=False (default) an empty queue returns immediately.
        With sleep_wait=True the call keeps polling, sleeping between tries
        with a delay capped at two seconds, until a job shows up.
        """
        keep_pooling = True
        wait = 0.1
        max_wait = 2
        tries = 0
        with self._get_conn() as conn:
            my_id = None
            obj_buffer = None
            while keep_pooling:
                # take the write lock so no other process pops the same row
                conn.execute(self._write_lock)
                cursor = conn.execute(self._popleft_get)
                try:
                    my_id, obj_buffer = next(cursor)
                    keep_pooling = False
                except StopIteration:
                    conn.commit()  # unlock the database
                    if not sleep_wait:
                        keep_pooling = False
                        continue
                    tries += 1
                    sleep(wait)
                    # floor division keeps the Python 2 back-off schedule
                    wait = min(max_wait, tries // 10 + wait)

            # Only delete and unpickle when a row was actually fetched.
            # (This used to test the builtin ``id``, which is always true.)
            if my_id is not None:
                conn.execute(self._popleft_del, (my_id,))
                return loads(bytes(obj_buffer))
        return None

    def peek(self):
        """ Return the next item in the queue, but do not remove it. """
        with self._get_conn() as conn:
            cursor = conn.execute(self._peek)
            try:
                return loads(bytes(next(cursor)[0]))
            except StopIteration:
                return None

    def clear_queue(self):
        """ Clear the job queue by popping until it is empty. """
        while self.__len__() > 0:
            self.popleft(sleep_wait=False)
def open_db(db_name):
    """
    Return a sqlite3 Connection object for the given database name.
    If the file does not exist, sqlite3 will attempt to create it.

    Returns None if the database cannot be opened.
    """
    log = logging.getLogger('Mastiff.DB.open')
    if not os.path.exists(db_name) or not os.path.isfile(db_name):
        log.warning('%s does not exist. Will attempt to create.', db_name)

    try:
        db = sqlite3.connect(db_name)
    except sqlite3.OperationalError as err:
        log.error('Cannot access sqlite DB: %s.', err)
        # nothing to configure on a failed connection
        return None

    db.text_factory = str
    return db


def open_db_conf(config):
    """
    Read the DB information from a MASTIFF config object and open it.

    config must provide get_var(section, option).  The database file is
    [Sqlite] db_file; if it carries an existing directory it is used as
    given, otherwise it is placed below [Dir] base_dir.

    Returns a sqlite3 Connection or None.
    """
    log = logging.getLogger('Mastiff.DB.open_db_conf')
    log_dir = config.get_var('Dir', 'base_dir')
    mastiff_db = config.get_var('Sqlite', 'db_file')
    if mastiff_db is None or log_dir is None or len(mastiff_db) == 0:
        log.error('Unable to open DB.')
        return None

    # db_file can be a full path - if it is, then use it.  Open the
    # expanded path: sqlite3 does not resolve a leading "~" itself.
    expanded_db = os.path.expanduser(mastiff_db)
    dirname = os.path.dirname(expanded_db)
    if len(dirname) > 0 and os.path.exists(dirname):
        return open_db(expanded_db)

    return open_db(os.path.expanduser(log_dir) + os.sep + mastiff_db)


def sanitize(string):
    """
    Sanitize a string that cannot be sent correctly to sqlite3.
    Returns a string only containing letters, numbers, whitespace
    or underscore.
    """
    return re.sub(r'[^a-zA-Z0-9_\s]', '', string)


def check_table(db, table):
    """ Return True if a table exists, False otherwise. """
    conn = db.cursor()
    # sqlite3 won't let us use table names as variables, so we have to
    # use string substitution
    query = 'SELECT * FROM ' + sanitize(table)
    try:
        conn.execute(query)
        return True
    except sqlite3.OperationalError:
        # table doesn't exist
        return False


def add_table(db, table, fields):
    """
    Add a table to a database.

    table is a string of the table name.
    fields is a list of columns in the form 'column_name column_type'.
    Both are passed through sanitize() before being placed in the query.

    Returns True if the table exists afterwards, False otherwise.
    """
    conn = db.cursor()
    if check_table(db, table):
        # Table already exists
        return True

    query = 'CREATE TABLE ' + sanitize(table) + '('
    for item in fields:
        query = query + sanitize(item) + ','
    # replace the trailing comma with the closing parenthesis
    query = query[:-1] + ')'

    try:
        conn.execute(query)
        db.commit()
    except sqlite3.OperationalError as err:
        log = logging.getLogger('Mastiff.DB.add_table')
        log.error('Could not add table %s: %s', table, err)
        return False

    return True


def add_column(db, table, col_def):
    """
    Alter an existing table by adding a column to it.

    db is a sqlite3 db connection
    table is the table name
    col_def is the column definition, inserted into the query verbatim

    Returns True if the column exists afterwards (adding a column that is
    already present counts as success), False otherwise.
    """
    log = logging.getLogger('Mastiff.DB.add_column')
    if check_table(db, table) == False:
        log.error('Table %s does not exist.', table)
        return False

    conn = db.cursor()
    # use the same sanitized name check_table() just verified
    query = 'ALTER TABLE ' + sanitize(table) + ' ADD COLUMN ' + col_def
    try:
        conn.execute(query)
        db.commit()
    except sqlite3.OperationalError as err:
        # dup column name errors are fine
        if 'duplicate column name' not in str(err):
            log.error('Could not add column: %s', err)
            return False
    else:
        log.debug('Extended %s with column def "%s".', table, col_def)

    return True


def create_mastiff_tables(db):
    """
    Create the tables in the MASTIFF database to store
    the main analysis information.

    db is a sqlite3 db connection

    Returns True if the mastiff table exists afterwards, False otherwise.
    """
    if check_table(db, 'mastiff') == True:
        # table already exists, nothing to do
        return True

    fields = ['id INTEGER PRIMARY KEY',
              'md5 TEXT DEFAULT NULL',
              'sha1 TEXT DEFAULT NULL',
              'sha256 TEXT DEFAULT NULL',
              'type TEXT DEFAULT NULL']

    # add_table() reports failure with False (it never returns None)
    if not add_table(db, 'mastiff', fields):
        return False

    db.commit()
    return True


def get_id(db, hashes):
    """
    Return the db id number of the given tuple of hashes
    (md5, sha1, sha256).
    Returns None if the tuple does not exist or the query fails.
    """
    log = logging.getLogger('Mastiff.DB.get_id')
    cur = db.cursor()
    try:
        cur.execute('SELECT id FROM mastiff WHERE (md5=? AND '
                    'sha1=? AND sha256=?)',
                    [ hashes[0], hashes[1], hashes[2], ])
    except sqlite3.OperationalError as err:
        log.error('Could not execute query: %s', err)
        return None

    sqlid = cur.fetchone()
    if sqlid is None:
        return None
    return sqlid[0]


def insert_mastiff_item(db, hashes, cat_list=None):
    """
    Insert info on analyzed file into database.

    hashes is a (md5, sha1, sha256) tuple; a row is inserted for it unless
    one exists already.  If cat_list is given, its str() form is written to
    the type column of that row.

    Returns the row id, or None if the insert failed.
    """
    log = logging.getLogger('Mastiff.DB.Insert')

    # we'll create the tables just to be sure they exist
    create_mastiff_tables(db)

    cur = db.cursor()
    sqlid = get_id(db, hashes)

    if sqlid is not None:
        # already in there, just send back the id
        log.debug('Hashes %s are already in the database.', hashes)
    else:
        try:
            cur.execute('INSERT INTO mastiff (md5, sha1, sha256) '
                        'VALUES (?, ?, ?)',
                        (hashes[0], hashes[1], hashes[2]))
            db.commit()
        except sqlite3.OperationalError as err:
            log.error('Could not insert item into mastiff: %s', err)
            return None
        sqlid = cur.lastrowid

    if cat_list is not None and sqlid is not None:
        try:
            log.info('Adding %s', str(cat_list))
            cur.execute('UPDATE mastiff SET type=? WHERE id=?',
                        (str(cat_list), sqlid, ))
            db.commit()
        except sqlite3.OperationalError as err:
            # best effort: the row itself exists, only the type is missing
            log.error('Could not update file type in DB: %s', err)

    return sqlid


# testing functions
if __name__ == '__main__':

    # configure logging for Mastiff module
    format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
    logging.basicConfig(format=format_)
    log = logging.getLogger("Mastiff")
    log.setLevel(logging.DEBUG)

    mysql = open_db('/tmp/test.db')
    if mysql is None:
        print("Was not created")
        # nothing below can work without a connection
        raise SystemExit(1)

    create_mastiff_tables(mysql)
    print("*** TEST: inserting items")
    insert_mastiff_item(mysql, ('123', '345', '456'), 'filename')
    insert_mastiff_item(mysql, ('135', '790', '246'), 'filename2')
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename3')
    insert_mastiff_item(mysql, ('444', '666', '888'), 'filename4')
    print("*** TEST: insert dup hashes")
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename5')
    print("*** TEST: insert dup filename")
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename3')
    print("*** TEST: add column")
    add_column(mysql, 'mastiff', 'test_col TEXT DEFAULT NULL')
    mysql.close()
# For example: #output_plugin_dir = ./plugins, /etc/mastiff output_plugin_dir = [Misc] # verbose = [on|off] verbose = off # Make a copy of the analyzed file in the log directory with a .VIR extension. # copy = [on|off] copy = on [Sqlite] # Sqlite database options # db_file = Name of the database file db_file = mastiff.db [File ID] # trid is the location of the TrID binary # trid_db is the location of the TrID database #trid = /usr/local/bin/trid trid = trid_db = [Fuzzy Hashing] # compare decides whether or not to correlate previous fuzzy hashes # compare = [on|off] compare = on [Hex Dump] # Options for Hex Dump plug-in # enabled = [on|off] enabled = off [Embedded Strings Plugin] # Options for the Embedded Strings Plugin. # strcmd is the path to the strings command # DO NOT CHANGE THE FOLLOWING OPTIONS UNLESS YOU KNOW WHAT YOU ARE DOING! # str_opts are the options to use for all strings operations # str_uni_opts are the options to use to obtain UNICODE strings strcmd = /usr/bin/strings str_opts = -a -t d str_uni_opts = -e l [VirusTotal] # Options for the VirusTotal Submission Plug-in. # api_key is your API key from virustotal.com # - Leave this empty if you wish to disable this plug-in api_key = # submit [on|off] - submit binary to VirusTotal submit = off [Metascan Online] # Options for the Metascan Online Submission Plug-in. # api_key is your API key from metascan-online.com # - Leave this empty if you wish to disable this plug-in api_key = # submit [on|off] - submit binary to Metascan Online submit = off [MASTIFF Online] # Options for submission to MASTIFF Online # accept_terms_of_service [true|false] - To upload samples to MASTIFF Online, # you agree to the terms of service and privacy policy located at # https://mastiff-online.korelogic.com. Set the option below to true to # indicate you agree to the terms. 
accept_terms_of_service = false # submit [on|off] - submit sample to MASTIFF Online submit = off [pdfid] # Options to run Didier Stevens' pdfid.py script # pdfid_cmd = Path to the pdfid.py script # - Leave blank if you want the script disabled. # pdfid_opts = Options for program. # - Do not put multiple options in quotes. # Note: pdfid.py has bugs that may cause errors when examining # malformed PDFs when using the -e option. pdfid_cmd = /usr/local/bin/pdfid.py #pdfid_opts = -e pdfid_opts = [pdf-parser] # Options to run Didier Stevens' pdf-parser.py script # pdf_cmd = Path to pdf-parser.py. # feedback: [on|off] - Feed extracted files back into the MASTIFF queue. pdf_cmd = /usr/local/bin/pdf-parser.py feedback = on [PDF Metadata] # Options for PDF Metadata script # exiftool = path to exiftool exiftool = /usr/bin/exiftool [yara] # Options for the Yara signature plug-in # yara_sigs = Base path to Yara signatures. This path will be recursed # to find additional signatures. # Leave blank to disable the plug-in. yara_sigs = /usr/local/yara [Digital Signatures] # Options to extract the digital signatures # # disitool - path to disitool.py script. # openssl - path to openssl binary disitool = /usr/local/bin/disitool.py openssl = /usr/bin/openssl [Office Metadata] # Options for Office Metadata script # exiftool = path to exiftool exiftool = /usr/bin/exiftool [Single-Byte Strings] # options for single-byte string extraction plug-in # length - Minimum length to extract length = 3 # raw - print raw characters instead of formatted ones (e.g. \\n vs. \n) raw = False [ZipExtract] # options for Zip archive file extraction plug-in # enabled: [on|off] - Extract files or not # password: Password to use for zip file. OK to leave blank. # feedback: [on|off] - Feed extracted files back into the MASTIFF queue.
enabled = on password = feedback = on [Office pyOLEScanner] # olecmd = Path to pyOLEScanner.py olecmd=/usr/local/src/pyOLEScanner/pyOLEScanner.py ################################ # Output Plug-in Configuration ################################ [Raw Output] # enabled: [on|off] - Dump output in raw form or not enabled = off [Text Output] # enabled = [on|off] - Write output in text form or not # format = [multiple|single] - Put text output in individual files or one page. enabled = on format = multiple ================================================ FILE: pylint.rc ================================================ [MASTER] # Specify a configuration file. #rcfile= # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= # Profiled execution. profile=no # Add files or directories to the blacklist. They should be base names, not # paths. ignore=CVS # Pickle collected data for later comparisons. persistent=yes # List of plugins (as comma separated values of python modules names) to load, # usually to register additional checkers. load-plugins= [MESSAGES CONTROL] # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option # multiple time. #enable= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifier separated by comma (,) or put this option # multiple time (only on the command line, not in the configuration file where # it should appear only once). disable=C0301,C0326 [REPORTS] # Set the output format. Available formats are text, parseable, colorized, msvs # (visual studio) and html output-format=parseable # Include message's id in output include-ids=no # Put messages in a separate file for each module / package specified on the # command line instead of printing them on stdout. Reports (if any) will be # written in a file name "pylint_global.[txt|html]".
files-output=no # Tells whether to display a full report or only the messages reports=yes # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (RP0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Add a comment according to your evaluation note. This is used by the global # evaluation report (RP0004). comment=no [BASIC] # Required attributes for module, separated by a comma required-attributes= # List of builtins function names that should not be used, separated by a comma bad-functions=map,filter,apply,input # Regular expression which should only match correct module names module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression which should only match correct module level names const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ # Regular expression which should only match correct class names class-rgx=[A-Z_][a-zA-Z0-9]+$ # Regular expression which should only match correct function names function-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct method names method-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct instance attribute names attr-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct argument names argument-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct variable names variable-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct list comprehension / # generator expression variable names inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ # Good variable names which should always be accepted, separated by a comma good-names=i,j,k,ex,Run,_ # Bad variable names which should always be refused, separated by a comma 
bad-names=foo,bar,baz,toto,tutu,tata # Regular expression which should only match functions or classes name which do # not require a docstring no-docstring-rgx=__.*__ [FORMAT] # Maximum number of characters on a single line. max-line-length=80 # Maximum number of lines in a module max-module-lines=1000 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' [TYPECHECK] # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # List of classes names for which member attributes should not be checked # (useful for classes with attributes dynamically set). ignored-classes=SQLObject # When zope mode is activated, add a predefined set of Zope acquired attributes # to generated-members. zope=no # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E0201 when accessed. Python regular # expressions are accepted. generated-members=REQUEST,acl_users,aq_parent [SIMILARITIES] # Minimum lines number of a similarity. min-similarity-lines=4 # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes [VARIABLES] # Tells whether we should check for unused import in __init__ files. init-import=no # A regular expression matching the beginning of the name of dummy variables # (i.e. not used). dummy-variables-rgx=_|dummy # List of additional names supposed to be defined in builtins. Remember that # you should avoid to define new builtins when possible. additional-builtins= [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME,XXX,TODO [CLASSES] # List of interface methods to ignore, separated by a comma. This is used for # instance to not check methods defines in Zope's Interface base class. 
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls [IMPORTS] # Deprecated modules which should not be used, separated by a comma deprecated-modules=regsub,string,TERMIOS,Bastion,rexec # Create a graph of every (i.e. internal and external) dependencies in the # given file (report RP0402 must not be disabled) import-graph= # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled) ext-import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled) int-import-graph= [DESIGN] # Maximum number of arguments for function / method max-args=5 # Argument names that match this expression will be ignored. Default to name # with leading underscore ignored-argument-names=_.* # Maximum number of locals for function / method body max-locals=15 # Maximum number of return / yield for function / method body max-returns=6 # Maximum number of branch for function / method body max-branchs=12 # Maximum number of statements in function / method body max-statements=50 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Minimum number of public methods for a class (see R0903). min-public-methods=2 # Maximum number of public methods for a class (see R0904). max-public-methods=20 [EXCEPTIONS] # Exceptions that will emit a warning when being caught. 
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
This file is the setup/install script for MASTIFF.
"""

import sys

# Check the interpreter before importing anything else, so an unsupported
# Python gets this message instead of an import error from the packages below.
if sys.version_info < (2, 6, 6):
    sys.stderr.write("Mastiff requires python version 2.6.6 or later.\n")
    sys.exit(1)

from setuptools import setup, find_packages
from mastiff import get_release_string

setup(
    author='Tyler Hudak',
    author_email='mastiff-project@korelogic.com',
    # install the default configuration system-wide
    data_files=[('/etc/mastiff', ['mastiff.conf'])],
    description="""MASTIFF is a static analysis automation framework.""",
    install_requires=['Yapsy == 1.10, !=1.10-python3'],
    license='Apache License V2.0',
    long_description=(
        "MASTIFF is a static analysis framework that automates the process "
        "of extracting key characteristics from a number of different file "
        "formats. To ensure the framework remains flexible and extensible, a "
        "community-driven set of plug-ins is used to perform file analysis "
        "and data extraction. While originally designed to support malware, "
        "intrusion, and forensic analysis, the framework is well-suited to "
        "support a broader range of analytic needs. In a nutshell, MASTIFF "
        "allows analysts to focus on analysis rather than figuring out how "
        "to parse files."),
    maintainer='Tyler Hudak',
    maintainer_email='mastiff-project@korelogic.com',
    name='mastiff',
    packages=find_packages(),
    # ship the plug-in sources and their yapsy descriptors with every package
    package_data={'': ['*.py', '*.yapsy-plugin'] },
    platforms=['Linux'],
    scripts=['mas.py'],
    url='http://www.korelogic.com',
    version=get_release_string())
""" __version__ = "$Id: 960d687e79158fbba349a472f85ff2b75d8c9bb1 $" import logging import mastiff.plugins.output as masOutput class OUTPUTSkeleton(masOutput.MastiffOutputPlugin): """Raw output plugin..""" def __init__(self): """Initialize the plugin.""" masOutput.MastiffOutputPlugin.__init__(self) def activate(self): """Activate the plugin.""" masOutput.MastiffOutputPlugin.activate(self) def deactivate(self): """Deactivate the plugin.""" masOutput.MastiffOutputPlugin.deactivate(self) def output(self, config, data): log = logging.getLogger('Mastiff.Plugins.Output.' + self.name) # see if we are enabled if config.get_bvar(self.name, 'enabled') is False: log.debug('Disabled. Exiting.') return True log.info('Writing FORMAT output.') # loop through category data for cats, catdata in data[data.keys()[0]].iteritems(): catstr = '{} Category Analysis Results'.format(cats) log.debug('Writing {} results.'.format(cats)) # loop through plugin data and generate the output text for plugin, pages in catdata.iteritems(): # process the page data into the specific format and # output it to the appropriate file/files # loop through each table in the page for tabledata in sorted(pages, key=lambda page: pages[2]): (title, mytable, index) = tabledata # process table data here for row in mytable: # act on row data # (REMOVE THE NEXT LINE) pass return True ================================================ FILE: skeleton/OUTPUT-skel.yapsy-plugin ================================================ [Core] Name = Generic Output Skeleton Plugin Module = OUTPUT-skel [Documentation] Description = Your Description Here Author = Your Name Here Version = 0.1 Website = Your Website Here ================================================ FILE: skeleton/analysis-ext-skel.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. 
This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Analysis Plugin using external program code Plugin Type: Generic Purpose: This file provides the skeleton code for a plugin that performs static analysis on any file given to the Mastiff framework using an external program. This is an example that shows all functions defined. Output: None. In the MASTIFF configuration file, the options for this particular plug-in would be: [GenSkel Ext Prog] plugcmd = /path/to/my_prog """ __version__ = "$Id: 042c8a566d07d74c75251d9ab7306f4a8ab71c0d $" import subprocess import logging import os # Change the following line to import the category class you for the files # you wish to perform analysis on import mastiff.plugins.category.generic as gen # Change the class name and the base class class GenSkelExt(gen.GenericCat): """Skeleton generic plugin that calls external program.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.page_data.meta['filename'] = 'CHANGEME' def analyze(self, config, filename): """ Obtain the command and options from the config file and call the external program. """ # make sure we are activated if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # get my config options plug_opts = config.get_section(self.name) if plug_opts is None: log.error('Could not get %s options.', self.name) return False # *** plug_opts['plugcmd'] SHOULD BE CHANGED TO THE PLUGIN SPECIFIC OPTIONS # verify external program exists and we can call it if not plug_opts['plugcmd'] or \ not os.path.isfile(plug_opts['plugcmd']) or \ not os.access(plug_opts['plugcmd'], os.X_OK): log.error('%s is not accessible. 
Skipping.', plug_opts['plugcmd']) return False # run your external program here run = subprocess.Popen([plug_opts['plugcmd']] + \ [ filename ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) (output, error) = run.communicate() if error is not None and len(error) > 0: log.error('Error running program: %s' % error) return False self.gen_output(output) log.debug ('Successfully ran %s.', self.name) return True def gen_output(self, output): """Place the results into a Mastiff Output Page.""" log = logging.getLogger('Mastiff.Plugins.' + self.name) # self.page_data was previously initialized # add a table to it new_table = self.page_data.addTable('ANALYSIS PLUGIN DESCRIPTION') # parse through data generated from output here # add header to table # example: new_table.addHeader([('Header 1', str), ('Header 2', int)]) # add rows of data to table # example: new_table.addRow(['row1', 1]) return True ================================================ FILE: skeleton/analysis-ext-skel.yapsy-plugin ================================================ [Core] Name = GenSkel Ext Prog Module = analysis-ext-skel [Documentation] Description = Your Description Here Author = Your Name Here Version = 0.1 Website = Your Website Here ================================================ FILE: skeleton/analysis-skel.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Analysis plugin skeleton code Plugin Type: Generic Purpose: This file provides the skeleton code for a plugin that performs static analysis on any file given to the Mastiff framework. 
This is an example that shows all functions defined. Output: None __init__(): MANDATORY: Any initialization code the plugin requires. It must also call the __init__ for its category class. activate(): OPTIONAL: Activation code called by Yapsy to activate the plugin. deactivate(): OPTIONAL: Deactivated code called by Yapsy. analyze(config, filename): MANDATORY: The main body of code that performs the analysis on the file. gen_output(outdir): Function that puts the data into self.page_data for the output plug-ins. """ __version__ = "$Id: 107798be8154ef41517034e77db3b5a95dd4fe6b $" import logging # Change the following line to import the category class you for the files # you wish to perform analysis on import mastiff.plugins.category.generic as gen # Change the class name and the base class class GenSkeleton(gen.GenericCat): """Skeleton generic plugin code.""" def __init__(self): """Initialize the plugin.""" gen.GenericCat.__init__(self) self.page_data.meta['filename'] = 'CHANGEME' def activate(self): """Activate the plugin.""" gen.GenericCat.activate(self) def deactivate(self): """Deactivate the plugin.""" gen.GenericCat.deactivate(self) def analyze(self, config, filename): """Analyze the file.""" # sanity check to make sure we can run if self.is_activated == False: return False log = logging.getLogger('Mastiff.Plugins.' + self.name) log.info('Starting execution.') # Add analysis code here. Data can be added to tables or passed into gen_output self.gen_output() return self.page_data def gen_output(self): """Place the results into a Mastiff Output Page.""" log = logging.getLogger('Mastiff.Plugins.' 
+ self.name) # self.page_data was previously initialized # add a table to it new_table = self.page_data.addTable('ANALYSIS PLUGIN DESCRIPTION') # add header to table # example: new_table.addHeader([('Header 1', str), ('Header 2', int)]) # add rows of data to table # example: new_table.addRow(['row1', 1]) return True ================================================ FILE: skeleton/analysis-skel.yapsy-plugin ================================================ [Core] Name = Generic Skeleton Plugin Module = analysis-skel [Documentation] Description = Your Description Here Author = Your Name Here Version = 0.1 Website = Your Website Here ================================================ FILE: skeleton/category-skel.py ================================================ #!/usr/bin/env python """ Copyright 2012-2013 The MASTIFF Project, All Rights Reserved. This software, having been partly or wholly developed and/or sponsored by KoreLogic, Inc., is hereby released under the terms and conditions set forth in the project's "README.LICENSE" file. For a list of all contributors and sponsors, please refer to the project's "README.CREDITS" file. """ __doc__ = """ Category Skeleton Plugin File Type: New File Type Purpose: This file contains the skeleton code for a new category class to analyze a new file type. Output: None __init__(): MANDATORY: Any initialization code the category requires. It must also call the __init__ for its superclass - in this case OfficeCat. 
""" __version__ = "$Id: 64ee75c4869a530a4030a50ff7add6ab87601a11 $" import mastiff.plugins.category.categories as categories import mastiff.filetype as FileType # Change the class name to identify the new file type class SkelCat(categories.MastiffPlugin): """ Category class for Word documents.""" def __init__(self, name=None): """Initialize the category.""" categories.MastiffPlugin.__init__(self, name) # cat_name should be a one word description of the file type self.cat_name = 'SkelCat' # Add in strings from libmagic and TrID output self.my_types = [ 'libmagic string', 'TrID string' ] # Add in the Yara rule self.yara_filetype = """rule istype { } """ def is_my_filetype(self, id_dict, file_name): """Determine if the magic string is appropriate for this category""" # check magic string first try: if [ type_ for type_ in self.my_types if type_ in id_dict['magic']]: return self.cat_name except: return None # run Yara type check if FileType.yara_typecheck(file_name, self.yara_filetype) is True: return self.cat_name # check TrID output, if available # this can likely be removed for (percent, desc) in id_dict['trid']: for type_ in self.my_types: # make sure percent is high enough and trid string matches if type_ in desc and percent > 50: return self.cat_name # add your own code on additional file type determination here return None ================================================ FILE: skeleton/category-skel.yapsy-plugin ================================================ [Core] Name = Category Skeleton Plug-in Module = category-skeleton [Documentation] Description = Your Description Here Author = Your Name Here Website = Your Website Here Version = 0.1 ================================================ FILE: skeleton/output-skel.yapsy-plugin ================================================ [Core] Name = Generic Output Skeleton Plugin Module = output-skel [Documentation] Description = Your Description Here Author = Your Name Here Version = 0.1 Website = Your Website Here 
================================================
FILE: tests/import-test.sh
================================================
#!/bin/bash
# $Id: 00c702350cf2edd48c2e57517593c5bce6a64781 $
#
# Find all imports from the MASTIFF python files and ensure they can be
# imported.
#
# $1 = directory to test
#
# Exits with status 1 when no usable directory is given.

if [ $# -eq 0 ] ; then
   echo "Need a directory to scan."
   exit 1
elif [ ! -d "$1" ] ; then
   echo "$1 is not a directory."
   exit 1
fi

# Remember where we started. Do not store this in PWD: the shell rewrites
# PWD on every cd, so the saved value would be lost.
ORIG_DIR=$(pwd)

# Split command output on newlines only so file names and import
# statements containing spaces stay in one piece.
SAVEIFS=$IFS
IFS=$(echo -en "\n\b")

echo "Checking Python imports in $1 and below."
echo

cd "$1" || exit 1

for FILE in `find . -name "*.py"`; do
  for IMPORT in `grep -E "^\s*import\s+|^\s*from \S+ import" "${FILE}" | sed -e 's/^[ \t]*//' | sort -u`; do
    ERROR=`python -c "${IMPORT}" 2>&1 | grep "ImportError" | grep -vi disitool`
    # $? is the status of the last grep: anything but 1 means an
    # ImportError line (other than for disitool) was found.
    if [ $? -ne 1 ]; then
      echo ERROR: ${FILE}: ${ERROR}
    fi
  done;
done

cd "${ORIG_DIR}"
IFS=${SAVEIFS}

echo
echo "Done checking imports."
echo

================================================
FILE: tests/mastiff-test.sh
================================================
#!/bin/bash

MASCMD="python ./mas.py -c ./mastiff.conf -V "

# Test mastiff by running it against various file types.
# $1 = file type
# $2 = file to test
# $3 = outfile
mas_test() {
    echo -n "Testing ${1}: "
    if [ ! -f "$2" ] ; then
       echo "$2 missing. Unable to test."
       return 0
    fi

    # MASCMD is left unquoted on purpose: it holds the command plus options.
    ${MASCMD} "${2}" > "${3}" 2>&1
    if [ $? -ne 0 ] ; then
       OUTMSG="Failed. See ${3} for details."
    else
       OUTMSG="Success."
    fi
    echo "$OUTMSG"
}

echo "Checking for MASTIFF functionality."
echo

mas_test EXE tests/test.exe tests/test-EXE.txt
mas_test Office tests/test.doc tests/test-DOC.txt
mas_test PDF tests/test.pdf tests/test-PDF.txt
mas_test ZIP tests/test.zip tests/test-ZIP.txt

echo
echo "Done checking MASTIFF functionality."

================================================
FILE: utils/version2string
================================================
#!/usr/bin/perl -w
######################################################################
#
# $Id: 6c139ab440c14c954b44b9fad19f5c34154259f7 $
#
######################################################################
#
# Copyright 2008-2013 The WebJob Project, All Rights Reserved.
#
######################################################################
#
# Purpose: Convert version numbers to a string representation.
#
######################################################################

use strict;
use File::Basename;
use File::Path;
use Getopt::Std;

######################################################################
#
# Main Routine
#
######################################################################

  # Program name used as the prefix of every diagnostic.
  my $sProgram = basename(__FILE__);

  # Patterns the command line values must match in full.
  my $sVersionRegex = qq(0x[0-9A-Fa-f]{8});
  my $sTypeRegex = qq((?:cvs|program|tar));

  # Parse the command line; anything unparsable ends in the usage text.
  my %hOptions;
  getopts('t:v:', \%hOptions) or Usage($sProgram);

  # The type, '-t', is optional and defaults to "program".
  my $sType = "program";
  if (exists($hOptions{'t'}))
  {
    $sType = $hOptions{'t'};
  }
  if (defined($sType) and $sType !~ /^$sTypeRegex$/)
  {
    print STDERR "$sProgram: Type='$sType' Error='Invalid version type.'\n";
    exit(2);
  }

  # The version, '-v', is required and must be 0x followed by 8 hex digits.
  Usage($sProgram) unless (exists($hOptions{'v'}) and defined($hOptions{'v'}));
  my $sVersion = $hOptions{'v'};
  if ($sVersion !~ /^$sVersionRegex$/)
  {
    print STDERR "$sProgram: Version='$sVersion' Error='Invalid version.'\n";
    exit(2);
  }

  # Left-over arguments are an error.
  Usage($sProgram) if (scalar(@ARGV) > 0);

  # Convert and print.
  print VersionToString(hex($sVersion), $sType), "\n";

  1;


######################################################################
#
# VersionToString
#
# Render a packed version number as text. The fields are read as:
# bits 28-31 major, 20-27 minor, 12-19 patch, 10-11 state, 0-9 build.
# A value whose low 12 bits equal 0x800 (state "sr", build 0) is
# rendered without the state/build suffix. An unrecognized type
# yields the empty string.
#
######################################################################

sub VersionToString
{
  my ($sVersion, $sType) = @_;

  my $sMajor = ($sVersion >> 28) & 0x0f;
  my $sMinor = ($sVersion >> 20) & 0xff;
  my $sPatch = ($sVersion >> 12) & 0xff;
  my $sBuild = $sVersion & 0x3ff;
  my $sStateString = ("ds", "rc", "sr", "xs")[($sVersion >> 10) & 0x03];
  my $sIsPlainRelease = (($sVersion & 0xfff) == 0x800);

  if ($sType =~ /^cvs$/)
  {
    return sprintf("V%d_%d_%d", $sMajor, $sMinor, $sPatch) if ($sIsPlainRelease);
    return sprintf("V%d_%d_%d_%s%d", $sMajor, $sMinor, $sPatch, uc($sStateString), $sBuild);
  }

  if ($sType =~ /^tar$/)
  {
    return sprintf("%d.%d.%d", $sMajor, $sMinor, $sPatch) if ($sIsPlainRelease);
    return sprintf("%d.%d.%d.%s%d", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  if ($sType =~ /^program$/)
  {
    return sprintf("%d.%d.%d", $sMajor, $sMinor, $sPatch) if ($sIsPlainRelease);
    return sprintf("%d.%d.%d (%s%d)", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  return "";
}


######################################################################
#
# Usage
#
# Print the synopsis on STDERR and exit with status 1.
#
######################################################################

sub Usage
{
  my ($sProgram) = @_;
  print STDERR "\nUsage: $sProgram [-t {cvs|program|tar}] -v version\n\n";
  exit(1);
}

================================================
FILE: utils/version_helper
================================================
#!/usr/bin/perl -w
######################################################################
#
# $Id: 40c3c9381e39f6934a485d3cde86765789e61f42 $
#
######################################################################
#
# Copyright 2006-2013 The WebJob Project, All Rights Reserved.
#
######################################################################
#
# Purpose: Manage version numbers.
#
######################################################################

use strict;
use File::Basename;
use File::Path;
use Getopt::Std;

######################################################################
#
# Main Routine
#
######################################################################

  ####################################################################
  #
  # Punch in and go to work.
  #
  ####################################################################

  my ($sProgram);

  $sProgram = basename(__FILE__);

  ####################################################################
  #
  # Validation expressions.
# #################################################################### my $sBuildNumberRegex = qq((?:\\d+|[+])); my $sMajorNumberRegex = qq((?:\\d+|[+])); my $sMinorNumberRegex = qq((?:\\d+|[+])); my $sPatchNumberRegex = qq((?:\\d+|[+])); my $sStateNumberRegex = qq((?:[0-3+]|[dx]s|rc|sr)); #################################################################### # # Get Options. # #################################################################### my (%hOptions); if (!getopts('b:f:M:m:p:s:', \%hOptions)) { Usage($sProgram); } #################################################################### # # A filename is required, and can be '-' or a regular file. # #################################################################### my ($sFileHandle, $sFilename); if (!exists($hOptions{'f'})) { Usage($sProgram); } else { $sFilename = $hOptions{'f'}; if (!defined($sFilename) || length($sFilename) < 1) { Usage($sProgram); } if (-f $sFilename) { if (!open(FH, "< $sFilename")) { print STDERR "$sProgram: File='$sFilename' Error='$!'\n"; exit(2); } $sFileHandle = \*FH; } else { if ($sFilename ne '-') { print STDERR "$sProgram: File='$sFilename' Error='File must be regular.'\n"; exit(2); } $sFileHandle = \*STDIN; } } #################################################################### # # A MajorNumber, '-M', is optional. # #################################################################### my $sMajorNumber; $sMajorNumber = (exists($hOptions{'M'})) ? $hOptions{'M'} : undef; if (defined($sMajorNumber) && $sMajorNumber !~ /^$sMajorNumberRegex$/) { print STDERR "$sProgram: MajorNumber='$sMajorNumber' Error='Invalid major number.'\n"; exit(2); } #################################################################### # # A MinorNumber, '-m', is optional. # #################################################################### my $sMinorNumber; $sMinorNumber = (exists($hOptions{'m'})) ? 
$hOptions{'m'} : undef; if (defined($sMinorNumber) && $sMinorNumber !~ /^$sMinorNumberRegex$/) { print STDERR "$sProgram: MinorNumber='$sMinorNumber' Error='Invalid minor number.'\n"; exit(2); } #################################################################### # # An PatchNumber, '-p', is optional. # #################################################################### my $sPatchNumber; $sPatchNumber = (exists($hOptions{'p'})) ? $hOptions{'p'} : undef; if (defined($sPatchNumber) && $sPatchNumber !~ /^$sPatchNumberRegex$/) { print STDERR "$sProgram: PatchNumber='$sPatchNumber' Error='Invalid patch number.'\n"; exit(2); } #################################################################### # # A StateNumber, '-s', is optional. # #################################################################### my $sStateNumber; $sStateNumber = (exists($hOptions{'s'})) ? $hOptions{'s'} : undef; if (defined($sStateNumber) && $sStateNumber !~ /^$sStateNumberRegex$/) { print STDERR "$sProgram: StateNumber='$sStateNumber' Error='Invalid state number.'\n"; exit(2); } if (defined($sStateNumber) && $sStateNumber eq "ds") { $sStateNumber = 0; } elsif (defined($sStateNumber) && $sStateNumber eq "rc") { $sStateNumber = 1; } elsif (defined($sStateNumber) && $sStateNumber eq "sr") { $sStateNumber = 2; } elsif (defined($sStateNumber) && $sStateNumber eq "xs") { $sStateNumber = 3; } #################################################################### # # A BuildNumber, '-b', is optional. # #################################################################### my $sBuildNumber; $sBuildNumber = (exists($hOptions{'b'})) ? $hOptions{'b'} : undef; if (defined($sBuildNumber) && $sBuildNumber !~ /^$sBuildNumberRegex$/) { print STDERR "$sProgram: BuildNumber='$sBuildNumber' Error='Invalid build number.'\n"; exit(2); } #################################################################### # # If any arguments remain, it's an error. 
# #################################################################### if (scalar(@ARGV) > 0) { Usage($sProgram); } #################################################################### # # Attempt to locate/identify the current version number. # #################################################################### my ($sOldVersion, $sVersionFmt); while (my $sLine = <$sFileHandle>) { if ($sLine =~ /^#define VERSION (0x[0-9A-Fa-f]{8})\s*/) { $sOldVersion = hex($1); $sVersionFmt = "define"; last; } elsif ($sLine =~ /^\s*(0x[0-9A-Fa-f]{8})\s*$/) { $sOldVersion = hex($1); $sVersionFmt = "string"; last; } elsif ($sLine =~ /^\s*(?:version\s+=\s+)?(0x[0-9A-Fa-f]{8})\s*$/) { $sOldVersion = hex($1); $sVersionFmt = "assign"; last; } else { next; } } close($sFileHandle); if (!defined($sOldVersion)) { print STDERR "$sProgram: Error='Failed to locate/identify current version number.'\n"; exit(2); } if (!defined($sVersionFmt)) { print STDERR "$sProgram: Error='Failed to determine version format.'\n"; exit(2); } #################################################################### # # Compute the new version number. 
# #################################################################### my ($sNewVersion); $sNewVersion = $sOldVersion; if (defined($sMajorNumber)) { if ($sMajorNumber =~ /^\+$/) { $sNewVersion += 0x10000000; $sNewVersion &= 0xf0000000; } else { if ($sMajorNumber < 0 || $sMajorNumber > 15) { print STDERR "$sProgram: MajorNumber='$sMajorNumber' Error='Invalid major number.'\n"; exit(2); } $sNewVersion = (($sMajorNumber & 0xf) << 28) + ($sNewVersion & 0x0fffffff); } } if (defined($sMinorNumber)) { if ($sMinorNumber =~ /^\+$/) { $sNewVersion += 0x00100000; $sNewVersion &= 0xfff00000; } else { if ($sMinorNumber < 0 || $sMinorNumber > 255) { print STDERR "$sProgram: MinorNumber='$sMinorNumber' Error='Invalid minor number.'\n"; exit(2); } $sNewVersion = (($sMinorNumber & 0xff) << 20) + ($sNewVersion & 0xf00fffff); } } if (defined($sPatchNumber)) { if ($sPatchNumber =~ /^\+$/) { $sNewVersion += 0x00001000; $sNewVersion &= 0xfffff000; } else { if ($sPatchNumber < 0 || $sPatchNumber > 255) { print STDERR "$sProgram: PatchNumber='$sPatchNumber' Error='Invalid patch number.'\n"; exit(2); } $sNewVersion = (($sPatchNumber & 0xff) << 12) + ($sNewVersion & 0xfff00fff); } } if (defined($sStateNumber)) { if ($sStateNumber =~ /^\+$/) { $sNewVersion += 0x00000400; $sNewVersion &= 0xfffffc00; } else { if ($sStateNumber < 0 || $sStateNumber > 255) { print STDERR "$sProgram: StateNumber='$sStateNumber' Error='Invalid state number.'\n"; exit(2); } $sNewVersion = (($sStateNumber & 0x3) << 10) + ($sNewVersion & 0xfffff3ff); } } if (defined($sBuildNumber)) { if ($sBuildNumber =~ /^\+$/) { $sNewVersion += 0x00000001; } else { if ($sBuildNumber < 0 || $sBuildNumber > 255) { print STDERR "$sProgram: BuildNumber='$sBuildNumber' Error='Invalid build number.'\n"; exit(2); } $sNewVersion = ($sBuildNumber & 0x3ff) + ($sNewVersion & 0xfffffc00); } } #################################################################### # # Generate update/commit/tag commands the user can run manually. 
# #################################################################### my $sOldVersionString = VersionToString($sOldVersion, "tar"); my $sNewVersionString = VersionToString($sNewVersion, "tar"); my $so = sprintf("0x%08x", $sOldVersion); my $sn = sprintf("0x%08x", $sNewVersion); my $sCommand = "perl -p -i.bak "; if ($sVersionFmt eq "macro") { $sCommand .= " -e 's/define VERSION $so/define VERSION $sn/g;' $sFilename"; } else { $sCommand .= " -e 's/$so/$sn/g;' $sFilename"; } print $sCommand, "\n"; $sCommand = "cvs commit -m \"Updated version number ($sOldVersionString --> $sNewVersionString).\""; print $sCommand, "\n"; $sCommand = "cvs tag " . VersionToString($sNewVersion, "vcs"); print $sCommand, "\n"; if (((($sNewVersion >> 10) & 0x03) == 2) && (($sNewVersion & 0x3ff) == 0)) { $sCommand = "cvs tag " . VersionToString($sNewVersion, "vcs_sr0"); print $sCommand, "\n"; } 1; ###################################################################### # # VersionToString # ###################################################################### sub VersionToString { my ($sVersion, $sType) = @_; my $sState = ($sVersion >> 10) & 0x03; my $sStateString = "xx"; if ($sState == 0) { $sStateString = "ds"; } elsif ($sState == 1) { $sStateString = "rc"; } elsif ($sState == 2) { $sStateString = "sr"; } elsif ($sState == 3) { $sStateString = "xs"; } my $sString = ""; if ($sType =~ /^vcs$/) { $sString = sprintf ( "V%d_%d_%d_%s%d", ($sVersion >> 28) & 0x0f, ($sVersion >> 20) & 0xff, ($sVersion >> 12) & 0xff, uc($sStateString), $sVersion & 0x3ff ); } elsif ($sType =~ /^vcs_sr0$/) { $sString = sprintf ( "V%d_%d_%d", ($sVersion >> 28) & 0x0f, ($sVersion >> 20) & 0xff, ($sVersion >> 12) & 0xff ); } elsif ($sType =~ /^tar$/) { $sString = sprintf ( "%d.%d.%d.%s%d", ($sVersion >> 28) & 0x0f, ($sVersion >> 20) & 0xff, ($sVersion >> 12) & 0xff, $sStateString, $sVersion & 0x3ff ); } elsif ($sType =~ /^program$/) { $sString = sprintf ( "%d.%d.%d (%s%d)", ($sVersion >> 28) & 0x0f, ($sVersion >> 20) & 
0xff, ($sVersion >> 12) & 0xff, $sStateString, $sVersion & 0x3ff ); } return $sString; } ###################################################################### # # Usage # ###################################################################### sub Usage { my ($sProgram) = @_; print STDERR "\n"; print STDERR "Usage: $sProgram [-M major] [-m minor] [-p patch] [-s state] [-b build] -f {file|-}\n"; print STDERR "\n"; exit(1); }