Repository: KoreLogicSecurity/mastiff
Branch: master
Commit: 04d569e4fa59
Files: 97
Total size: 283.3 KB

Directory structure:
gitextract_tt3ov715/

├── .gitattributes
├── .gitignore
├── MANIFEST.in
├── Makefile
├── PKG-INFO
├── README
├── README.CREDITS
├── README.INSTALL
├── README.LICENSE
├── README.PLUGINS
├── mas.py
├── mastiff/
│   ├── __init__.py
│   ├── conf.py
│   ├── core.py
│   ├── filetype.py
│   ├── plugins/
│   │   ├── __init__.py
│   │   ├── analysis/
│   │   │   ├── EXE/
│   │   │   │   ├── EXE-peinfo.py
│   │   │   │   ├── EXE-peinfo.yapsy-plugin
│   │   │   │   ├── EXE-resources.py
│   │   │   │   ├── EXE-resources.yapsy-plugin
│   │   │   │   ├── EXE-sig.py
│   │   │   │   ├── EXE-sig.yapsy-plugin
│   │   │   │   ├── EXE-singlestring.py
│   │   │   │   ├── EXE-singlestring.yapsy-plugin
│   │   │   │   └── __init__.py
│   │   │   ├── GEN/
│   │   │   │   ├── GEN-fileinfo.py
│   │   │   │   ├── GEN-fileinfo.yapsy-plugin
│   │   │   │   ├── GEN-fuzzy.py
│   │   │   │   ├── GEN-fuzzy.yapsy-plugin
│   │   │   │   ├── GEN-hex.py
│   │   │   │   ├── GEN-hex.yapsy-plugin
│   │   │   │   ├── GEN-mastiff-online.py
│   │   │   │   ├── GEN-mastiff-online.yapsy-plugin
│   │   │   │   ├── GEN-metascan.py
│   │   │   │   ├── GEN-metascan.yapsy-plugin
│   │   │   │   ├── GEN-strings.py
│   │   │   │   ├── GEN-strings.yapsy-plugin
│   │   │   │   ├── GEN-virustotal.py
│   │   │   │   ├── GEN-virustotal.yapsy-plugin
│   │   │   │   ├── GEN-yara.py
│   │   │   │   ├── GEN-yara.yapsy-plugin
│   │   │   │   └── __init__.py
│   │   │   ├── Office/
│   │   │   │   ├── Office-metadata.py
│   │   │   │   ├── Office-metadata.yapsy-plugin
│   │   │   │   ├── Office-pyOLEScanner.py
│   │   │   │   ├── Office-pyOLEScanner.yapsy-plugin
│   │   │   │   └── __init__.py
│   │   │   ├── PDF/
│   │   │   │   ├── PDF-metadata.py
│   │   │   │   ├── PDF-metadata.yapsy-plugin
│   │   │   │   ├── PDF-pdfid.py
│   │   │   │   ├── PDF-pdfid.yapsy-plugin
│   │   │   │   ├── PDF-pdfparser.py
│   │   │   │   ├── PDF-pdfparser.yapsy-plugin
│   │   │   │   └── __init__.py
│   │   │   ├── ZIP/
│   │   │   │   ├── ZIP-extract.py
│   │   │   │   ├── ZIP-extract.yapsy-plugin
│   │   │   │   ├── ZIP-zipinfo.py
│   │   │   │   ├── ZIP-zipinfo.yapsy-plugin
│   │   │   │   └── __init__.py
│   │   │   └── __init__.py
│   │   ├── category/
│   │   │   ├── EXE.yapsy-plugin
│   │   │   ├── PDF.yapsy-plugin
│   │   │   ├── __init__.py
│   │   │   ├── categories.py
│   │   │   ├── exe.py
│   │   │   ├── generic.py
│   │   │   ├── generic.yapsy-plugin
│   │   │   ├── office.py
│   │   │   ├── office.yapsy-plugin
│   │   │   ├── pdf.py
│   │   │   ├── zip.py
│   │   │   └── zip.yapsy-plugin
│   │   └── output/
│   │       ├── OUTPUT-raw.py
│   │       ├── OUTPUT-raw.yapsy-plugin
│   │       ├── OUTPUT-text.py
│   │       ├── OUTPUT-text.yapsy-plugin
│   │       └── __init__.py
│   ├── queue.py
│   └── sqlite.py
├── mastiff.conf
├── pylint.rc
├── setup.cfg
├── setup.py
├── skeleton/
│   ├── OUTPUT-skel.py
│   ├── OUTPUT-skel.yapsy-plugin
│   ├── analysis-ext-skel.py
│   ├── analysis-ext-skel.yapsy-plugin
│   ├── analysis-skel.py
│   ├── analysis-skel.yapsy-plugin
│   ├── category-skel.py
│   ├── category-skel.yapsy-plugin
│   └── output-skel.yapsy-plugin
├── tests/
│   ├── import-test.sh
│   ├── mastiff-test.sh
│   └── test.doc
└── utils/
    ├── version2string
    └── version_helper

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
* ident


================================================
FILE: .gitignore
================================================


================================================
FILE: MANIFEST.in
================================================
include *.py
include *.yapsy-plugin
include docs/*.pdf
include pylint.rc
include Makefile
include README
include README.CREDITS
include README.INSTALL
include README.LICENSE
include README.PLUGINS
include mastiff.conf
include skeleton/*.py
include skeleton/*.yapsy-plugin
include tests/*
include utils/*
exclude README.RELENG
recursive-exclude docs *.odt
recursive-include mastiff *.py *.yapsy-plugin


================================================
FILE: Makefile
================================================
# $Id: 77c80f02785dfc5ef2f764bfe7f487dc0c165278 $
#
# Makefile for installation of mastiff.
#

# Every target in this file names an action, not a file it produces.
# Declaring them phony keeps a stray file or directory with the same name
# (e.g. "build", "dist", "test") from making a target look up to date.
.PHONY: all build check test check-clean test-clean clean clean-all \
        dev dev-clean dist sdist install lint sign

# Default target: build the package.
all: build

# Build the package with setuptools.
build::
	@ python setup.py build

# Run the import test and the analysis test, then remove the work directory.
check test:
	@ bash tests/import-test.sh `pwd`
	@ bash tests/mastiff-test.sh
	@ rm -rf work/

# Remove build artifacts plus the test-*.txt files under tests/.
check-clean test-clean: clean
	@ rm -f tests/test-*.txt

# Remove compiled Python files, editor backups, build output and test logs.
# find runs rm itself so that paths containing whitespace are handled
# correctly and nothing is executed when no file matches.
clean:
	@ find . \( -name "*.pyc" -o -name "*~" \) -exec rm -f {} +
	@ rm -rf dist build mastiff.egg-info
	@ rm -f tests/*.txt

# Remove everything: test output and the development install.
clean-all: check-clean dev-clean

# Install in setuptools "develop" mode.
dev:
	@ python setup.py develop

# Undo "make dev" and remove the installed wrapper script.
dev-clean: clean
	@ python setup.py develop --uninstall
	@ rm -f /usr/local/bin/mas.py

# Create a source distribution under dist/.
dist sdist::
	@ python setup.py sdist

# Build, then install the package.
install: build
	@ python setup.py install

# Run pylint over every Python file using the project's rc file.
lint:
	@ find . -name "*.py" -exec pylint --rcfile=pylint.rc {} \;

# Create a detached GPG signature for the source tarball. The tarball name
# is derived from the hex version number in mastiff/__init__.py.
sign: dist
	@ version_number=`grep -E '^version = 0x' mastiff/__init__.py | awk '{print $$3}'` ; \
	version_string=`utils/version2string -t tar -v $${version_number}` ; \
	dist_file="dist/mastiff-$${version_string}.tar.gz" ; \
	gpg --default-key 64615D14 -s -b $${dist_file}


================================================
FILE: PKG-INFO
================================================
Metadata-Version: 1.0
Name: mastiff
Version: 0.8.0.ds0
Summary: MASTIFF is a static analysis automation framework.
Home-page: http://www.korelogic.com
Author: Tyler Hudak
Author-email: mastiff-project@korelogic.com
License: Apache License V2.0
Description: MASTIFF is a static analysis framework that automates the
        process of extracting key characteristics from a number of different file
        formats. To ensure the framework remains flexible and extensible, a
        community-driven set of plug-ins is used to perform file analysis and data
        extraction. While originally designed to support malware, intrusion, and
        forensic analysis, the framework is well-suited to support a broader range of
        analytic needs. In a nutshell, MASTIFF allows analysts to focus on analysis
        rather than figuring out how to parse files.
Platform: Linux


================================================
FILE: README
================================================

REVISION

  $Id: 17f09461545f9d0409f9480a417c3831ae34539d $

OVERVIEW

  MASTIFF is a static analysis framework that automates the process of
  extracting key characteristics from a number of different file
  formats.  To ensure the framework remains flexible and extensible, a
  community-driven set of plug-ins is used to perform file analysis
  and data extraction.  While originally designed to support malware,
  intrusion, and forensic analysis, the framework is well-suited to
  support a broader range of analytic needs.  In a nutshell, MASTIFF
  allows analysts to focus on analysis rather than figuring out how to
  parse files.

  The MASTIFF Project is hosted at:

    https://git.korelogic.com/mastiff.git/

DOCUMENTATION

  General documentation is located in the docs directory.  See the
  README.INSTALL file for instructions on how to build, test, and
  install the framework.

LICENSE

  The terms and conditions under which this software is released are
  set forth in README.LICENSE.


================================================
FILE: README.CREDITS
================================================

REVISION

  $Id: 02e5406c2bbd4202e46796589395a4611897b806 $

CREDITS

  Tyler Hudak (author, maintainer)
  Klayton Monroe (contributor, maintainer)

SPONSORS

  DARPA Cyber Fast Track Program (2012)
  KoreLogic (2012-present)


================================================
FILE: README.INSTALL
================================================

REVISION

  $Id: daec28262cb37c5a4952618675b33e234e48773d $

OVERVIEW

  MASTIFF is a static analysis framework that automates the process of
  extracting key characteristics from a number of different file
  formats.  To ensure the framework remains flexible and extensible, a
  community-driven set of plug-ins is used to perform file analysis
  and data extraction.  While originally designed to support malware,
  intrusion, and forensic analysis, the framework is well-suited to
  support a broader range of analytic needs.  In a nutshell, MASTIFF
  allows analysts to focus on analysis rather than figuring out how to
  parse files.

  The MASTIFF Project is hosted at:

    https://git.korelogic.com/mastiff.git/

TECHNICAL REQUIREMENTS

  The following software must be installed for MASTIFF to work
  properly.

    - Python 2.6.6 or greater
    - Yapsy 1.10 or greater (http://yapsy.sourceforge.net/)
    - Python sqlite3 (http://docs.python.org/library/sqlite3)
    - Python setuptools (http://pypi.python.org/pypi/setuptools/)
    - Yara, libyara and yara-python (http://code.google.com/p/yara-project)

  A Python libmagic library is also required. MASTIFF supports two different
  libmagic libraries:

    - libmagic Python extensions (ftp://ftp.astron.com/pub/file/)
        This may be installed through the source code above or is the library
        installed as python-magic in most Linux code repositories.

    - Python-magic (https://github.com/ahupp/python-magic/)
        This may be installed through the source code above or via Python
        pip.

PREREQUISITES INSTALLATION

  The Python setuptools and magic libraries will need to be installed
  on your own.  For Debian/Ubuntu-based distributions, this can be
  accomplished with:

    $ sudo aptitude install python-setuptools
    $ sudo aptitude install python-magic

  On Gentoo-based distributions, there is no Python magic package.
  However, adding the python USE flag to the sys-apps/file package
  will create the correct Python libraries.

  Setuptools can be installed as follows:

    $ sudo emerge -av setuptools

  Yapsy will automatically download and install when the make program
  is run, or you can download and install it on your own.  Yapsy is
  also located in the Gentoo Portage repository.

    $ sudo emerge -av yapsy

  Note that the plug-ins utilized by MASTIFF may have their own
  prerequisites.

TESTING

  MASTIFF comes with a test suite that can be used to determine if all
  prerequisites have been properly installed and MASTIFF is able to analyze
  files correctly. To run these tests, run:

    $ make test

  Two sets of tests will run.

  - Python imports for all MASTIFF core files and plug-ins will be checked to
    ensure they can be imported. Any that cannot will be displayed.

  - MASTIFF will examine 4 different files to ensure there are no issues.

  All output will go into the tests/ directory.

INSTALLATION

  If you wish to only test out MASTIFF, skip to the Development
  Testing section.

  MASTIFF utilizes the Python setuptools code for installation of the
  package.  The easiest way to install the package is:

    $ sudo make install

  This will install the package into the appropriate Python
  site-packages directory for your system.  It will also install
  mas.py, the main MASTIFF wrapper script into /usr/local/bin.

  If you do not have Yapsy installed, it will attempt to download and
  install it for you.

  If you install using this method, the only way to uninstall is to
  manually delete files.

  After installing MASTIFF, modify the mastiff.conf configuration file
  to ensure the options for plug-ins are correctly set for your analysis
  system.

DEVELOPMENT TESTING

  If instead you wish to only test it for development purposes, run
  the following command:

    $ sudo make dev

  This will install placeholders into the Python dist-packages that
  point to this directory.  Any modifications made to the code will
  automatically be reflected when running the software.  Additionally,
  mas.py will be placed in /usr/local/bin.

  To uninstall the dev environment, run:

    $ sudo make dev-clean

  This will remove all placeholders as well as /usr/local/bin/mas.py.

PLUG-IN REQUIREMENTS

  As of the current release, the plug-ins utilized by MASTIFF require a
  number of additional libraries or programs to be installed.

    - ssdeep (http://ssdeep.sourceforge.net/)
    - pydeep (https://github.com/kbandla/pydeep)
    - Yara, libyara and yara-python
      (http://code.google.com/p/yara-project)
    - simplejson (https://github.com/simplejson/simplejson)
    - Didier Stevens' pdf-parser.py
      (http://blog.didierstevens.com/programs/pdf-tools/)
    - Didier Stevens' pdfid.py (http://blog.didierstevens.com/programs/pdf-tools/)
    - exiftool (http://www.sno.phy.queensu.ca/~phil/exiftool/)
    - pefile library (http://code.google.com/p/pefile/)
      NOTE: Do NOT install pefile from the Debian/Ubuntu repository! Install
      from source!
    - disitool.py (http://blog.didierstevens.com/programs/disitool/)
    - openssl binary (http://www.openssl.org/)
    - Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py
      (https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip)
    - distorm (http://code.google.com/p/distorm/)

  Some of these programs can be installed from your distribution's
  software repository; others may need to be installed from source.
  After these programs have been installed,
  be sure to check the MASTIFF configuration file and update all
  configuration options to point to the correct locations.

RUNNING MASTIFF

  The best way to run MASTIFF is to use the mas.py program.  This
  script has been written to provide you with the maximum number of
  options for using MASTIFF.  This script will be installed to
  /usr/local/bin when you install the package.

  mas.py can be run by only giving it a file or directory to analyze as an
  argument.

    $ mas.py /path/to/file2analyze

  If MASTIFF is given a directory, it will enumerate all files within that
  directory, and every subdirectory, and analyze them.

  Although the only required argument is the filename or directory to be
  analyzed, the following table lists available options.

  -c CONFIG_FILE, --conf=CONFIG_FILE
                        Use an alternate config file. The default is
                        './mastiff.conf'.
  -h, --help            Show the help message and exit.
  -l PLUGIN_TYPE, --list=PLUGIN_TYPE
                        List all available plug-ins of the specified type and
                        exit. Type must be one of 'analysis', 'cat', or
                        'output'.
  -o OVERRIDE, --option=OVERRIDE
                        Override a config file option. Configuration options
                        should be specified as 'Section.Key=Value' and should
                        be quoted if any whitespace is present. Multiple
                        overrides can be specified by using multiple '-o'
                        options.
  -p PLUGIN_NAME, --plugin=PLUGIN_NAME
                        Only run the specified analysis plug-in. Name must be
                        quoted if it contains whitespace.
  -q, --quiet           Only log errors.
  -t FTYPE, --type=FTYPE
                        Force file to be analyzed with plug-ins from the
                        specified category (e.g., EXE, PDF, etc.). Run with
                        '-l cat' to list all available category plug-ins.
  -V, --verbose         Print verbose logs.
  -v, --version         Show program's version number and exit.

  Queue Options:
    --append-queue      Append file or directory to job queue and exit.
    --clear-queue       Clear job queue and exit.
    --ignore-queue      Ignore the job queue and just process file.
    --list-queue        List the contents of the job queue and exit.
    --resume-queue      Continue processing the queue.


================================================
FILE: README.LICENSE
================================================

REVISION

  $Id: f19abdb0df9b2aadb274fb66a8f813edb7f508a0 $

OVERVIEW

  This document contains licensing information for The MASTIFF
  Project, which was established by Tyler Hudak of KoreLogic, Inc.
  in 2012.  Unless specifically excluded, all files in this project
  fall under the terms and conditions of the Apache License,
  Version 2.0 as stated below.  Excluded files or components that
  fall under other licenses are detailed below as well.

THE APACHE LICENSE VERSION 2.0 (MASTIFF)

  Copyright 2012-2013 The MASTIFF Project

  All rights reserved.

  1. Definitions.

     "License" shall mean the terms and conditions for use,
     reproduction, and distribution as defined by Sections 1 through 9
     of this document.

     "Licensor" shall mean the copyright owner or entity authorized by
     the copyright owner that is granting the License.

     "Legal Entity" shall mean the union of the acting entity and all
     other entities that control, are controlled by, or are under
     common control with that entity.  For the purposes of this
     definition, "control" means (i) the power, direct or indirect, to
     cause the direction or management of such entity, whether by
     contract or otherwise, or (ii) ownership of fifty percent (50%)
     or more of the outstanding shares, or (iii) beneficial ownership
     of such entity.

     "You" (or "Your") shall mean an individual or Legal Entity
     exercising permissions granted by this License.

     "Source" form shall mean the preferred form for making
     modifications, including but not limited to software source code,
     documentation source, and configuration files.

     "Object" form shall mean any form resulting from mechanical
     transformation or translation of a Source form, including but not
     limited to compiled object code, generated documentation, and
     conversions to other media types.

     "Work" shall mean the work of authorship, whether in Source or
     Object form, made available under the License, as indicated by a
     copyright notice that is included in or attached to the work (an
     example is provided in the Appendix below).

     "Derivative Works" shall mean any work, whether in Source or
     Object form, that is based on (or derived from) the Work and for
     which the editorial revisions, annotations, elaborations, or
     other modifications represent, as a whole, an original work of
     authorship.  For the purposes of this License, Derivative Works
     shall not include works that remain separable from, or merely
     link (or bind by name) to the interfaces of, the Work and
     Derivative Works thereof.

     "Contribution" shall mean any work of authorship, including the
     original version of the Work and any modifications or additions
     to that Work or Derivative Works thereof, that is intentionally
     submitted to Licensor for inclusion in the Work by the copyright
     owner or by an individual or Legal Entity authorized to submit on
     behalf of the copyright owner.  For the purposes of this
     definition, "submitted" means any form of electronic, verbal, or
     written communication sent to the Licensor or its
     representatives, including but not limited to communication on
     electronic mailing lists, source code control systems, and issue
     tracking systems that are managed by, or on behalf of, the
     Licensor for the purpose of discussing and improving the Work,
     but excluding communication that is conspicuously marked or
     otherwise designated in writing by the copyright owner as "Not a
     Contribution."

     "Contributor" shall mean Licensor and any individual or Legal
     Entity on behalf of whom a Contribution has been received by
     Licensor and subsequently incorporated within the Work.

  2. Grant of Copyright License.  Subject to the terms and conditions
     of this License, each Contributor hereby grants to You a
     perpetual, worldwide, non-exclusive, no-charge, royalty-free,
     irrevocable copyright license to reproduce, prepare Derivative
     Works of, publicly display, publicly perform, sublicense, and
     distribute the Work and such Derivative Works in Source or Object
     form.

  3. Grant of Patent License.  Subject to the terms and conditions of
     this License, each Contributor hereby grants to You a perpetual,
     worldwide, non-exclusive, no-charge, royalty-free, irrevocable
     (except as stated in this section) patent license to make, have
     made, use, offer to sell, sell, import, and otherwise transfer
     the Work, where such license applies only to those patent claims
     licensable by such Contributor that are necessarily infringed by
     their Contribution(s) alone or by combination of their
     Contribution(s) with the Work to which such Contribution(s) was
     submitted.  If You institute patent litigation against any entity
     (including a cross-claim or counterclaim in a lawsuit) alleging
     that the Work or a Contribution incorporated within the Work
     constitutes direct or contributory patent infringement, then any
     patent licenses granted to You under this License for that Work
     shall terminate as of the date such litigation is filed.

  4. Redistribution.  You may reproduce and distribute copies of the
     Work or Derivative Works thereof in any medium, with or without
     modifications, and in Source or Object form, provided that You
     meet the following conditions:

     (a) You must give any other recipients of the Work or Derivative
         Works a copy of this License; and

     (b) You must cause any modified files to carry prominent notices
         stating that You changed the files; and

     (c) You must retain, in the Source form of any Derivative Works
         that You distribute, all copyright, patent, trademark, and
         attribution notices from the Source form of the Work,
         excluding those notices that do not pertain to any part of
         the Derivative Works; and

     (d) If the Work includes a "NOTICE" text file as part of its
         distribution, then any Derivative Works that You distribute
         must include a readable copy of the attribution notices
         contained within such NOTICE file, excluding those notices
         that do not pertain to any part of the Derivative Works, in
         at least one of the following places: within a NOTICE text
         file distributed as part of the Derivative Works; within the
         Source form or documentation, if provided along with the
         Derivative Works; or, within a display generated by the
         Derivative Works, if and wherever such third-party notices
         normally appear.  The contents of the NOTICE file are for
         informational purposes only and do not modify the License.
         You may add Your own attribution notices within Derivative
         Works that You distribute, alongside or as an addendum to the
         NOTICE text from the Work, provided that such additional
         attribution notices cannot be construed as modifying the
         License.

     You may add Your own copyright statement to Your modifications
     and may provide additional or different license terms and
     conditions for use, reproduction, or distribution of Your
     modifications, or for any such Derivative Works as a whole,
     provided Your use, reproduction, and distribution of the Work
     otherwise complies with the conditions stated in this License.

  5. Submission of Contributions.  Unless You explicitly state
     otherwise, any Contribution intentionally submitted for inclusion
     in the Work by You to the Licensor shall be under the terms and
     conditions of this License, without any additional terms or
     conditions.  Notwithstanding the above, nothing herein shall
     supersede or modify the terms of any separate license agreement
     you may have executed with Licensor regarding such Contributions.

  6. Trademarks.  This License does not grant permission to use the
     trade names, trademarks, service marks, or product names of the
     Licensor, except as required for reasonable and customary use in
     describing the origin of the Work and reproducing the content of
     the NOTICE file.

  7. Disclaimer of Warranty.  Unless required by applicable law or
     agreed to in writing, Licensor provides the Work (and each
     Contributor provides its Contributions) on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
     implied, including, without limitation, any warranties or
     conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or
     FITNESS FOR A PARTICULAR PURPOSE.  You are solely responsible for
     determining the appropriateness of using or redistributing the
     Work and assume any risks associated with Your exercise of
     permissions under this License.

  8. Limitation of Liability.  In no event and under no legal theory,
     whether in tort (including negligence), contract, or otherwise,
     unless required by applicable law (such as deliberate and grossly
     negligent acts) or agreed to in writing, shall any Contributor be
     liable to You for damages, including any direct, indirect,
     special, incidental, or consequential damages of any character
     arising as a result of this License or out of the use or
     inability to use the Work (including but not limited to damages
     for loss of goodwill, work stoppage, computer failure or
     malfunction, or any and all other commercial damages or losses),
     even if such Contributor has been advised of the possibility of
     such damages.

  9. Accepting Warranty or Additional Liability.  While redistributing
     the Work or Derivative Works thereof, You may choose to offer,
     and charge a fee for, acceptance of support, warranty, indemnity,
     or other liability obligations and/or rights consistent with this
     License.  However, in accepting such obligations, You may act
     only on Your own behalf and on Your sole responsibility, not on
     behalf of any other Contributor, and only if You agree to
     indemnify, defend, and hold each Contributor harmless for any
     liability incurred by, or claims asserted against, such
     Contributor by reason of your accepting any such warranty or
     additional liability.

THE NEW BSD LICENSE (WebJob)

  This project includes software developed for The WebJob Project,
  which is distributed under the following terms and conditions:

  Copyright 2006-2013 The WebJob Project

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.

  3. Neither the names of the copyright holders nor the names of any
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: README.PLUGINS
================================================

REVISION

  $Id: 9a263fb024741bc9fa6fafd3b146d260e9db4d26 $

SKELETON PLUG-INS

  The project's skeleton directory contains three types of skeleton
  plug-ins that can be used to start coding your own plug-ins for the
  framework.  Just choose the skeleton code for the type of plug-in
  you would like to develop, modify a few lines, and start coding.
  Note that these files are intended to serve as examples and helpful
  hints on how to get started, not as definitive ways to create
  plug-ins.

  The three types of skeleton plug-ins are:

  - category-skel: A skeleton category plug-in to define a new
    file type.

  - analysis-skel: A skeleton analysis plug-in to define a new
    type of analysis.  This code is for a Generic plug-in, but can be
    easily modified for any file-type category.

  - analysis-ext-skel: A skeleton analysis plug-in to define a new
    type of analysis that calls an external program.  This type of
    plug-in is excellent for acting as a wrapper script around another
    program.


================================================
FILE: mas.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
MASTIFF - MAlicious Static Inspection File Framework

This program implements the code necessary to statically analyze files within
a plugin-based framework.

"""

__version__ = "$Id: e3288d64e94fb2c155552a6922e77e347081d77f $"

import sys
import logging
import os
import os.path
#import signal

# Refuse to run on interpreters older than the minimum supported release.
if sys.version_info < (2, 6, 6):
    # End the message with a newline so the shell prompt does not end up
    # glued to it, and state that newer versions are accepted as well.
    sys.stderr.write("Mastiff requires python version 2.6.6 or greater.\n")
    sys.exit(1)

from optparse import OptionParser, OptionGroup
import mastiff.core as Mastiff
from mastiff import get_release_string
import mastiff.queue as queue

def add_to_queue(job_queue, fname):
    """
    Add a file, or every file found beneath a directory, to the job queue.

    job_queue: Queue to add to; only its append() method is used here.
    fname: Path of the file or directory being submitted.

    Every path is queued in absolute form, for directories and single files
    alike, so a queued entry does not depend on the working directory of
    whichever run eventually processes it.

    Logs an error and exits with status 1 if fname is neither an existing
    file nor an existing directory.
    """

    log = logging.getLogger('Mastiff.queue')

    if os.path.isdir(fname):
        # This is a directory - walk it and add all of its files.
        log.info('Adding directory %s to queue.', fname)
        for root, _, files in os.walk(fname):
            for name in files:
                new_file = os.path.abspath(os.path.join(root, name))
                log.debug('Adding %s to job queue.', new_file)
                job_queue.append(new_file)
    elif os.path.isfile(fname):
        # A single file - queue it using the same absolute form as above.
        new_file = os.path.abspath(fname)
        log.debug('Adding file %s to job queue.', new_file)
        job_queue.append(new_file)
    else:
        log.error('Submission is neither file or directory. Exiting.')
        sys.exit(1)
        
def analyze_file(fname, opts, loglevel):
    """
    Run a MASTIFF analysis on a single file.

    fname: Path of the file to analyze.
    opts: Parsed options; config_file, override, ftype and plugin_name
          are read from it.
    loglevel: Log level handed to the Mastiff constructor.
    """

    logger = logging.getLogger('Mastiff.analyze')
    logger.info("Starting analysis on %s", fname)

    framework = Mastiff.Mastiff(opts.config_file,
                                loglevel=loglevel,
                                override=opts.override)

    # Only override the file type when one was explicitly requested.
    forced_type = opts.ftype
    if forced_type is not None:
        logger.info('Forcing file type to include "%s"', forced_type)
        framework.set_filetype(fname=fname, ftype=forced_type)

    framework.analyze(fname, opts.plugin_name)
    
def main():
    """
    Parse the command line and drive MASTIFF.

    Flow:
      1. Build the option parser and parse sys.argv.
      2. Print help and exit(1) when no FILE|DIRECTORY was given and none of
         --list, --clear-queue, --resume-queue or --list-queue was used.
      3. Configure logging (-V gives DEBUG, -q gives ERROR, otherwise INFO).
      4. Handle the "do one thing and exit" options: --list, --clear-queue,
         --list-queue, --ignore-queue and --append-queue.
      5. Otherwise add the submission to the job queue and analyze queued
         files until the queue is empty.
    """

    usage = "usage: %prog [options] FILE|DIRECTORY"
    parser = OptionParser(
                     add_help_option = False,
                     version = "%prog " + get_release_string(),
                     usage = usage)
    # Passing version= makes optparse add its own --version option; drop it
    # so it can be re-added below together with the "-v" short form.
    parser.remove_option("--version")

    parser.add_option(
                      "--conf",
                      "-c",
                      action = "store",
                      default = "./mastiff.conf",
                      dest = "config_file",
                      help = "Use an alternate config file. The default is './mastiff.conf'.",
                      type = "string")
    parser.add_option(
                      "--help",
                      "-h",
                      action = "help",
                      help = "Show the help message and exit.")
    parser.add_option(
                      "--list",
                      "-l",
                      action = "store",
                      dest = "list_plugins",
                      help = "List all available plug-ins of the specified type and exit. Type must be one of 'analysis', 'cat', or 'output'.",
                      metavar = "PLUGIN_TYPE")
    parser.add_option(
                      "--option",
                      "-o",
                      action = "append",
                      default = None,
                      dest = "override",
                      help = "Override a config file option. Configuration options should be specified as 'Section.Key=Value' and should be quoted if any whitespace is present. Multiple overrides can be specified by using multiple '-o' options.")
    parser.add_option(
                      "--plugin",
                      "-p",
                      action = "store",
                      default = None,
                      dest = "plugin_name",
                      help = "Only run the specified analysis plug-in. Name must be quoted if it contains whitespace.")
    parser.add_option(
                      "--quiet",
                      "-q",
                      action = "store_true",
                      default = False,
                      dest = "quiet",
                      help = "Only log errors.")
    parser.add_option(
                      "--type",
                      "-t",
                      action = "store",
                      default = None,
                      dest = "ftype",
                      help = "Force file to be analyzed with plug-ins from the specified category (e.g., EXE, PDF, etc.). Run with '-l cat' to list all available category plug-ins.",
                      type = "string")
    parser.add_option(
                      "--verbose",
                      "-V",
                      action = "store_true",
                      dest = "verbose",
                      default = False,
                      help = "Print verbose logs.")
    parser.add_option(
                      "--version",
                      "-v",
                      action = "version",
                      help = "Show program's version number and exit.")

    # Queue options only have long forms.
    queue_group = OptionGroup(parser, "Queue Options")
    queue_group.add_option(
                      "--append-queue",
                      action = "store_true",
                      dest = "append_queue",
                      default = False,
                      help = "Append file or directory to job queue and exit.")
    queue_group.add_option(
                      "--clear-queue",
                      action = "store_true",
                      dest = "clear_queue",
                      default = False,
                      help = "Clear job queue and exit.")
    queue_group.add_option(
                      "--ignore-queue",
                      action = "store_true",
                      dest = "ignore_queue",
                      default = False,
                      help = "Ignore the job queue and just process file.")
    queue_group.add_option(
                      "--list-queue",
                      action = "store_true",
                      dest = "list_queue",
                      default = False,
                      help = "List the contents of the job queue and exit.")
    queue_group.add_option(
                      "--resume-queue",
                      action = "store_true",
                      default = False,
                      dest = "resume_queue",
                      help = "Continue processing the queue.")
    parser.add_option_group(queue_group)

    (opts, args) = parser.parse_args()

    # A positional FILE|DIRECTORY is required unless one of the options that
    # can work without a submission was given.
    if not args and opts.list_plugins is None \
    and opts.clear_queue is False and opts.resume_queue is False \
    and opts.list_queue is False:
        parser.print_help()
        sys.exit(1)

    # --verbose wins over --quiet when both are given.
    if opts.verbose == True:
        loglevel = logging.DEBUG
    elif opts.quiet == True:
        loglevel = logging.ERROR
    else:
        loglevel = logging.INFO

    format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
    logging.basicConfig(format=format_)
    log = logging.getLogger("Mastiff")
    log.setLevel(loglevel)

    # check to see if we are running as root
    # os.geteuid() only exists on POSIX platforms, so probe for it first.
    if hasattr(os, 'geteuid') and os.geteuid() == 0:
        log.warning('You are running MASTIFF as ROOT! This may be DANGEROUS!')

    if opts.list_plugins is not None:
        plugs = Mastiff.Mastiff(opts.config_file)
        plugs.list_plugins(opts.list_plugins)
        sys.exit(0)

    # set up job queue
    job_queue = queue.MastiffQueue(opts.config_file)

    # process job queue specific options
    if opts.clear_queue is True:
        log.info('Clearing job queue and exiting.')
        job_queue.clear_queue()
        sys.exit(0)
    elif opts.list_queue is True:
        if len(job_queue) == 0:
            log.info("MASTIFF job queue is empty.")
        else:
            log.info("MASTIFF job queue has %d entries." % len(job_queue))
            # Single-argument call form prints the same text on Python 2 and 3.
            print("\nFile Name\n---------\n%s" % (job_queue))
        sys.exit(0)

    # Only the first positional argument is used as the submission.
    if len(args) > 0:
        fname = args[0]
    else:
        fname = None

    if opts.ignore_queue is True:
        # --ignore-queue can get here without a FILE when it is combined with
        # another queue option (e.g. --resume-queue); fail early instead of
        # handing None to the analysis code.
        if fname is None:
            log.error('No file specified to analyze. Exiting.')
            sys.exit(1)
        log.info('Ignoring job queue.')
        analyze_file(fname, opts, loglevel)
        sys.exit(0)

    # add file or directory to queue
    if fname is not None:
        add_to_queue(job_queue, fname)
        if opts.append_queue is True:
            sys.exit(0)

    # Start analysis on the files in the queue until it is empty
    while len(job_queue) > 0:
        fname = job_queue.popleft()
        analyze_file(fname, opts, loglevel)
        log.info('There are %d jobs in the queue.' % len(job_queue))

if __name__ == '__main__':

    main()


================================================
FILE: mastiff/__init__.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
MASTIFF - MAlicious Static Inspection File Framework

This program implements the code necessary to statically analyze files within
a plugin-based framework.

"""

"""
This file contains package-level variables and functions.
"""

__version__ = "$Id: b55ca3df0a5fa81dea4ab70cfcb713e0759c973b $"

version = 0x00800000

def get_release_number():
    """ Return the packed integer release number held in the module-level ``version``. """
    return version

def get_release_string(release=None):
    """
    Return a release number formatted as a dotted string.

    release: optional packed release number to format. When omitted (the
             default) the module-level ``version`` is used, which is the
             behavior existing callers rely on.

    The packed integer is decoded as:
        bits 28-31  major
        bits 20-27  minor
        bits 12-19  patch
        bits 10-11  state (0 -> "ds", 1 -> "rc", 2 -> "sr", 3 -> "xs")
        bits  0-9   build

    Returns 'major.minor.patch' when state is 2 and build is 0, otherwise
    'major.minor.patch.<state><build>'.
    """
    if release is None:
        release = version

    major = (release >> 28) & 0x0f
    minor = (release >> 20) & 0xff
    patch = (release >> 12) & 0xff
    state = (release >> 10) & 0x03
    build = release & 0x03ff

    # State 2 with build 0 is printed without any suffix.
    if state == 2 and build == 0:
        return '%d.%d.%d' % (major, minor, patch)

    # state is masked to two bits, so it always indexes this 4-tuple.
    state_string = ('ds', 'rc', 'sr', 'xs')[state]
    return '%d.%d.%d.%s%d' % (major, minor, patch, state_string, build)


================================================
FILE: mastiff/conf.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Functions to parse and maintain the Mastiff config file.

The Conf class is used to parse and maintain the Mastiff config file.

__init__(self, config_file=None, override=None): Initializes the config file and
sets up any overridden options.

get_var(section, var): Return a variable from a specified section.

get_bvar(section, var): Return a boolean variable from a specified section.

set_var(section, var, value): Set a variable in a specified section with a
                              given value.

get_section(section): Return a dictionary of items within the section.

list_config(): Prints all configuration variables read in.

dump_config(): Dump a copy of the config into the Mastiff log dir.

override_option(): Override an option from the config file.

"""

__version__ = "$Id: daa2ace9c5481298f0650b96fe31bb786bbc3c8e $"

import os
import sys
import logging
import ConfigParser

class Conf:
    """
    Parse and maintain the Mastiff configuration.

    The configuration lives in a ConfigParser object (self.config) that is
    seeded with built-in defaults, then populated from the config files, and
    finally patched with any command-line overrides.
    """

    def __init__(self, config_file=None, override=None):
        """
        Load the configuration.

        config_file: optional path to a config file. It is read after
                     /etc/mastiff/mastiff.conf and ~/.mastiff.conf, so its
                     values take precedence over theirs.
        override: optional list of 'Section.Key=Value' strings applied after
                  the files have been read.

        Exits the program with status 1 if no configuration file at all
        could be read.
        """

        log = logging.getLogger('Mastiff.Conf')

        # config_file is optional; only normalize it when one was supplied.
        if config_file is not None:
            self.config_file = os.path.abspath(config_file)
        else:
            self.config_file = None

        self.config = ConfigParser.ConfigParser()
        self.set_defaults()

        # read from the default file locations and the file given
        # file given will be read last and will over-write any
        # previously read-in config files
        candidates = ['/etc/mastiff/mastiff.conf',
                      os.path.expanduser('~/.mastiff.conf')]
        if config_file is not None:
            candidates.append(config_file)
        files_read = self.config.read(candidates)

        if not files_read:
            log.error("Could not read any configuration files. Exiting.")
            sys.exit(1)

        if self.config.getboolean('Misc', 'verbose') == True:
            log.setLevel(logging.DEBUG)
            log.debug("Read config from %s", str(files_read))

        if override is not None:
            for opt in override:
                self.override_option(opt)

    def set_defaults(self):
        """
           Set default variables.
           If set later in a config file, these will be overwritten.
           Note: This is being done instead of a default config file to
           reduce the number of files needed.
        """
        self.config.add_section('Dir')
        self.set_var('Dir', 'log_dir', '/var/log/mastiff')
        #self.set_var('Dir', 'plugin_dir', '/usr/local/mastiff/plugins')
        self.config.add_section('Misc')
        self.set_var('Misc', 'verbose', 'off')

    def get_var(self, section, var):
        """
        Return the value of 'var' in 'section'.

        Logs an error and returns None if the section or option is missing.
        """
        try:
            return self.config.get(section, var)
        except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
            log = logging.getLogger('Mastiff.Conf.GetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return None

    def get_bvar(self, section, var):
        """
        Return the value of 'var' in 'section' as a boolean.

        Logs an error and returns False if the section or option is missing.
        """
        try:
            return self.config.getboolean(section, var)
        except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
            log = logging.getLogger('Mastiff.Conf.GetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return False

    def get_section(self, section):
        """
        Return a dictionary of the option/value pairs in a section.

        Logs an error and returns None if the section does not exist.
        """
        try:
            options = self.config.items(section)
        except ConfigParser.NoSectionError:
            log = logging.getLogger('Mastiff.Conf.GetSection')
            log.error('Could not get section "%s".', section)
            return None

        return dict(options)

    def set_var(self, section, var, value):
        """
        Set 'var' in 'section' to 'value'.

        Returns True on success. Logs an error and returns None if the
        section does not exist.
        """
        try:
            # ConfigParser.set() itself returns None, so success has to be
            # reported explicitly for callers to tell it from failure.
            self.config.set(section, var, value)
        except ConfigParser.NoSectionError:
            log = logging.getLogger('Mastiff.Conf.SetVar')
            log.error('Could not find "%s": "%s"', section, var)
            return None
        return True

    def override_option(self, override):
        """
           Override an option from the config file. Note that if the option
           does not exist, it will be added.

           override: string in the form 'Section.Key=Value'. Only the first
                     '=' separates key from value, so the value itself may
                     contain '=' characters.

           Returns True if the option was set, False otherwise.
        """
        log = logging.getLogger('Mastiff.Conf.override')
        options = override.split('=', 1)
        section = options[0].split('.')

        if len(options) != 2 or len(section) != 2:
            log.error('Invalid override option: %s' % override)
            return False

        log.info('Overriding option: %s.%s=%s' % (section[0], section[1], options[1]))
        if self.set_var(section[0], section[1], options[1]) is None:
            return False
        return True

    def list_config(self):
        """Print all variables read in."""
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Configuration Options:")
        for section in self.config.sections():
            print("%s" % (section))
            for (name, value) in self.config.items(section):
                print("\t%s:\t%s" % (name, value))
        return

    def dump_config(self):
        """
        Dump a copy of the config into the Mastiff log dir.

        The file is written as 'mastiff-run.config' inside the directory
        named by the Dir/log_dir option. Failures are logged, not raised.
        """

        log = logging.getLogger('Mastiff.Conf.Dump')
        out_dir = self.get_var('Dir', 'log_dir')
        if out_dir is None:
            # get_var() has already logged the missing option.
            log.error('Unable to dump config file: no log directory set.')
            return

        try:
            with open(os.path.join(out_dir, 'mastiff-run.config'), 'w') as dump_file:
                self.config.write(dump_file)
        except (IOError, OSError, ConfigParser.Error) as err:
            log.error('Unable to dump config file: %s', err)


================================================
FILE: mastiff/core.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
MASTIFF - MAlicious Static Inspection File Framework

This module implements the primary class for static analysis inspection.

Mastiff member variables:

cat_paths: List that contains the path to the category plug-ins.

plugin_paths: List that contains the paths to the analysis plug-ins.

filetype: Dictionary used to store the output from the file-type identification
functions.

file_name: full path to the file being analyzed.

hashes: Tuple of the MD5, SHA1 and SHA256 hashes of the file being analyzed.
This is also stored in the configuration file.

db: Sqlite3 Connection class to the database file.

cat_list: List that contains all of the category plug-ins to be used during
analysis.

activated_plugins: List that contains all of the plug-ins that have been
activated. The order of the plug-ins in this list is the order in which they
will run.

cat_manager: Yapsy PluginManager class that manages the category plug-ins.

plugin_manager: Yapsy PluginManager class that manages the analysis plug-ins.

Mastiff member functions:

__init__(self, config_file=None, fname=None, loglevel=logging.INFO, override=None)
The initialization function of the class. This function will initialize all of the
member variables, set up logging, read in and store the configuration file, and
find and load all plug-ins.

init_file(self, fname)
This function validates the filename being analyzed
to ensure it exists and can be accessed, sets up the directory that all
output will be logged into, and adds initial file information into the
database.

set_filetype(self, fname=None, ftype=None)
Calls the file-type identification helper functions in mastiff/filetype.py,
and loops through all of the category plug-ins to determine which ones will
analyze the file.

validate(self, name, plugin)
Validates an analysis plug-in to ensure that it contains the correct functions.

activate_plugins(self, single_plugin=None)
Loops through all analysis plug-ins for category classes relevant to the file
type being examined and ensures they are valid. If validated, the analysis
plug-in is activated. This function also ensures that any pre-requisite plug-ins
have been activated.

analyze(self, fname=None, single_plugin=None)
Ensures the file type of the file is set up and loops through all activated
analysis plug-ins and calls their analyze() function.

list_plugins(self, ctype='analysis')
Helper function that loops through all available plug-ins and prints out their
name and description (plus the path for analysis and output plug-ins). The
function can print out analysis ('analysis'), category ('cat') or output
('output') plug-in information.
"""

__version__ = "$Id: ace95027e1cc1f56614eaa0fc86d67b5c4aed8bb $"

import sys
import os
import logging
import hashlib
from shutil import copyfile
from operator import attrgetter

import simplejson

if sys.version_info < (2, 6, 6):
    sys.stderr.write("Mastiff requires python version 2.6.6")
    sys.exit(1)

try:
    from yapsy.PluginManager import PluginManager
except ImportError, err:
    print "Yapsy not installed or accessible: %s" % err
    sys.exit(1)

import mastiff.conf as Conf
import mastiff.filetype as FileType
import mastiff.sqlite as DB
import mastiff.plugins.category.categories as Cats
import mastiff.plugins.analysis as analysis
import mastiff.plugins.output as masOutput

class Mastiff:
    """Primary class for the static analysis inspection framework."""

    def __init__(self, config_file=None, fname=None, loglevel=logging.INFO, override=None):
        """
        Initialize the framework.

        config_file: path to the config file, handed to Conf.Conf.
        fname: optional file to analyze; when given, init_file() is called
               on it at the end of construction.
        loglevel: logging level for the "Mastiff" logger and its log file.
        override: optional list of config overrides, handed to Conf.Conf.

        Steps: configure logging, read the configuration, create the base
        log directory, collect category / analysis / output plug-ins, and
        open the database. Exits with status 1 if the base log directory
        cannot be created.
        """

        # configure logging for Mastiff module
        format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
        logging.basicConfig(format=format_)
        log = logging.getLogger("Mastiff")
        log.setLevel(loglevel)
        # Drop handlers left on the shared logger by an earlier instance and
        # close them too, so their log files do not stay open.
        for stale_handler in list(log.handlers):
            log.removeHandler(stale_handler)
            stale_handler.close()

        # read in config file
        self.config = Conf.Conf(config_file, override=override)

        # make sure base logging dir exists
        log_dir = self.config.get_var('Dir','log_dir')
        log_dir = os.path.abspath(os.path.expanduser(log_dir))
        if not os.path.isdir(log_dir):
            try:
                os.makedirs(log_dir)
            except OSError as err:
                log.error('Could not make %s: %s. Exiting.', log_dir, err)
                sys.exit(1)
        self.config.set_var('Dir', 'base_dir', log_dir)

        # set up file to log output to
        fh = logging.FileHandler(os.path.join(log_dir, 'mastiff.log'))
        fh.setFormatter(logging.Formatter(format_))
        log.addHandler(fh)
        fh.setLevel(loglevel)

        # verbose logging set in the config and not command line?
        if self.config.get_bvar('Misc','verbose') == True and \
           loglevel != logging.ERROR:
            log.setLevel(logging.DEBUG)
            fh.setLevel(logging.DEBUG)

        def configured_dirs(option):
            """Return the comma-separated directories set in Dir/<option>."""
            raw = self.config.get_var('Dir', option)
            if raw is None:
                # Option not configured: contribute nothing instead of the
                # literal path "None".
                return []
            stripped = [entry.strip() for entry in str(raw).split(',')]
            # Skip empty and whitespace-only entries.
            return [os.path.expanduser(entry) for entry in stripped if entry]

        # get path to category plugins
        self.cat_paths = [ os.path.dirname(Cats.__file__) ]
        self.output_paths = [ os.path.dirname(masOutput.__file__) ]

        # built-in analysis plug-ins first, then any configured directories
        self.plugin_paths = [ os.path.dirname(analysis.__file__)]
        self.plugin_paths.extend(configured_dirs('plugin_dir'))

        # do the same for output plugins
        self.output_paths.extend(configured_dirs('output_plugin_dir'))

        self.filetype = dict()
        self.file_name = None
        self.hashes = None
        self.cat_list = list()
        self.activated_plugins = list()

        # Build the managers
        self.cat_manager = PluginManager()
        self.plugin_manager = PluginManager()
        self.output_manager = PluginManager()

        # Find and load all category plugins
        cat_filter = dict()
        self.cat_manager.setPluginPlaces(self.cat_paths)
        self.cat_manager.collectPlugins()

        # Import all of the modules for the categories so we can access
        # their classes.
        for pluginInfo in self.cat_manager.getAllPlugins():

            log.debug('Found category: %s', pluginInfo.name)
            try:
                mod_name = "mastiff.plugins.category.%s" % \
                           os.path.basename(pluginInfo.path)
                cat_mod = __import__(mod_name,
                                   fromlist=["mastiff.plugins.category"])
            except ImportError as err:
                log.error("Unable to import category %s: %s",
                          pluginInfo.name,
                          err)
                self.cat_manager.deactivatePluginByName(pluginInfo.name)
                continue
            else:
                # We were able to import it, activate it
                self.cat_manager.activatePluginByName(pluginInfo.name)
                log.debug("Activated category: %s", pluginInfo.name)

            # Cat is imported, add class to the category filter
            # cat_filter will be a dict in the form:
            #     { cat_name: cat_class }
            # and contains all the category plugins that have been activated
            cat_class = getattr(cat_mod,
                                pluginInfo.plugin_object.__class__.__name__)
            cat_filter.update({pluginInfo.plugin_object.cat_name: cat_class})

        #log.debug("Category Filters: %s", cat_filter)

        # Now collect and load all analysis plugins
        self.plugin_manager.setPluginPlaces(self.plugin_paths)
        self.plugin_manager.setCategoriesFilter( cat_filter )
        self.plugin_manager.collectPlugins()

        # Finally collect all output plugins
        self.output_manager.setPluginPlaces(self.output_paths)
        self.output_manager.collectPlugins()

        # set up database
        self.db = DB.open_db_conf(self.config)
        DB.create_mastiff_tables(self.db)

        # set up the output object
        self.output = dict()

        # init the filename if we have it
        if fname is not None:
            self.init_file(fname)

    def __del__(self):
        """
           Class destructor.
        """
        # Close down all logging file handles so we don't have any open file descriptors
        log = logging.getLogger("Mastiff")
        handles = list(log.handlers)
        for file_handle in handles:
            log.removeHandler(file_handle)
            file_handle.close()

    def init_file(self, fname):
        """
           Validate the filename to ensure it can be accessed and set
           up class variables.

           This function is called when a filename is given or can be
           called directly.
        """
        log = logging.getLogger("Mastiff.Init_File")

        if fname is None:
            return None

        try:
            with open(fname, 'rb') as my_file:
                data = my_file.read()
        except IOError, err:
            log.error("Could not open file: %s", err)
            return None

        self.file_name = fname

        # create tuple of md5, sha1 and sha256 hashes
        self.hashes = hashlib.md5(data).hexdigest(), \
                      hashlib.sha1(data).hexdigest(), \
                      hashlib.sha256(data).hexdigest()
        self.config.set_var('Misc', 'hashes', self.hashes)

        self.output[self.hashes] = dict()

        # update log_dir
        log_dir = os.path.abspath(os.path.expanduser(self.config.get_var('Dir','log_dir'))) + \
                  os.sep + \
                  self.hashes[0]
        self.config.set_var('Dir', 'log_dir', log_dir)

        # create log dir
        if not os.path.exists(log_dir):
            try:
                os.makedirs(log_dir)
            except OSError, err:
                log.error('Could not make %s: %s. Exiting.', log_dir, err)
                sys.exit(1)

        # lets set up the individual log file
        # we may miss out on a couple prior logs, but thats OK
        log = logging.getLogger('Mastiff')
        fh = logging.FileHandler(log_dir + os.sep + 'mastiff.log' )
        format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
        fh.setFormatter(logging.Formatter(format_))
        log.addHandler(fh)
        fh.setLevel(logging.INFO)

        log = logging.getLogger("Mastiff.Init_File")
        log.info('Analyzing %s.', self.file_name)
        log.info("Log Directory: %s", log_dir)

        # copy file to the log directory
        if self.config.get_bvar('Misc', 'copy') is True:
            try:
                copyfile(self.file_name, log_dir + os.sep + os.path.basename(self.file_name) + '.VIR')
            except IOError, err:
                log.error('Unable to copy file: %s', err)
            log.debug('Copied file to log directory.')
        else:
            log.debug('Configuration set to not copy file.')

        # add entry to database if it exists
        if self.db is not None:
            log.debug('Adding entry to database.')
            DB.insert_mastiff_item(self.db, self.hashes)

        return self.hashes

    def activate_plugins(self, single_plugin=None):
        """
           Activate all plugins that are in the categories we selected.
           If single_plugin is given, only activate that plug-in.
           Note: File Information plug-in is ALWAYS run.
        """

        has_prereq = list()

        for cats in self.cat_list:

            log = logging.getLogger('Mastiff.Plugins.Activate')
            log.debug('Activating plugins for category %s.', cats)

            self.output[self.hashes][cats] = dict()

            for plugin in self.plugin_manager.getPluginsOfCategory(cats):

                # check if we are running a single plugin - file information always gets run
                if single_plugin is not None and single_plugin != plugin.name and plugin.name != 'File Information':
                    continue

                plugin.plugin_object.set_name(plugin.name)
                log.debug('Validating plugin "%s"', plugin.name)

                # if the plugin validates, try to activate it
                if self.validate(plugin.name, plugin.plugin_object) == True:
                    if plugin.plugin_object.prereq is not None:
                        # this plugin has a pre-req, can't activate yet
                        has_prereq.append([cats, plugin])
                    else:
                        log.debug('Activating "%s".', plugin.name)
                        self.plugin_manager.activatePluginByName(plugin.name, cats)
                        self.activated_plugins.append(plugin)
                else:
                    log.debug("Removing plugin %s %s.", plugin.name, cats)
                    self.plugin_manager.deactivatePluginByName(plugin.name,
                                                               cats)

        # now try to activate any plug-ins that have pre-reqs
        flag = True
        while flag is True:
            flag = False
            for plugins in has_prereq:
                # check to see if the pre-req in in the activated list
                inact = [p for p in self.activated_plugins if p.name == plugins[1].plugin_object.prereq]

                if len(inact) > 0:
                    # our pre-req has been activated, we can activate ourself
                    log.debug('Activating "%s". Pre-req fulfilled.', plugins[1].name)
                    self.plugin_manager.activatePluginByName(plugins[1].name, plugins[0])
                    self.activated_plugins.append(plugins[1])
                    has_prereq.remove(plugins)
                    flag = True

        # list out any plugins that were not activated due to missing pre-reqs
        for plugins in has_prereq:
            log.debug("Plugin %s not activated due to missing pre-req \"%s.\"" % \
                      (plugins[1].name, plugins[1].plugin_object.prereq ))

        # finally activate the output plugins
        for plugin in self.output_manager.getAllPlugins():
            plugin.plugin_object.set_name(plugin.name)
            log.debug('Activating Output Plug-in "{}"'.format(plugin.name))
            self.output_manager.activatePluginByName(plugin.name)
            #self.activated_plugins.append(plugin)


    def list_plugins(self, ctype='analysis'):
        """Print out a list of analysis or cat plugins."""

        if ctype == 'analysis':
            # analysis plug-ins
            print "Analysis Plug-in list:\n"
            print "%-25s\t%-15s\t%-25s\n%-50s" % \
                  ("Name", "Category", "Description", "Path")
            print '-' * 80

            for plugin in sorted(self.plugin_manager.getAllPlugins(),
                                  key=attrgetter('plugin_object.cat_name', 'name')):
                print "%-25s\t%-15s\t%-12s\n%-80s\n" % \
                (plugin.name, plugin.plugin_object.cat_name, \
                 plugin.description, plugin.path)

        elif ctype == 'cat':
            print "Category Plug-in list:\n"
            print "%-25s\t%-15s\t%-s" % ("Name", "FType", "Description")
            print '-' * 80
            # category plug-ins
            for plugin in sorted(self.cat_manager.getAllPlugins(),
                                 key=attrgetter('name')):
                print "%-25s\t%-15s\t%-s" % \
                      (plugin.name, plugin.plugin_object.cat_name,
                       plugin.description)
        elif ctype == 'output':
            print "Output Plug-in list:\n"
            print "%-25s\t%-s\n%s" % ("Name", "Description", "Path")
            print '-' * 80
            # category plug-ins
            for plugin in sorted(self.output_manager.getAllPlugins(),
                                 key=attrgetter('name')):
                print "%-25s\t%-s\n%-80s\n" % \
                      (plugin.name, plugin.description, plugin.path)
        else:
            print "Unknown plugin type."

    def set_filetype(self, fname=None, ftype=None):
        """
        Calls the filetype functions and loops through the category
        plug-ins to see which ones will handle this file.

        fname: file to analyze; only used when no file has been initialized
               on this instance yet.
        ftype: optional category name that is added to the category list
               regardless of what the category plug-ins report.

        Returns the self.filetype dictionary (keys 'magic' and 'trid').
        Exits with status 1 if no file is available or it cannot be read.
        If the category list is already populated the function returns
        immediately without re-running detection.
        """

        log = logging.getLogger('Mastiff.FileType')

        if fname is None and self.file_name is None:
            log.error("No file to analyze has been specified. Exiting.")
            sys.exit(1)
        elif fname is not None and self.file_name is None:
            if self.init_file(fname) is None:
                log.error("ERROR accessing file. Exiting.")
                sys.exit(1)

        if self.cat_list:
            # if self.cat_list is already set, assume that we've already
            # gone through this function
            return self.filetype

        if ftype is not None:
            # we are forcing a file type to run
            log.info('Forcing category plug-in "%s" to be added.', ftype)
            self.cat_list.append(ftype)

        # Grab the magic file type of the file. This is done here so as not
        # to do it in every category plug-in.
        self.filetype['magic'] = FileType.get_magic(self.file_name)

        # Grab the TrID type
        # get_section() returns None (after logging) when the section is
        # missing; treat that like an empty section so TrID is just skipped.
        trid_opts = self.config.get_section('File ID') or dict()
        self.filetype['trid'] = list()
        trid_bin = trid_opts.get('trid')
        trid_db = trid_opts.get('trid_db')
        if trid_bin and trid_db is not None:
            self.filetype['trid'] = FileType.get_trid(self.file_name,
                                                      trid_bin,
                                                      trid_db)
        elif trid_bin:
            log.error('TrID is configured but "trid_db" is not set. Skipping TrID.')

        # Cycle through all of the categories and see if they should be added
        # to the list of categories to be run.
        for pluginInfo in self.cat_manager.getAllPlugins():
            cat_name = pluginInfo.plugin_object.is_my_filetype(self.filetype,
                                                               self.file_name)
            log.debug('Checking cat %s for filetype.', pluginInfo.name)
            if cat_name is None:
                continue
            if cat_name in self.cat_list:
                # Already selected (e.g. forced through ftype): adding it
                # again would make its plug-ins get processed twice.
                log.debug('%s is already in the plugin selection list.', cat_name)
                continue
            # cat_list contains analysis plugin categories to be used
            self.cat_list.append(cat_name)
            log.debug('Adding %s to plugin selection list.', cat_name)

        # add file type to the DB
        if self.db is not None:
            DB.insert_mastiff_item(self.db, self.hashes, self.cat_list)

        return self.filetype

    def validate(self, name, plugin):
        """Return False if a plugin does not have the correct functions.

        name: plug-in name, used only in the log message.
        plugin: object that must expose callable activate, deactivate and
                analyze attributes.

        Returns True when all three entry points exist and are callable,
        False (after logging an error) otherwise.
        """

        log = logging.getLogger('Mastiff.Plugins.Validate')

        for required in ('activate', 'deactivate', 'analyze'):
            # getattr() with a default covers a missing attribute, and the
            # callable() result is actually checked so that an attribute
            # that exists but is not a function is rejected as well.
            if not callable(getattr(plugin, required, None)):
                log.error("%s missing %s function.", name, required)
                return False

        return True

    def analyze(self, fname=None, single_plugin=None):
        """
        Run the activated analysis plug-ins against a file and hand the
        collected results to every output plug-in.

        fname: file to initialize when no file has been set yet.
        single_plugin: passed through unchanged to activate_plugins().

        Returns False when the file cannot be initialized; otherwise falls
        off the end (None). Exits the process when no file name is known or
        no file type could be set.
        """

        log = logging.getLogger('Mastiff.Analysis')

        if self.file_name is None:
            if fname is None:
                log.error("No filename specified. Exiting.")
                sys.exit(1)
            # first time seeing the file, initialize it
            if self.init_file(fname) is None:
                log.error("ERROR accessing file. Exiting.")
                return False

        # work out the file type and the categories that apply
        self.set_filetype()
        log.info('File categories are %s.', self.cat_list)

        if not self.filetype:
            log.error("The file type has not been set. Exiting.")
            sys.exit(1)

        # activate the plugins
        self.activate_plugins(single_plugin)

        for plugin in self.activated_plugins:
            # only activated plug-ins are run
            if plugin.is_activated == False:
                continue

            log.debug('Calling plugin "%s".', plugin.name)

            worker = plugin.plugin_object

            # expose the results gathered so far to the plug-in
            worker.results = self.output[self.hashes]

            # a plug-in that supports the universal output returns a page
            plug_out = worker.analyze(self.config, self.file_name)

            if plug_out is False or plug_out is None:
                continue
            if isinstance(plug_out, masOutput.page):
                # file the page under the plug-in's category and name
                self.output[self.hashes][worker.cat_name][worker.name] = plug_out

        # let every output plug-in render the collected data
        for writer in self.output_manager.getAllPlugins():
            writer.plugin_object.output(self.config, self.output)

        self.config.dump_config()

        log.info('Finished analysis for %s.', self.file_name)

# end class mastiff


================================================
FILE: mastiff/filetype.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
File Type Analysis Functions

The functions within this module provide the functionality to help determine
the type of file given to it.

This module now supports the use of two different types of libmagic Python libraries:
- The libmagic Python library maintained with file (ftp://ftp.astron.com/pub/file/).
  This is the version installed via most Debian-based repositories.
- ahupp's python-magic repository installed via pip.
  (https://github.com/ahupp/python-magic)

"""

__version__ = "$Id: 82df116d3435226d15057b63acbed2b77919a52d $"

import magic
import logging
import subprocess
import re
import os

try:
    import yara
except ImportError, error:
    print "Could not import yara: %s" % error

def get_magic(file_name):
    """ Determine the file type of a given file based on its magic result.

        Two Python magic bindings are supported: the one shipped with the
        file utility (magic.open) and ahupp's python-magic (magic.from_file).
        The first is tried; if the module lacks that API the second is used.

        file_name: path of the file to identify.

        Returns the magic description string, or None if no usable magic
        library is installed or the type could not be determined.
    """

    log = logging.getLogger('Mastiff.FileType.Magic')

    try:
        # try to use magic from the file source code
        magic_ = magic.open(magic.MAGIC_NONE)
        try:
            magic_.load()
            try:
                file_type = magic_.file(file_name)
            except Exception:
                log.error('Could not determine magic file type.')
                return None
        finally:
            # release the libmagic handle on success and on failure alike
            magic_.close()
    except AttributeError:
        # Now we are trying ahupp's magic library. The exception class is
        # looked up on the module; fall back to Exception for releases that
        # do not define it so the except clause itself cannot fail.
        magic_error = getattr(magic, 'MagicException', Exception)
        try:
            file_type = magic.from_file(file_name)
        except AttributeError:
            log.error('No valid magic libraries installed.')
            return None
        except magic_error:
            log.error('Could not determine magic file type.')
            return None

    log.debug('Magic file type is "%s"', file_type)

    return file_type

def get_trid(file_name, trid, trid_db):
    """ DEPRECATING: RECOMMENDED NOT TO USE
        TrID is a file identification tool created by Marco Pontello.
        Unfortunately, TrID does not have a Linux library we can use, so we
        will run the program and store its results.

        file_name: file to analyze
        trid = path to trid binary
        trid_db = path to trid database

        Returns a list of the hits from TrID. Each item of the returned list
        will contain a list with [ percentage, description ]. An empty list
        is returned when TrID is missing, cannot be started, or writes
        anything to stderr.
    """

    log = logging.getLogger('Mastiff.FileType.TrID')
    # raw string: the backslashes belong to the regex, not to Python
    pattern = r'^\s*([0-9\.]+)\% \([\w\.]+\) ([\S\s]+) \([0-9\/]+\)$'
    results = list()

    # if files don't exist, return empty list
    if not os.path.isfile(trid) or not os.path.isfile(trid_db):
        log.warning('TrID cannot be found. Skipping TrID file type detection.')
        return results

    trid_db = '-d:' + trid_db

    # TrID has a bug in it where it can't open a file if it begins with "./"
    # remove that
    if file_name.startswith('./'):
        file_name = file_name[2:]

    try:
        run = subprocess.Popen([trid, trid_db, file_name],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)
    except OSError as err:
        # Popen reports a missing or non-executable binary as OSError
        log.error('Could not run TrID: %s', err)
        return results

    (output, error) = run.communicate()
    if error is not None and len(error) > 0:
        log.error('Error running TrID: %s', error)
        return results

    # the pipe yields bytes on interpreters where str is unicode
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')

    # create a list of hits
    # each item in results will be [ percentage, description ]
    # splitlines() also copes with CRLF line endings
    for line in output.splitlines():
        match = re.match(pattern, line)
        if match is not None:
            results.append([float(match.group(1)), match.group(2)])

    log.debug('TrID types are: %s', results)

    return results

def yara_typecheck(filename, yara_rule):
    """ Check for file type based on yara rule.

        filename: path of the file to scan.
        yara_rule: yara rule source text, or None.

        Returns True if at least one rule matches, False otherwise
        (including when no rule is given, the rule does not compile, or
        the scan fails).
    """
    log = logging.getLogger('Mastiff.FileType.Yara')

    if yara_rule is None:
        return False

    try:
        rules = yara.compile(source=yara_rule)
    except yara.SyntaxError as err:
        # log the exception bound by this handler
        log.error('Rule Error: %s', err)
        return False
    except Exception:
        log.error("Error attempting to perform Yara filetype.")
        return False

    try:
        matches = rules.match(filename, timeout=10)
    except yara.Error as err:
        log.error('Yara error: %s', err)
        return False

    if len(matches) > 0:
        log.debug('File Type matches rule %s', matches[0].rule)
        return True

    return False

if __name__ == '__main__':
    # Command-line helper: print the magic type of the file named by the
    # first argument; do nothing when no argument is given.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        print(get_magic(cli_args[0]))


================================================
FILE: mastiff/plugins/__init__.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
   This file contains a number of helper functions for misc. tasks
   the plug-ins may want to use.
"""

__version__ = "$Id: 3fc4dad80994edc30d0dfd81ecadcca67bb486a9 $"

import httplib, mimetypes
import binascii

"""
    The following are taken from
    http://code.activestate.com/recipes/146306/
    and are used to allow the uploading of files to multipart forms.
"""
def post_multipart(host, method, selector, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.

    host: host (optionally host:port) to connect to.
    method: URL scheme; a value starting with 'https' selects an HTTPS
            connection, anything else plain HTTP.
    selector: request path sent in the POST line.
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be uploaded as files

    Return the server's response page.
    """
    content_type, body = encode_multipart_formdata(fields, files)
    if method.startswith('https'):
        h = httplib.HTTPSConnection(host)
    else:
        # HTTPConnection, not the legacy httplib.HTTP wrapper: the wrapper
        # has no getresponse(), so plain-http posts used to fail.
        h = httplib.HTTPConnection(host)

    try:
        h.putrequest('POST', selector)
        h.putheader("User-Agent", 'MASTIFF Statis Analysis Framework')
        h.putheader('Content-Type', content_type)
        h.putheader('Content-Length', str(len(body)))
        h.endheaders()
        h.send(body)
        myresponse = h.getresponse().read()
    finally:
        # always release the socket, even when the request fails
        h.close()
    return myresponse

def encode_multipart_formdata(fields, files):
    """
    Build a multipart/form-data request body.

    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be uploaded as files
    Return (content_type, body) ready for httplib.HTTP instance
    """
    boundary = '----------MASTIFF_STATIC_ANALYSIS_FRAMEWORK$'
    delimiter = '--' + boundary

    parts = []
    # plain form fields: delimiter, disposition header, blank line, value
    for (key, value) in fields:
        parts.extend([delimiter,
                      'Content-Disposition: form-data; name="%s"' % key,
                      '',
                      value])
    # file uploads additionally carry a filename and a content type
    for (key, filename, value) in files:
        parts.extend([delimiter,
                      'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename),
                      'Content-Type: %s' % get_content_type(filename),
                      '',
                      value])
    # closing delimiter followed by an empty part so the body ends in CRLF
    parts.extend([delimiter + '--', ''])

    body = '\r\n'.join(parts)
    return 'multipart/form-data; boundary=%s' % boundary, body

def get_content_type(filename):
    """ Guess the MIME type from the file name, defaulting to a generic binary type. """
    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'

def bin2hex(data):
    """
        Return data with every character outside the printable ASCII range
        (codes 32 to 126) replaced by a backslash-x hex escape.
    """

    pieces = []
    for char in data:
        code = ord(char)
        if 31 < code < 127:
            pieces.append(char)
        else:
            pieces.append('\\x' + binascii.hexlify(char))

    return ''.join(pieces)

def printable_str(string):
    """ Helper function that rewrites every character outside the printable
        ASCII range (codes 32 to 126) as a backslash-x two-digit hex escape. """

    def _render(char):
        code = ord(char)
        if 32 <= code <= 126:
            return char
        return r'\x%02x' % code

    return ''.join(_render(char) for char in string)


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-peinfo.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
PE Info plugin

Plugin Type: EXE
Purpose:
  Dump information on the PE structure of the given executable. This is
  done using pefile's dump_info() API. It is not structured in any way.

  Sample code from the pefile and Didier Stevens pecheck.py was used or
  referenced for this plug-in.

Output:
   - peinfo-quick.txt - contains minimal information that analysts may
     find useful.

   - peinfo-full.txt - contains full information on the file.

Requirements:
   - pefile library (http://code.google.com/p/pefile/)

"""

__version__ = "$Id: 7dd537f22578be78ca7e142ea73a7ebe4e2163d5 $"

import logging
import os
import time
import sys

try:
    import pefile
except ImportError, err:
    print ("Unable to import pefile: %s" % err)

from mastiff.plugins import printable_str
import mastiff.plugins.category.exe as exe

class PEInfo(exe.EXECat):
    """Dumps PE information.

    Two text reports are written into the log directory:
    peinfo-quick.txt (summary) and peinfo-full.txt (section table followed
    by pefile's dump_info() output).
    """

    def __init__(self):
        """Initialize the plugin."""
        exe.EXECat.__init__(self)

    def analyze(self, config, filename):
        """Analyze the file.

        config: configuration object; the log directory is read from it
                with get_var('Dir', 'log_dir').
        filename: path of the file to parse with pefile.

        Returns True when both reports were written, False otherwise.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        try:
            pe = pefile.PE(filename)
        except Exception:
            # any parsing failure is reported the same way
            log.error('Unable to parse PE file: %s' % sys.exc_info()[1])
            return False

        log_dir = config.get_var('Dir', 'log_dir')
        if not self.output_file_quick(log_dir, pe):
            return False
        if not self.output_file_full(log_dir, pe):
            return False

        return True

    @staticmethod
    def _dump_section_headers(pe):
        """
        Small internal function to dump the section headers in a table.

        pe: parsed pefile.PE object.

        Returns the table as a string: one line per section with its name,
        entropy and the IMAGE_SCN_ flags that are set on it.
        """
        section_flags = pefile.retrieve_flags(pefile.SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
        lines = ['\nNumber of Sections: %d\n' % pe.FILE_HEADER.NumberOfSections,
                 '{0:15} {1:8} {2:40}\n'.format('Section Name', 'Entropy', 'Flags'),
                 '-'*65 + '\n']
        for section in pe.sections:
            # thanks to the pefile example code for this
            flags = [flag[0] for flag in section_flags if getattr(section, flag[0])]

            # section names are NUL padded; strip the padding before printing
            name = str(section.Name).replace('\0', '')
            lines.append('{0:15} {1:<8.5} {2:40}\n'.format(name,
                                                          section.get_entropy(),
                                                          ', '.join(flags)))
        lines.append('\n')
        return ''.join(lines)

    def output_file_quick(self, outdir, pe):
        """Output short, useful information on file.

        outdir: directory in which peinfo-quick.txt is created.
        pe: parsed pefile.PE object.

        Returns True on success, False when the report cannot be written
        or pefile raises PEFormatError while the data is gathered.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.quick')

        try:
            # the with-block closes the report even if pefile raises
            # part-way through
            with open(outdir + os.sep + 'peinfo-quick.txt', 'w') as outfile:
                outfile.write('PE Header Information\n\n')
                outfile.write('Quick Info:\n\n')
                try:
                    outfile.write('TimeDateStamp: %s\n' % time.asctime(time.gmtime(pe.FILE_HEADER.TimeDateStamp)))
                except ValueError:
                    outfile.write('TimeDateStamp: Invalid Time %x\n' % (pe.FILE_HEADER.TimeDateStamp))

                # an unrecognised subsystem value must not abort the report
                subsystem = pe.OPTIONAL_HEADER.Subsystem
                outfile.write('Subsystem: %s\n' % pefile.SUBSYSTEM_TYPE.get(subsystem,
                                                                           'Unknown (%s)' % subsystem))

                outfile.write(self._dump_section_headers(pe))

                # any parsing warnings (often related to packers)
                outfile.write('\nParser Warnings:\n')
                for warning in pe.get_warnings():
                    outfile.write('- ' + warning + '\n')

                # file info - thx to Ero Carrera for sample code
                # http://blog.dkbza.org/2007/02/pefile-parsing-version-information-from.html
                outfile.write('\nFile Information:\n')
                if hasattr(pe, "FileInfo"):
                    for fileinfo in pe.FileInfo:
                        if fileinfo.Key == 'StringFileInfo':
                            for string_entry in fileinfo.StringTable:
                                for entry in string_entry.entries.items():
                                    outfile.write("{0:20}:\t{1:40}\n".format(printable_str(entry[0]),
                                                                             printable_str(entry[1])))
                        if fileinfo.Key == 'VarFileInfo':
                            try:
                                for var in fileinfo.Var:
                                    first = list(var.entry.items())[0]
                                    outfile.write("{0:20}:\t{1:40}\n".format(printable_str(first[0]),
                                                                             printable_str(first[1])))
                            except Exception:
                                # there are times when a VarFileInfo structure may be present, but empty
                                pass
                else:
                    outfile.write('No file information present.\n')

                # imports
                outfile.write('\nImports:\n')
                if hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
                    outfile.write('{0:20}\t{1:30}\t{2:10}\n'.format('DLL', 'API', 'Address'))
                    outfile.write('-'*70 + '\n')
                    for entry in pe.DIRECTORY_ENTRY_IMPORT:
                        for imp in entry.imports:
                            outfile.write('{0:20}\t{1:30}\t{2:10}\n'.format(entry.dll, imp.name, hex(imp.address)))
                else:
                    outfile.write('No imports.\n')

                # exports
                outfile.write('\nExports:\n')
                if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                    outfile.write('{0:20}\t{1:10}\t{2:10}\n'.format('Name', 'Address', 'Ordinal'))
                    outfile.write('-'*50 + '\n')
                    for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                        outfile.write('{0:20}\t{1:10}\t{2:10}\n'.format(exp.name,
                                                                         hex(pe.OPTIONAL_HEADER.ImageBase + exp.address),
                                                                         exp.ordinal))
                else:
                    outfile.write('No Exports.\n')
        except IOError as err:
            log.error('Cannot write to peinfo-quick.txt: %s' % err)
            return False
        except pefile.PEFormatError as err:
            log.error('Unable to parse PE file: %s' % err)
            return False

        return True

    def output_file_full(self, outdir, pe):
        """Output full information on file.

        outdir: directory in which peinfo-full.txt is created.
        pe: parsed pefile.PE object.

        Returns True on success, False when the report cannot be written
        or dumping the PE information fails.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.full')

        try:
            with open(outdir + os.sep + 'peinfo-full.txt', 'w') as outfile:
                outfile.write('\nFull Information Dump:\n')
                outfile.write(self._dump_section_headers(pe))
                outfile.write(pe.dump_info())
        except IOError as err:
            log.error('Cannot write to peinfo-full.txt: %s' % err)
            return False
        except Exception as err:
            log.error('Unable to parse PE file: %s' % err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-peinfo.yapsy-plugin
================================================
[Core]
Name = PE Info
Module = EXE-peinfo

[Documentation]
Description = Dump information on the PE header and structure of an executable.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-resources.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
PE Resources Plug-in

Plugin Type: EXE
Purpose:
  This plug-in obtains information on any resources contained within
  the Windows EXE and extracts them.

  More information on how resources are stored can be found in the
  Microsoft PE and COFF Specification document.
  http://msdn.microsoft.com/library/windows/hardware/gg463125

  Thanks to Ero Carrera for creating the pefile library, whose code helped
  understand how to process resources.

Output:
   resources.txt - File containing a list of all resources in the EXE and any
                  associated information.
   log_dir/resource - Directory containing any extracted resource.

Pre-requisites:
   - pefile library (http://code.google.com/p/pefile/)

"""

__version__ = "$Id: 519a2014141003f89b18bb5c3de571729a952f8e $"

import logging
import os
import time

try:
    import pefile
except ImportError, err:
    print ("Unable to import pefile: %s" % err)

import mastiff.plugins.category.exe as exe

class EXE_Resources(exe.EXECat):
    """EXE Resources plugin code.

    Walks the PE resource directory, records every resource found, writes
    a resources.txt summary and extracts each resource into a 'resources'
    sub-directory of the log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        exe.EXECat.__init__(self)
        self.resources = list()
        self.pe = None
        self.output = dict()

    @staticmethod
    def _format_time(timestamp):
        """Return timestamp as asctime() text, or a marker if it cannot be converted."""
        try:
            return time.asctime(time.gmtime(timestamp))
        except (ValueError, OverflowError, OSError):
            return 'Invalid Time %x' % timestamp

    def analyze_dir(self, directory, prefix='', _type='', timedate=0):
        """ Analyze a resource directory and obtain all of its items.

            directory: pefile resource directory object to walk.
            prefix: comma separated names/IDs of the levels above this one.
            _type: resource type name resolved at the top level.
            timedate: ignored; the stamp is read from the directory itself.

            Every data entry found is appended to self.resources. Returns
            False if information for an entry cannot be read, None otherwise.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.analyze')

        # save the timedate stamp
        timedate = directory.struct.TimeDateStamp

        for top_item in directory.entries:

            if hasattr(top_item, 'data'):
                # at the language level that contains all of our information
                resource = dict()
                resource['Id'] = prefix
                resource['Type'] = _type
                # store the offset as the offset within the file, not the RVA!
                try:
                    resource['Offset'] = self.pe.get_offset_from_rva(top_item.data.struct.OffsetToData)
                    resource['Size'] = top_item.data.struct.Size
                    resource['Lang'] = [ pefile.LANG.get(top_item.data.lang, '*unknown*'),
                                         pefile.get_sublang_name_for_lang(top_item.data.lang, top_item.data.sublang) ]
                    resource['TimeDate'] = timedate
                except pefile.PEFormatError as err:
                    log.error('Error grabbing resource \"%s\" info: %s' %  (prefix, err))
                    return False

                self.resources.append(resource)
                log.debug('Adding resource item %s' % resource['Id'])
            elif hasattr(top_item, 'directory'):
                # Per-entry copy: a type resolved for one entry must not be
                # inherited by the siblings that follow it.
                entry_type = _type
                if top_item.name is not None:
                    # in a name level
                    label = str(top_item.name)
                else:
                    label = 'ID ' + str(top_item.id)
                    if len(prefix) == 0:
                        # if name is blank, we are in a Type level
                        entry_type = pefile.RESOURCE_TYPE.get(top_item.id)

                if len(prefix) == 0:
                    newprefix = label
                else:
                    newprefix = ', '.join([prefix, label])

                # we aren't at the end, recurse
                self.analyze_dir(top_item.directory, prefix=newprefix, _type=entry_type)

    def extract_resources(self, log_dir, filename):
        """
           Extract any resources from the file and put them in
           the resources dir.

           log_dir: log directory; resources go into log_dir/resources.
           filename: path of the PE file the resources are read from.

           Returns False when there is nothing to extract or the directory
           or input file cannot be opened, True otherwise. A resource that
           cannot be written is logged and skipped.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.extract')

        if len(self.resources) == 0:
            # no resources
            return False

        # create the dir if it doesn't exist
        log_dir = log_dir + os.sep + 'resources'
        if not os.path.exists(log_dir):
            try:
                os.makedirs(log_dir)
            except (IOError, OSError) as err:
                # os.makedirs reports failures as OSError
                log.error('Unable to create dir %s: %s' % (log_dir, err))
                return False

        try:
            my_file = open(filename, 'rb')
        except IOError as err:
            log.error('Unable to open file.')
            return False

        try:
            file_size = os.path.getsize(filename)

            # cycle through resources and extract them
            for res_item in self.resources:

                # check to make sure we won't go past the EOF
                if (res_item['Offset'] + res_item['Size']) > file_size:
                    log.error('File is smaller than resource location. Could be a packed file.')
                    continue

                my_file.seek(res_item['Offset'])
                data = my_file.read(res_item['Size'])
                out_name = res_item['Id'].replace('ID ', '_').replace(', ', '_').lstrip('_')

                if res_item['Type'] is not None and len(res_item['Type']) > 0:
                    out_name += '_' + res_item['Type']

                # Resource names come from the analyzed binary: strip path
                # separators so a name cannot point outside the resources dir.
                out_name = out_name.replace('/', '_').replace('\\', '_').replace('\0', '_')

                try:
                    with open(log_dir + os.sep + out_name, 'wb') as out_file:
                        log.debug('Writing %s to %s.' % (res_item['Id'], out_name))
                        out_file.write(data)
                except IOError as err:
                    log.error('Unable to write resource %s: %s' % (out_name, err))
                    continue
        finally:
            my_file.close()

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        config: configuration object; the log directory is read from it
                with get_var('Dir', 'log_dir').
        filename: path of the PE file to analyze.

        Returns False when the plug-in is inactive, the file cannot be
        parsed or it has no resource directory; otherwise the self.output
        dictionary.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # start clean so a reused instance never mixes in resources that
        # were collected from a previously analyzed file
        self.resources = list()
        self.output = dict()

        try:
            self.pe = pefile.PE(filename)
        except pefile.PEFormatError as err:
            log.error('Unable to parse PE file: %s' % err)
            return False

        if not hasattr(self.pe, 'DIRECTORY_ENTRY_RESOURCE'):
            log.info('No resources for this file.')
            return False

        # parse the directory structure
        self.analyze_dir(self.pe.DIRECTORY_ENTRY_RESOURCE)

        self.output['metadata'] = {  }
        self.output['data'] = dict()

        if len(self.resources) == 0:
            log.info('No resources could be found.')
        else:
            # output data to file and extract resources
            log_dir = config.get_var('Dir','log_dir')
            self.gen_output(log_dir)
            self.output_file(log_dir)
            self.extract_resources(log_dir, filename)

        return self.output

    def gen_output(self, outdir):
        """ Generate the output to send back.

            Fills self.output['data']['resources'] with a header row followed
            by one row per resource, ordered by file offset. outdir is not
            used. Always returns True.
        """

        rows = [[ 'Name/ID', 'Type', 'File Offset', 'Size', 'Language', 'Time Date Stamp']]

        for item in sorted(self.resources, key=lambda mydict: mydict['Offset']):

            lang = ', '.join(item['Lang']).replace('SUBLANG_', '').replace('LANG_', '')
            my_time = self._format_time(item['TimeDate'])
            rows.append([ item['Id'], item['Type'], hex(item['Offset']), hex(item['Size']), lang, my_time ])

        self.output['data']['resources'] = rows
        return True

    def output_file(self, outdir):
        """Print output from analysis to a file.

        outdir: directory in which resources.txt is created.

        Returns True on success, False when resources.txt cannot be written.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output')

        try:
            # the with-block makes sure the report is flushed and closed
            with open(outdir + os.sep + 'resources.txt', 'w') as outfile:
                outfile.write('Resource Information\n\n')

                outstr = '{0:20} {1:15} {2:15} {3:8} {4:<30} {5:<25}\n'.format(
                                                                   'Name/ID',
                                                                   'Type',
                                                                   'File Offset',
                                                                   'Size',
                                                                   'Language',
                                                                   'Time Date Stamp')
                outfile.write(outstr)
                outfile.write('-' * len(outstr) + '\n')

                for item in sorted(self.resources, key=lambda mydict: mydict['Offset']):

                    lang = ', '.join(item['Lang']).replace('SUBLANG_', '').replace('LANG_', '')
                    my_time = self._format_time(item['TimeDate'])

                    outstr = '{0:20} {1:15} {2:<15} {3:<8} {4:30} {5:<25}\n'.format(item['Id'],
                                                                     item['Type'],
                                                                     hex(item['Offset']),
                                                                     hex(item['Size']),
                                                                     lang,
                                                                     my_time)
                    outfile.write(outstr)
        except IOError as err:
            log.error('Could not open resources.txt: %s' % err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-resources.yapsy-plugin
================================================
[Core]
Name = Resources
Module = EXE-resources

[Documentation]
Description = Obtain information on and extract PE resources.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-sig.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
PE Digital Signature

Plugin Type: EXE
Purpose:
  This plug-in extracts any digital signatures from a PE executable and converts
  it to both DER and text format.

  Extraction is performed using the disitool.py tool from Didier Stevens. Many
  thanks to him for permission to use it.

  Conversion to text is performed using the openssl program.

  Validation of the signature is not yet done.

Pre-requisites:
   - pefile library (http://code.google.com/p/pefile/)
   - disitool.py (http://blog.didierstevens.com/programs/disitool/)
   - openssl binary (http://www.openssl.org/)

Configuration file:

[Digital Signatures]
# Options to extract the digital signatures
#
# disitool - path to disitool.py script.
# openssl - path to openssl binary
disitool = /usr/local/bin/disitool.py
openssl = /usr/bin/openssl

Output:
   sig.der - DER version of Authenticode signature.
   sig.txt - Text representation of signature.

TODO:
   - Validate the signature.

"""

__version__ = "$Id: c0be897e44fd598577a3739b7b978b52a0e8c997 $"

import logging
import os
import subprocess
import sys
from cStringIO import StringIO

import pefile

# Change the following line to import the category class for the type of
# files you wish to perform analysis on.
import mastiff.plugins.category.exe as exe

class EXESig(exe.EXECat):
    """PE digital signature analysis plugin.

    Extracts the digital signature of a PE file into sig.der with disitool
    and converts it to text (sig.txt) with the openssl binary.
    """

    def __init__(self):
        """Initialize the plugin."""
        exe.EXECat.__init__(self)

    def activate(self):
        """Activate the plugin."""
        exe.EXECat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        exe.EXECat.deactivate(self)

    def dump_sig_to_text(self, log_dir, openssl):
        """Convert a DER signature to its text format and write it out.

        Runs "openssl pkcs7 -inform DER -print_certs -text" on
        <log_dir>/sig.der and stores its standard output in
        <log_dir>/sig.txt.

        Parameters:
          log_dir - directory containing sig.der; sig.txt is written here.
          openssl - path to the openssl binary (may be None).

        Returns:
          True if the conversion succeeded, False if sig.der or the openssl
          binary is missing, openssl could not be started, it reported an
          error, or sig.txt could not be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_sig')
        der_file = log_dir + os.sep + 'sig.der'

        # make sure both the input file and the converter are available
        if not os.path.exists(der_file):
            log.error('Cannot find DER file: %s' % der_file)
            return False
        if openssl is None or not os.path.exists(openssl):
            log.error('Cannot open openssl binary: %s' % openssl)
            return False

        cmd = [openssl, 'pkcs7', '-inform', 'DER', '-print_certs', '-text', '-in', der_file]

        try:
            run = subprocess.Popen(cmd,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   close_fds=True)
            (output, error) = run.communicate()
        except OSError as err:
            # e.g. the configured path exists but is not executable
            log.error('Error running openssl: %s' % err)
            return False

        if error:
            log.error('Error running openssl: %s' % error)
            return False
        if run.returncode != 0:
            # a failure without stderr output must not be recorded as success
            log.error('Error running openssl: exit code %d' % run.returncode)
            return False

        if output is not None:
            try:
                # the with-block closes the file, no explicit close() needed
                with open(log_dir + os.sep + 'sig.txt', 'w') as out_file:
                    out_file.write(output)
            except IOError as err:
                log.error('Cannot write sig.txt: %s' % err)
                return False
            log.debug('Signature converted to text.')

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        Extracts the digital signature of filename into <log_dir>/sig.der
        and, when an openssl binary is configured, converts it to text.

        Parameters:
          config   - configuration object; the 'disitool' and 'openssl'
                     options are read from this plug-in's section and
                     'log_dir' from the 'Dir' section.
          filename - path of the file being analyzed.

        Returns:
          False if the plug-in is not activated, disitool cannot be found or
          imported, or the file is not a valid PE file; True otherwise (also
          when the file carries no signature or the text conversion fails).
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # locate disitool
        disitool_path = config.get_var(self.name, 'disitool')
        if disitool_path is None:
            log.error('disitool.py path is empty.')
            return False
        if not os.path.exists(disitool_path):
            log.error('disitool.py does not exist: %s' % disitool_path)
            return False

        # make disitool importable, without adding the same directory to
        # sys.path again on every analyzed file
        disitool_dir = os.path.dirname(disitool_path)
        if disitool_dir not in sys.path:
            sys.path.append(disitool_dir)
        try:
            import disitool
        except ImportError as err:
            log.error('Unable to import disitool: %s' % err)
            return False

        log_dir = config.get_var('Dir', 'log_dir')
        openssl = config.get_var(self.name, 'openssl')

        # extract sig
        # turn off stdout bc disitool.ExtractDigitalSignature is noisy;
        # the finally clause guarantees stdout is restored even on failure
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            sig = disitool.ExtractDigitalSignature(str(filename),
                                                   log_dir + os.sep + 'sig.der')
        except pefile.PEFormatError as err:
            log.error('Unable to extract signature: %s' % err)
            return False
        finally:
            sys.stdout = old_stdout

        if sig is None:
            log.info("No signature on the file.")
        else:
            log.info("Signature extracted.")
            if openssl is None:
                log.error('openssl binary not present. Not converting signature.')
            else:
                # convert the sig to text
                self.dump_sig_to_text(log_dir, openssl)
        return True


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-sig.yapsy-plugin
================================================
[Core]
Name = Digital Signatures
Module = EXE-sig

[Documentation]
Description = Extract PE digital signatures.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-singlestring.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Single-byte string plug-in

Plugin Type: EXE
Purpose:

Attackers have begun to obfuscate embedded strings by moving a single byte
at a time into a character array. In assembler, it looks like:

mov mem, 0x68
mov mem+4, 0x69
mov mem+8, 0x21
...

Using a strings program, these strings will not be found. This script looks
for any strings embedded in this way and prints them out.  It does this by
looking through the file for C6 opcodes, which are the start of the
"mov mem/reg, imm" instruction.  It will then decode it, grab the value and
create a string from it.

Requirements:
- distorm3 (http://code.google.com/p/distorm/)

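Output:
   single-string.txt - Offset and contents of each single-byte string found.
   The file is only written when at least one string meets the minimum length.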

"""

__version__ = "$Id: 6322146c8d971464c6f726ebdba3a3d7a2540028 $"

import logging
import re
import os

# distorm3 is a third-party dependency. If it is missing, report the problem
# instead of raising; Decode and Decode32Bits then stay undefined.
try:
    from distorm3 import Decode, Decode32Bits
except ImportError as err:
    print("EXE-SingleString: Could not import distorm3: %s" % err)
    
import mastiff.plugins.category.exe as exe

# Change the class name and the base class
class SingleString(exe.EXECat):
    """Extract single-byte strings from an executable.

    Attributes:
      length - minimum length a string must have to be written to the output.
      raw    - if true, tab/line feed/carriage return bytes are kept as-is
               instead of being written as the escapes \\t, \\n and \\r.
    """

    def __init__(self):
        """Initialize the plugin."""
        exe.EXECat.__init__(self)
        self.length = 3
        self.raw = False

    def activate(self):
        """Activate the plugin."""
        exe.EXECat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        exe.EXECat.deactivate(self)

    def findMov(self, filename):
        """Find every C6 opcode (mov reg/mem, imm) in the file and decode it.

        Parameters:
          filename - path of the file to scan.

        Returns:
          A dict mapping the file offset of each decoded MOV instruction to
          [mnemonic, destination operand, immediate value, instruction size].
        """
        #log = logging.getLogger('Mastiff.Plugins.' + self.name + '.findMov')

        # the with-block closes the file even if reading fails
        with open(filename, 'rb') as f:
            data = f.read()

        instructs = {}
        offset = data.find("\xc6")
        while offset != -1:
            # p will come back as list of (offset, size, instruction, hexdump)
            p = Decode(offset, data[offset:offset + 16], Decode32Bits)

            # guard against an empty decode result before indexing it
            if p:
                # break up the mnemonic
                ma = re.match(r'(MOV) ([\S\s]+), ([x0-9a-fA-F]+)', p[0][2])
                if ma is not None:
                    # mnemonic, destination, immediate, size
                    instructs[offset] = [ma.group(1), ma.group(2), ma.group(3), p[0][1]]

            # opcodes are searched at every byte, so continue with the next one
            offset = data.find("\xc6", offset + 1)

        return instructs

    def decodeBytes(self, instructs):
        """Build strings from runs of back-to-back MOV instructions.

        Starting at each offset not yet consumed, instructions are followed
        for as long as the next one begins exactly where the previous one
        ended. The immediate of each instruction contributes one character.
        An immediate that is neither printable ASCII (32-126) nor a tab,
        line feed or carriage return ends the current string and starts a
        new one after that instruction.

        Parameters:
          instructs - dict as returned by findMov().

        Returns:
          A list of [offset, string] pairs. Strings may be empty; filtering
          by length is done in output_file().
        """
        #log = logging.getLogger('Mastiff.Plugins.' + self.name + '.decodeBytes')

        # printable representation of the accepted control characters
        escapes = {9: "\\t", 10: "\\n", 13: "\\r"}

        strList = []
        usedBytes = set()

        for start in sorted(instructs.keys()):
            # offsets already consumed as part of an earlier chain
            if start in usedBytes:
                continue

            curOffset = start
            curString = ""
            off = start

            while off in instructs:
                usedBytes.add(off)
                hexVal = int(instructs[off][2], 16)
                opLen = instructs[off][3]

                if hexVal in escapes:
                    # tab, line feed or carriage return
                    if self.raw:
                        curString = curString + chr(hexVal)
                    else:
                        curString = curString + escapes[hexVal]
                elif 32 <= hexVal <= 126:
                    # add to string
                    curString = curString + chr(hexVal)
                else:
                    # end of string
                    #log.debug("%x non-string char - new string: %d: %s" % (curOffset, hexVal,curString))
                    strList.append([curOffset, curString])
                    curOffset = off + opLen
                    curString = ""

                off = off + opLen

            strList.append([curOffset, curString])

        return strList

    def analyze(self, config, filename):
        """Analyze the file.

        Reads the 'length' and 'raw' options from this plug-in's config
        section, extracts the single-byte strings from filename and writes
        them to the log directory.

        Returns False if the plug-in is not activated, True otherwise.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # fall back to the default when the option is missing or not a number
        length = config.get_var(self.name, 'length')
        try:
            self.length = int(length)
        except (TypeError, ValueError):
            if length is not None:
                log.warning('Invalid length option %r. Using 3.' % (length,))
            self.length = 3

        self.raw = config.get_bvar(self.name, 'raw')

        # find the bytes in the file
        instructs = self.findMov(filename)

        # now lets get the strings
        strlist = self.decodeBytes(instructs)

        self.output_file(config.get_var('Dir','log_dir'), strlist)

        return True

    def output_file(self, outdir, strlist):
        """Write the strings of at least self.length characters to a file.

        Parameters:
          outdir  - directory in which single-string.txt is created.
          strlist - list of [offset, string] pairs from decodeBytes().

        Returns:
          False if single-string.txt could not be written, True otherwise.
          No file is created when no string is long enough.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')

        # if the string is of the right len, print it
        min_len = int(self.length)
        outstr = ''.join(['0x%x: %s\n' % (string[0], string[1])
                          for string in strlist
                          if len(string[1]) >= min_len])

        if len(outstr) > 0:
            try:
                with open(outdir + os.sep + 'single-string.txt', 'w') as outfile:
                    outfile.write(outstr)
            except IOError as err:
                log.error("Cannot open single-string.txt: %s" % err)
                return False
        else:
            log.debug('No single-byte strings found.')

        return True


================================================
FILE: mastiff/plugins/analysis/EXE/EXE-singlestring.yapsy-plugin
================================================
[Core]
Name = Single-Byte Strings
Module = EXE-singlestring

[Documentation]
Description = Extract single-byte strings.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/EXE/__init__.py
================================================


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-fileinfo.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
File Info plugin

Plugin Type: Generic
Purpose:
  This plug-in obtains the file information, such as the name and file size
  and stores it into the database.

Database:
  A new table named files will be added to the database. This table contains
  the following fields:

  id - Primary Key
  sid - The id # of the file in the mastiff table.
  filename - The filename, including path, of the file being analyzed.
  size - The file size in bytes.
  firstseen -  GMT date of when it was first seen (in UNIX timestamp).
  lastseen - GMT date of when it was last seen (in UNIX timestamp).
  times - Number of times this file has been analyzed.

Output:
   Data is only sent to the database. No files are created.

"""

__version__ = "$Id: bc5c3cee7ede3183312b586a2e800bddc31bca1e $"

import os
import time
import logging
import sqlite3

import mastiff.plugins.category.generic as gen
import mastiff.sqlite as DB

class GenFileInfo(gen.GenericCat):
    """File Information plugin code.

    Collects the name, size, analysis time and hashes of the analyzed file,
    adds them to the plug-in's page data and records them in the 'files'
    table of the database.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        self.page_data.meta['filename'] = 'file_info'

    def analyze(self, config, filename):
        """Analyze the file.

        Parameters:
          config   - configuration object; the ('Misc', 'hashes') variable is
                     read from it.
          filename - path of the file being analyzed.

        Returns:
          False if the plug-in is not activated, otherwise self.page_data.
          A database failure does not change the return value.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        data = dict()
        data['filename'] = filename
        data['size'] = os.stat(filename).st_size
        data['time'] = time.time()
        data['hashes'] = config.get_var('Misc',  'hashes')

        self.gen_output(config, data)

        self.output_db(config, data)
        return self.page_data

    def gen_output(self, config, data):
        """Add the output into the local page structure.

        Creates a 'File Information' table (name, size, analysis time) and a
        'File Hashes' table; data['hashes'] is indexed as MD5, SHA1, SHA256.
        """
        info_table = self.page_data.addTable('File Information')
        info_table.addheader([('name', str), ('info', str)], printHeader=False)

        info_table.addrow(['File Name', data['filename']])
        info_table.addrow(['Size', data['size']])
        info_table.addrow(['Time Analyzed', data['time']])

        hash_table = self.page_data.addTable('File Hashes')
        hash_table.addheader([('Algorithm', str), ('Hash', str)])
        hash_table.addrow(['MD5', data['hashes'][0]])
        hash_table.addrow(['SHA1', data['hashes'][1]])
        hash_table.addrow(['SHA256', data['hashes'][2]])

    def output_db(self, config, data):
        """Store the file information in the 'files' table of the database.

        If a row for this filename and hash id already exists, its lastseen
        and times fields are updated; otherwise a new row is inserted.

        Returns:
          True on success, False if the database could not be opened or any
          database operation failed.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        db = DB.open_db_conf(config)
        if db is None:
            return False

        # always release the connection, whichever path is taken
        try:
            return self._store_file_info(db, data, log)
        finally:
            db.close()

    def _store_file_info(self, db, data, log):
        """Insert or update the 'files' row for data using the open db."""
        db.text_factory = str

        # If the 'files' table does not exist, add it
        if DB.check_table(db,  'files')  == False:
            log.debug('Adding table files')
            fields = [ 'id INTEGER PRIMARY KEY',
                       'sid INTEGER',
                       'filename TEXT',
                       'size INTEGER',
                       'firstseen INTEGER',
                       'lastseen INTEGER',
                       'times INTEGER']
            if DB.add_table(db, 'files',  fields) is None:
                return False
            db.commit()

        cur = db.cursor()
        sqlid = DB.get_id(db,  data['hashes'])

        if sqlid is None:
            log.error('%s hashes do not exist in the database',  data['filename'])
            return False

        # see if the filename already exists in the db
        try:
            cur.execute('SELECT id, times FROM files WHERE filename=? AND sid=?',
                         (data['filename'], sqlid, ))
        except sqlite3.Error as err:
            log.error('Could not query filename table: %s',  err)
            return False
        results = cur.fetchone()

        if results is not None:
            # filename is already in there. just update the lastseen item
            log.debug('%s is already in the database for hashes. Updating times.',
                      data['filename'])
            try:
                cur.execute('UPDATE files SET lastseen=?, times=? WHERE id=?',
                            (int(data['time']), results[1]+1, results[0], ))
                db.commit()
            except sqlite3.Error as err:
                log.error('Could not update times: %s',  err)
                return False
            return True

        # file info is not in the database, add it
        try:
            cur.execute('INSERT INTO files (sid, filename, size, firstseen, lastseen, times) \
                                 VALUES (?, ?, ?, ?, ?, ?)',
                        (sqlid,  data['filename'], data['size'],
                         int(data['time']),  int(data['time']), 1,  ))
            db.commit()
        except sqlite3.Error as err:
            log.error('Could not insert filename into files: %s',  err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-fileinfo.yapsy-plugin
================================================
[Core]
Name = File Information
Module = GEN-fileinfo

[Documentation]
Description = File Information Retrieval Plug-in
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-fuzzy.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Fuzzy Hashing plug-in

Plugin Type: Generic
Purpose:
  This plug-in generates the fuzzy hash of the given file.
  It can also compare the fuzzy hash against all of the fuzzy hashes already
  stored in the database.

Requirements:
  - ssdeep (http://ssdeep.sourceforge.net/)
  - pydeep (https://github.com/kbandla/pydeep)

Output:
   - fuzzy.txt - File listing the fuzzy hash of the file and any files that
     match.
   - The 'fuzzy' field will get added to the mastiff table in the DB to store
     the fuzzy hash.

"""

__version__ = "$Id: 1e313a680096a1bea3ff4e5ed5f497a2ca29cd57 $"

import logging

# pydeep is a third-party dependency. If it is missing, report the problem
# instead of raising; the name pydeep then stays undefined.
try:
    import pydeep
except ImportError as error:
    print('Gen-fuzzy: Could not import pydeep: %s' % error)

import mastiff.sqlite as DB
import sqlite3
import mastiff.plugins.category.generic as gen

class GenFuzzy(gen.GenericCat):
    """Fuzzy hashing plugin.

    Computes the fuzzy hash of the analyzed file with pydeep, stores it in
    the 'fuzzy' column of the mastiff table and, when the 'compare' option is
    enabled, compares it to the fuzzy hashes already stored there.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        self.page_data.meta['filename'] = 'fuzzy'
        # we will be adding to the file information hashes, so make sure it runs before us
        self.prereq = 'File Information'

    def analyze(self, config, filename):
        """Analyze the file.

        Returns:
          False if the plug-in is not activated, the fuzzy hash could not be
          generated or it could not be stored in the database; otherwise
          self.page_data.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')
        log.info('Generating fuzzy hash.')

        try:
            my_fuzzy = pydeep.hash_file(filename)
        except pydeep.error as err:
            log.error('Could not generate fuzzy hash: %s', err)
            return False

        if self.output_db(config, my_fuzzy) is False:
            return False

        fuzz_results = list()
        if config.get_bvar(self.name, 'compare') is True:
            fuzz_results = self.compare_hashes(config, my_fuzzy)

        self.output_file(config, my_fuzzy, fuzz_results)

        return self.page_data

    def compare_hashes(self, config, my_fuzzy):
        """Compare the current hash to all of the fuzzy hashes already collected.

        Returns:
          A list of [md5, percent] entries for every other file whose fuzzy
          hash matches with a score above 0, or None if the database could
          not be opened or queried or pydeep reported an error.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.compare')
        db = DB.open_db_conf(config)
        if db is None:
            log.error('Could not grab other fuzzy hashes: %s',
                      'unable to open database')
            return None

        log.info('Comparing fuzzy hashes.')

        fuzz_results = list()
        my_md5 = config.get_var('Misc', 'hashes')[0]
        query = 'SELECT md5, fuzzy FROM mastiff WHERE fuzzy NOT NULL'
        try:
            cur = db.cursor()
            # compare current hash for all fuzzy hashes
            for results in cur.execute(query):
                percent = pydeep.compare(my_fuzzy, results[1])
                # skip non-matches and the entry of the file itself
                if percent > 0 and my_md5 != results[0]:
                    fuzz_results.append([results[0], percent])
        except sqlite3.OperationalError as err:
            log.error('Could not grab other fuzzy hashes: %s', err)
            return None
        except pydeep.error as err:
            log.error('pydeep error: %s', err)
            return None
        finally:
            # release the connection on every path
            db.close()

        return fuzz_results

    def output_file(self, config, my_fuzzy, fuzz_results):
        """Add the fuzzy hash and the comparison results to the page data.

        The fuzzy hash is appended to the 'File Hashes' table produced by the
        File Information plug-in; matches go into a new 'Similar Fuzzy
        Hashes' table of this plug-in's page data.

        Parameters:
          config       - configuration object ('compare' option is read).
          my_fuzzy     - fuzzy hash of the analyzed file.
          fuzz_results - list of [md5, percent] entries, or None.

        Returns:
          False if the File Information output is missing, True otherwise.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')

        file_info = self.results['Generic']['File Information']
        if file_info is None:
            # File Information is not present, cannot continue
            log.error('Missing File Information plug-in output. Aborting.')
            return False

        # add fuzzy hashes to the hashes already generated
        # (adding new data onto an existing table)
        my_table = file_info['File Hashes']
        my_table.addrow(['Fuzzy Hash', my_fuzzy])

        fuzz_table = self.page_data.addTable('Similar Fuzzy Hashes')

        if fuzz_results is not None and len(fuzz_results) > 0:
            fuzz_table.addheader([('MD5', str), ('Percent', str)])

            for (md5,  percent) in fuzz_results:
                fuzz_table.addrow([md5, percent])
        elif config.get_bvar(self.name, 'compare') is True:
            # This only gets printed if we actually compared
            fuzz_table.addheader([('Data', str)], printHeader=False)
            fuzz_table.addrow(['No other fuzzy hashes were related to this file.'])

        return True

    def output_db(self, config, my_fuzzy):
        """Add fuzzy hash to the DB.

        Adds a 'fuzzy' column to the mastiff table if needed and stores
        my_fuzzy in the row belonging to the analyzed file's hashes.

        Returns:
          True on success, False if the database could not be opened or any
          database operation failed.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.DB_output')

        # open up the DB and extend the mastiff table to include fuzzy hashes
        db = DB.open_db_conf(config)
        if db is None:
            log.error('Unable to add fuzzy hash: %s', 'unable to open database')
            return False

        # always release the connection, whichever path is taken
        try:
            # create the 'files' table if it is missing
            if DB.check_table(db,  'files')  == False:
                log.debug('Adding table "files"')
                fields = [ 'id INTEGER PRIMARY KEY',
                           'sid INTEGER',
                           'filename TEXT',
                           'size INTEGER',
                           'firstseen INTEGER',
                           'lastseen INTEGER',
                           'times INTEGER']
                if DB.add_table(db, 'files',  fields) is None:
                    return False
                db.commit()

            if not DB.add_column(db, 'mastiff', 'fuzzy TEXT DEFAULT NULL'):
                log.error('Unable to add column.')
                return False

            # update our hash
            sqlid = DB.get_id(db, config.get_var('Misc', 'hashes'))
            if sqlid is None:
                # without an id the UPDATE below would silently match no row
                log.error('Unable to add fuzzy hash: %s',
                          'hashes do not exist in the database')
                return False

            query = 'UPDATE mastiff SET fuzzy=? WHERE id=?'
            try:
                db.cursor().execute(query, (my_fuzzy, sqlid, ))
                db.commit()
            except sqlite3.OperationalError as err:
                log.error('Unable to add fuzzy hash: %s', err)
                return False
        finally:
            db.close()

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-fuzzy.yapsy-plugin
================================================
[Core]
Name = Fuzzy Hashing
Module = GEN-fuzzy

[Documentation]
Description = Fuzzy Hashing Plug-in
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-hex.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Hex Dump plugin

Plugin Type: Generic
Purpose:
    This plug-in creates a hex view of the file being analyzed.
    
Output:
   hexdump.txt - Contents of the file displayed as hex and ASCII characters.

"""

__version__ = "$Id: b5381b6505e0ffbd3d2a8beba9fabba187a9b1b2 $"

import os
import logging

# Change the following line to import the category class for the type of
# files you wish to perform analysis on.
import mastiff.plugins.category.generic as gen

# Change the class name and the base class
class GEN_Hex(gen.GenericCat):
    """Hex Plug-in Code.

    Writes a hex dump of the analyzed file (16 bytes per line, with offset
    and ASCII columns) to hexdump.txt in the log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)

    def activate(self):
        """Activate the plugin."""
        gen.GenericCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        gen.GenericCat.deactivate(self)

    def analyze(self, config, filename):
        """Analyze the file.

        Parameters:
          config   - configuration object; the 'enabled' option of this
                     plug-in's section and ('Dir', 'log_dir') are read.
          filename - path of the file to dump.

        Returns:
          False if the plug-in is not activated, the file cannot be opened
          or read, or the dump cannot be written. True if the plug-in is
          disabled in the configuration or the dump was written.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # make sure we are enabled
        if config.get_bvar(self.name, 'enabled') is False:
            log.info('Disabled. Exiting.')
            return True

        try:
            in_file = open(filename, 'rb')
        except IOError:
            log.error('Unable to open file.')
            return False

        # lines are collected in a list and joined once at the end
        lines = []
        offset = 0

        try:
            while True:
                chars = in_file.read(16)
                if not chars:
                    # end of file; stopping here (rather than at a size taken
                    # beforehand) cannot loop forever on a short read
                    break

                hex_part = ''.join(['%02x ' % ord(byte) for byte in chars])
                alpha_string = ''.join([self.is_ascii(byte) for byte in chars])

                # a short last line is padded so the ASCII column lines up
                padding = ' ' * ((16 - len(chars)) * 3)

                lines.append('%08x: %s%s |%s|\n' %
                             (offset, hex_part, padding, alpha_string))
                offset += len(chars)
        except IOError as err:
            log.error('Cannot read data from file: %s' % err)
            return False
        finally:
            in_file.close()

        return self.output_file(config.get_var('Dir','log_dir'), ''.join(lines))

    def is_ascii(self, letter):
        """ Returns the letter if it is a printable ascii character, period otherwise. """
        if 31 < ord(letter) < 127:
            return letter
        return '.'

    def output_file(self, outdir, data):
        """Write the hex dump to hexdump.txt in outdir.

        Returns True on success, False if the file could not be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        try:
            # the with-block closes the file even if the write fails
            with open(outdir + os.sep + 'hexdump.txt', 'w') as outfile:
                outfile.write(data)
        except IOError as err:
            log.error('Could not write hexdump.txt: %s' % err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-hex.yapsy-plugin
================================================
[Core]
Name = Hex Dump
Module = GEN-hex

[Documentation]
Description = Creates a hex dump of the file.
Author = Tyler Hudak
Version = 0.1
Website = www.korelogic.com


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-mastiff-online.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
MASTIFF Online Submission Plug-in

Plugin Type: Generic
Purpose:
  This plug-in provides an interface to upload a file to MASTIFF Online.

Output:
   None
"""

__version__ = "$Id: 80ab7046885b0c48bf287c08e87fcb08e78be0df $"

import logging
import mastiff.plugins as plugins
import simplejson as json
import os
import sys

# Change the following line to import the category class for the type of
# files you wish to perform analysis on.
import mastiff.plugins.category.generic as gen

# Change the class name and the base class
class GenMastiffOnline(gen.GenericCat):
    """MASTIFF Online plugin code.

    Uploads the analyzed file to the MASTIFF Online service and records the
    server's answer (upload time, processing state and result URL) in the
    plug-in's output page.
    """

    def __init__(self):
        """Initialize the plugin and set the output page's 'filename' metadata."""
        gen.GenericCat.__init__(self)
        self.page_data.meta['filename'] = 'MASTIFF-online'

    def activate(self):
        """Activate the plugin."""
        gen.GenericCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        gen.GenericCat.deactivate(self)

    def analyze(self, config, filename):
        """Upload the file to MASTIFF Online and record the response.

        Args:
            config: configuration object; its get_bvar() is queried for this
                plug-in's 'accept_terms_of_service' and 'submit' options.
            filename: path of the file to upload.

        Returns:
            False when the plug-in is not activated, otherwise self.page_data.
            The page only gains a results table when the upload succeeded and
            the response could be parsed.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get terms of service acceptance
        tos = config.get_bvar(self.name,  'accept_terms_of_service')
        if tos is None or tos is False:
            log.info('Terms of service not accepted. Accept to enable MASTIFF Online submission.')
            return self.page_data

        submit = config.get_bvar(self.name,  'submit')
        if submit is False:
            log.info('Not configured to send to MASTIFF Online.')
            return self.page_data

        # send data to MASTIFF Online server
        host = 'mastiff-online.korelogic.com'
        method = 'https'
        selector = "/cgi/dispatcher.cgi/UploadMOSample"
        fields = [('accept_terms_of_service',  'true')]

        try:
            # read through a context manager so the handle is always closed
            with open(filename, "rb") as sample:
                file_to_send = sample.read()
        except IOError as err:
            log.error('Unable to read %s: %s', filename, err)
            return self.page_data

        files = [("upload", os.path.basename(filename), file_to_send)]
        log.debug('Sending sample to MASTIFF Online.')
        try:
            response = plugins.post_multipart(host, method, selector, fields, files)
        except IOError as err:
            # socket.error is a subclass of IOError, so connection failures
            # end up here instead of aborting the plug-in with a traceback
            log.error('Unable to send file to MASTIFF Online: %s', err)
            return self.page_data

        # what gets returned isn't technically JSON, so we have to manipulate
        # it a little bit: drop the 60 leading and 14 trailing characters and
        # turn single quotes into double quotes before decoding
        myjson = None
        try:
            myjson = json.loads(response[60:-14].replace('\'', '\"'))
        except ValueError as err:
            # simplejson's JSONDecodeError derives from ValueError
            log.error('Error processing response: {}'.format(err))
        except Exception:
            e = sys.exc_info()[0]
            log.error('Error processing incoming response: {}.'.format(e))

        if myjson is not None:
            self.gen_output(myjson)

        return self.page_data

    def gen_output(self, myjson):
        """Place the results into a Mastiff Output Page.

        Args:
            myjson: decoded server response; must be a mapping holding
                'sample_uploaded_on', 'sample_state' and 'sample_hash_md5'.

        Returns:
            True when the table was added, False when the response does not
            have the expected fields (nothing is added in that case).
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        required = ('sample_uploaded_on', 'sample_state', 'sample_hash_md5')
        if not isinstance(myjson, dict):
            log.error('Unexpected response from MASTIFF Online.')
            return False
        missing = [key for key in required if key not in myjson]
        if missing:
            log.error('Response from MASTIFF Online is missing: %s', ', '.join(missing))
            return False

        mytable = self.page_data.addTable('MASTIFF Online')
        mytable.addheader([('name', str), ('data', str)], printHeader=False)
        mytable.addrow(['Sample Uploaded On', myjson['sample_uploaded_on']])

        # known states get a friendly label, anything else is shown verbatim
        state_labels = {'todo': 'In queue', 'done': 'Completed'}
        state = myjson['sample_state']
        mytable.addrow(['Status', state_labels.get(state, state)])

        mytable.addrow(['URL', 'https://mastiff-online.korelogic.com/index.html?sample_hash_md5=' + myjson['sample_hash_md5']])

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-mastiff-online.yapsy-plugin
================================================
[Core]
Name = MASTIFF Online
Module = GEN-mastiff-online

[Documentation]
Description = MASTIFF Online Submission Plug-in
Author = Tyler Hudak
Version = 0.1
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-metascan.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Metascan Online Submission plugin

Plugin Type: Generic
Purpose:
  This plug-in determines if the file being analyzed has been analyzed on
  www.metascan-online.com previously.

  Information on the Metascan Online API can be found at:
  https://www.metascan-online.com/en/public-api

Requirements:
  - A Metascan Online API key is required to be entered into the configuration file.
    This can be obtained from www.metascan-online.com.

  - The simplejson module must be present. (https://github.com/simplejson/simplejson)

Configuration Options:

  api_key: Your API key from metascan-online.com. Leave this blank to disable the
  plug-in.

  submit [on|off]: Whether you want to submit files to the site or not.

Output:
   The results from Metascan Online retrieval or submission will be placed into
   metascan-online.txt.

"""

__version__ = "$Id: f8b6fe885be9b46a67dd7bc27e74c40d7a9eeff6 $"

import logging
import simplejson
import urllib2
import os
import socket

import mastiff.plugins.category.generic as gen

class GenMetascan(gen.GenericCat):
    """MetaScan Online plugin code.

    Looks the file's SHA256 up on Metascan Online; when the hash is unknown
    the file is (optionally) uploaded, otherwise the existing scan results
    are written to metascan-online.txt in the log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        self.api_key = None
        gen.GenericCat.__init__(self)

    def retrieve(self, sha256):
        """
           Retrieve results for this hash from Metascan Online.

           Returns the decoded JSON response, or None when the request
           failed or the answer could not be decoded (the error is logged).
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.retrieve')

        url = "https://hashlookup.metascan-online.com/v2/hash/" + sha256
        headers = {'apikey': self.api_key}

        # set up request
        log.debug('Submitting request to Metascan Online.')

        try:
            req = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(req, timeout=30)
            try:
                json = response.read()
            finally:
                # release the connection whether or not the read succeeded
                response.close()
        except urllib2.HTTPError as err:
            log.error('Unable to contact URL: %s', err)
            return None
        except urllib2.URLError as err:
            log.error('Unable to open connection: %s', err)
            return None
        except socket.timeout as err:
            log.error('Timeout when contacting URL: %s', err)
            return None
        except Exception:
            log.error('Unknown Error when opening connection.')
            return None

        try:
            # simplejson's JSONDecodeError derives from ValueError
            response_dict = simplejson.loads(json)
        except ValueError:
            log.error('Error in Metascan Online JSON response. Are you submitting too fast?')
            return None

        log.debug('Response received.')
        return response_dict

    def submit(self, config, filename):
        """
            Submit a file to Metascan Online for analysis.

            The outcome is written to metascan-online.txt in the configured
            log directory. Returns True when the file was uploaded, False
            when the report could not be opened or submission is disabled,
            and None when the upload itself failed.

            Note: This function will likely fail if a proxy is used.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.submit')

        outdir = config.get_var('Dir', 'log_dir')
        out_path = outdir + os.sep + 'metascan-online.txt'
        try:
            mo_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        # the with-block closes the report on every exit path below
        with mo_file:
            # make sure we are allowed to submit
            if config.get_bvar(self.name, 'submit') == False:
                log.info('Submission disabled. Not sending file.')
                mo_file.write('File does not exist on Metascan Online.\n')
                mo_file.write('Submission is disabled, not sending file.\n')
                return False

            log.info('File had not been analyzed by Metascan Online.')
            log.info('Sending file to Metascan Online.')

            # send file to Metascan Online
            url = "https://scan.metascan-online.com/v2/file"
            headers = {'apikey': self.api_key, 'filename': os.path.basename(filename)}

            try:
                req = urllib2.Request(url, headers=headers)
                with open(filename, "rb") as sample:
                    file_to_send = sample.read()
                response = urllib2.urlopen(req, data=file_to_send, timeout=30)
                try:
                    json = simplejson.loads(response.read())
                finally:
                    response.close()
                # looked up here so a reply without it is reported, not raised
                data_id = json['data_id']
            except urllib2.HTTPError as err:
                log.error('Unable to contact URL: %s', err)
                return None
            except urllib2.URLError as err:
                log.error('Unable to open connection: %s', err)
                return None
            except socket.timeout as err:
                log.error('Timeout when contacting URL: %s', err)
                return None
            except Exception:
                log.error('Unknown Error when sending file.')
                return None

            # write to file
            mo_file.write('File uploaded and processing.\n')
            mo_file.write('Link: https://www.metascan-online.com/en/scanresult/file/%s\n' % data_id)

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        Reads the API key from the plug-in's configuration and the SHA256
        from index 2 of the 'Misc'/'hashes' value, then either submits the
        file or writes the existing results.

        Returns True when a response was handled, False when the plug-in is
        not activated, no API key is configured or no response was received.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        self.api_key = config.get_var(self.name, 'api_key')
        if self.api_key is None or len(self.api_key) == 0:
            log.error('No Metascan Online API Key - exiting.')
            return False

        sha256 = config.get_var('Misc', 'hashes')[2]

        response = self.retrieve(sha256)
        if response is None:
            # error occurred
            log.error('Did not get a response from Metascan Online. Exiting.')
            return False

        hash_key = sha256.upper()
        if hash_key in response and response[hash_key] == "Not Found":
            # The file has not been submitted
            self.submit(config, filename)
        else:
            # write response to file
            self.output_file(config.get_var('Dir', 'log_dir'), response)

        return True

    def _format_report(self, response):
        """Build the text of the report from a decoded hash-lookup response.

        Raises KeyError/TypeError when the response lacks an expected field.
        """
        scan = response['scan_results']

        out_str = 'Metascan Online Results for %s\n' % response['file_info']['md5']
        out_str += 'Last scan date: %s\n' % scan['start_time']

        result_str = ''
        foundAV = 0

        if scan['scan_all_result_i'] > 0:
            result_str += '{0:22} {1:24} {2:40}\n'.format('AV', 'Version', 'Results')

            details = scan['scan_details']
            for av_key in sorted(details.keys(), key=lambda s: s.lower()):
                engine = details[av_key]

                # scan_result_i should be 1-9 (10 is engine updating)
                if 10 > engine['scan_result_i'] > 0:
                    # pick the fallback before encoding so the emptiness test
                    # never compares a byte string with a unicode string
                    threat_name = (engine['threat_found'] or u'Unknown Threat').encode('utf-8')

                    result_str += '{0:22} {1:24} {2:40}\n'.format(av_key,
                                                                  engine['def_time'],
                                                                  threat_name)
                    foundAV += 1

        out_str += 'Total positive results: %d/%d\n' % (foundAV, scan['total_avs'])
        out_str += 'Link to metascan-online.com:\nhttps://www.metascan-online.com/en/scanresult/file/%s\n\n' % response['data_id']

        return out_str + result_str

    def output_file(self, outdir, response):
        """Format the output from Metascan Online into a file.

        Writes metascan-online.txt inside outdir. Returns True on success,
        False when the response is malformed or the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')
        out_path = outdir + os.sep + 'metascan-online.txt'

        # format first so a malformed response never leaves a handle open
        try:
            report = self._format_report(response)
        except (KeyError, TypeError) as err:
            log.error('Unexpected Metascan Online response: %s', err)
            return False

        try:
            with open(out_path, 'w') as mo_file:
                mo_file.write(report)
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-metascan.yapsy-plugin
================================================
[Core]
Name = Metascan Online
Module = GEN-metascan

[Documentation]
Description = MetaScan Online Submission Plug-in
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-strings.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Embedded Strings Extraction Plugin

Plugin Type: Generic
Purpose:
  Execute the 'strings' program and obtain embedded ASCII and UNICODE
  strings within the given filename.  These will be returned in a
  dictionary where the key is the decimal offset of the string
  within the file and the value is a list of string type (U or A)
  and the string itself.

Configuration Options:

  strcmd = Path to the strings binary

  DO NOT CHANGE THE FOLLOWING OPTIONS UNLESS YOU KNOW WHAT YOU ARE DOING.
  str_opts = Options to send to strings every time it is called.
                   This should be set to "-a -t d" (without quotes).
  str_uni_opts = Options to send to strings to obtain UNICODE strings.
                 This should be set to "-e l" (without quotes).

Output:
   Output will be put into a file given a directory and the strings
   dictionary.
"""

__version__ = "$Id: 8970ce879282a3479538dd5d159f65ab4ad1092f $"

import subprocess
import re
import logging
import os

import mastiff.plugins.category.generic as gen

class GenStrings(gen.GenericCat):
    """Extract embedded strings.

    Runs the configured 'strings' binary twice (once with the base options,
    once with the extra Unicode options) and stores the results in
    self.strings as {decimal offset: [type, string]} with type 'A' or 'U'.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        self.strings = {}
        self.page_data.meta['filename'] = 'strings'
        self.prereq = 'File Information'

    def _insert_strings(self, output, str_type):
        """Insert output from strings command into the self.strings dict.

        Every line of the form '<decimal offset> <text>' is stored under its
        offset together with str_type; lines that do not match, or whose
        text is empty, are ignored.
        """

        for line in output.split('\n'):
            m = re.match(r'\s*([0-9]+)\s+(.*)', line)
            if m is not None and m.group(2):
                self.strings[int(m.group(1))] = [str_type, m.group(2)]

    def _run_strings(self, command, log):
        """Run one strings invocation; return its stdout or None on error."""
        try:
            run = subprocess.Popen(command,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   close_fds=True)
        except OSError as err:
            log.error('Error running program: %s' % err)
            return None

        (output, error) = run.communicate()
        # anything on stderr is treated as a failure
        if error is not None and len(error) > 0:
            log.error('Error running program: %s' % error)
            return None

        return output

    def analyze(self, config, filename):
        """
        Run the strings command on the given filename and extract ASCII
        and UNICODE strings. The formatted output is stored in self.strings.

        Returns self.page_data on success, None when the plug-in is not
        activated or its configuration is unusable, and False when running
        the strings command failed.
        """
        # make sure we are activated
        if self.is_activated == False:
            return None

        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # start clean so offsets from an earlier run are not mixed in
        self.strings = {}

        # get my config options
        str_opts = config.get_section(self.name)
        try:
            strcmd = str_opts['strcmd']
            base_opts = str_opts['str_opts']
            uni_opts = str_opts['str_uni_opts']
        except (KeyError, TypeError):
            # section or option missing from the configuration
            log.error('Strings options do not exist. Please check config. Exiting.')
            return None

        if not strcmd or \
           not os.path.isfile(strcmd) or \
           not os.access(strcmd, os.X_OK):
            log.error('%s is not accessible. Skipping.', strcmd)
            return None

        if not base_opts or not uni_opts:
            log.error('Strings options do not exist. Please check config. Exiting.')
            return None

        base_command = [strcmd] + base_opts.split()

        # obtain ASCII strings
        output = self._run_strings(base_command + [filename], log)
        if output is None:
            return False
        self._insert_strings(output, 'A')

        # obtain Unicode strings
        output = self._run_strings(base_command + uni_opts.split() + [filename], log)
        if output is None:
            return False
        self._insert_strings(output, 'U')

        self.gen_output()
        log.debug('Successfully grabbed strings.')

        return self.page_data

    def gen_output(self):
        """Place the results into a Mastiff Output Page.

        Adds an 'Embedded Strings' table to self.page_data: one row per
        string ordered by offset (offset shown in hex), or a single message
        row when no strings were found. Always returns True.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        # self.page_data was previously initialized
        # add a table to it
        str_table = self.page_data.addTable('Embedded Strings')

        if len(self.strings) == 0:
            log.warning("No embedded strings detected.")
            str_table.addheader([('Message', str)], printHeader=False)
            str_table.addrow(['No embedded strings detected.'])
            return True

        str_table.addheader([('Offset', str), ('Type', str), ('String', str)])
        for offset in sorted(self.strings):
            str_type, text = self.strings[offset]
            str_table.addrow(['{:0x}'.format(offset), str_type, text])

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-strings.yapsy-plugin
================================================
[Core]
Name = Embedded Strings Plugin
Module = GEN-strings

[Documentation]
Description = Embedded Strings Plugin
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-virustotal.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
VirusTotal Submission plugin

Plugin Type: Generic
Purpose:
  This plug-in determines if the file being analyzed has been analyzed on
  www.virustotal.com previously.

  Information on the VT API can be found at:
  https://www.virustotal.com/documentation/public-api/

Requirements:
  - A VirusTotal API key is required to be entered into the configuration file.
    This can be obtained from virustotal.com.

  - The simplejson module must be present. (https://github.com/simplejson/simplejson)

Configuration Options:

  api_key: Your API key from virustotal.com. Leave this blank to disable the
  plug-in.

  submit [on|off]: Whether you want to submit files to VT or not.

Output:
   The results from VirusTotal retrieval or submission will be placed into
   virustotal.txt.

Note:
   Unless special arrangements are made, VT will not let you send more than 4
   queries in a 1 minute timeframe. You may receive errors if you do.

"""

__version__ = "$Id: 8603d09770a593e2a2f9c03f2fa34aa6f6440112 $"

import logging
import simplejson
import urllib
import urllib2
import os
import socket

import mastiff.plugins as plugins
import mastiff.plugins.category.generic as gen

class GenVT(gen.GenericCat):
    """VirusTotal plugin code.

    Looks the file's MD5 up on VirusTotal; when VT has no report the file is
    (optionally) uploaded, otherwise the existing report is written to
    virustotal.txt in the log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        self.api_key = None
        gen.GenericCat.__init__(self)

    def retrieve(self,  md5):
        """
           Retrieve results for this hash from VT.
           This code based on the code from the VT API documentation.

           Returns the decoded JSON response, or None when the request
           failed or the answer could not be decoded (the error is logged).
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.retrieve')

        url = "https://www.virustotal.com/vtapi/v2/file/report"
        # set resource to the MD5 hash of the file
        parameters = {'apikey': self.api_key, 'resource': md5}

        # set up request
        log.debug('Submitting request to VT.')

        data = urllib.urlencode(parameters)
        try:
            req = urllib2.Request(url, data)
            # bounded wait so an unresponsive server cannot hang the analysis
            response = urllib2.urlopen(req, timeout=30)
            try:
                json = response.read()
            finally:
                response.close()
        except urllib2.HTTPError as err:
            log.error('Unable to contact URL: %s',  err)
            return None
        except urllib2.URLError as err:
            log.error('Unable to open connection: %s', err)
            return None
        except socket.timeout as err:
            log.error('Timeout when contacting URL: %s', err)
            return None
        except Exception:
            log.error('Unknown Error when opening connection.')
            return None

        try:
            # simplejson's JSONDecodeError derives from ValueError
            response_dict = simplejson.loads(json)
        except ValueError:
            log.error('Error in VT JSON response. Are you submitting too fast?')
            return None

        log.debug('Response received.')
        return response_dict

    def submit(self, config, filename):
        """
            Submit a file to VT for analysis.
            This code based on the code from the VT API documentation.

            The outcome is written to virustotal.txt in the configured log
            directory. Returns True when VT accepted the file, False
            otherwise (the reason is logged).

            Note: This function will likely fail if a proxy is used.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.submit')

        outdir = config.get_var('Dir', 'log_dir')
        out_path = outdir + os.sep + 'virustotal.txt'
        try:
            vt_file = open(out_path, 'w')
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        # the with-block closes the report on every exit path below
        with vt_file:
            # make sure we are allowed to submit
            if config.get_bvar(self.name, 'submit') == False:
                log.info('Submission disabled. Not sending file.')
                vt_file.write('File does not exist on VirusTotal.\n')
                vt_file.write('Submission is disabled, not sending file.\n')
                return False

            log.info('Sending file to VirusTotal')

            # send file to VT
            host = "www.virustotal.com"
            method = 'https'
            selector = "/vtapi/v2/file/scan"
            fields = [("apikey", config.get_var(self.name, 'api_key'))]

            try:
                with open(filename, "rb") as sample:
                    file_to_send = sample.read()
            except IOError as err:
                log.error('Unable to read %s: %s', filename, err)
                return False

            files = [("file", os.path.basename(filename), file_to_send)]
            try:
                json = simplejson.loads(plugins.post_multipart(host, method, selector,
                                                               fields, files))
            except socket.error as err:
                log.error('Unable to send file: %s' % err)
                return False
            except ValueError:
                # the server answered with something that is not JSON
                log.error('Error in VT JSON response. Are you submitting too fast?')
                return False

            # check for success
            if json['response_code'] != 1:
                # error
                log.error('Could not submit to VT:\n%s', json['verbose_msg'])
                return False

            # write to file
            vt_file.write(json['verbose_msg'] + '\n')
            vt_file.write('Link:\n' + json['permalink'] + '\n')

        return True

    def analyze(self, config, filename):
        """Analyze the file.

        Reads the API key from the plug-in's configuration and the MD5 from
        index 0 of the 'Misc'/'hashes' value, then either submits the file
        or writes the existing report.

        Returns True when a response was handled, False when the plug-in is
        not activated, no API key is configured or no response was received.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        self.api_key = config.get_var(self.name,  'api_key')
        if self.api_key is None or len(self.api_key) == 0:
            log.error('No VirusTotal API Key - exiting.')
            return False

        md5 = config.get_var('Misc',  'hashes')[0]

        response = self.retrieve(md5)
        if response is None:
            # error occurred
            log.error('Did not get a response from VT. Exiting.')
            return False

        # response of 1 means it has been scanned on VT before
        # response of 0 means that is has not
        if response['response_code'] != 1:
            # The file has not been submitted
            self.submit(config, filename)
        else:
            # write response to file
            self.output_file(config.get_var('Dir',  'log_dir'), response)

        return True

    def _format_report(self, response):
        """Build the text of the report from a decoded VT file report.

        Raises KeyError/TypeError when the response lacks an expected field.
        """
        parts = [
            'VirusTotal Results for %s\n' % response['md5'],
            'Last scan date: %s\n' % response['scan_date'],
            'Total positive results: %d/%d\n' % (response['positives'],
                                                 response['total']),
            'Link to virustotal.com:\n%s\n\n' % response['permalink'],
        ]

        if response['positives'] > 0:
            row = '{0:25} {1:15} {2:40}\n'
            parts.append(row.format('AV', 'Version', 'Results'))

            scans = response['scans']
            # list only the engines that flagged the file, case-insensitively sorted
            for av_key in sorted(scans.keys(), key=lambda s: s.lower()):
                if scans[av_key]['detected'] == True:
                    parts.append(row.format(av_key,
                                            scans[av_key]['version'],
                                            scans[av_key]['result']))

        return ''.join(parts)

    def output_file(self, outdir, response):
        """Format the output from VT into a file.

        Writes virustotal.txt inside outdir. Returns True on success, False
        when the response is malformed or the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')
        out_path = outdir + os.sep + 'virustotal.txt'

        # format first so a malformed response never leaves a handle open
        try:
            report = self._format_report(response)
        except (KeyError, TypeError) as err:
            log.error('Unexpected VirusTotal response: %s', err)
            return False

        try:
            with open(out_path, 'w') as vt_file:
                vt_file.write(report)
        except IOError as err:
            log.error('Unable to open %s for writing: %s', out_path, err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-virustotal.yapsy-plugin
================================================
[Core]
Name = VirusTotal
Module = GEN-virustotal

[Documentation]
Description = VirusTotal.com Submission Plug-in
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-yara.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Yara Plugin

Plugin Type: Generic
Purpose:
  This plug-in runs Yara rules against the file being analyzed. The rules
  are located through a configuration option and all of them are applied
  to the file.

Requirements:
  - Yara, libyara and yara-python must be installed.
    http://code.google.com/p/yara-project

Configuration Options:
[yara]
  yara_sigs = Base path to Yara signatures. This path will be recursed
              to find additional signatures. Files with ".yar" or ".yara" will
              be used.
              Leave blank to disable the plug-in.

Output:
   yara.txt - Output listing all matches found. This file will not be present
              if no matches were found.

Database:

  A new table named 'yara' will be created with the following fields:

    id INTEGER PRIMARY KEY = Primary key
    sid INTEGER DEFAULT NULL = ID of file being analyzed
    rule_name TEXT DEFAULT NULL = Name of the Yara rule matched
    meta TEXT DEFAULT NULL = Yara meta information
    tag TEXT DEFAULT NULL = Yara tag information
    rule_file TEXT DEFAULT NULL = Full path to rule file match is from
    file_offset INTEGER DEFAULT NULL = Offset in analyzed file match was found
    string_id TEXT DEFAULT NULL = ID of match variable from Yara rule
    data TEXT DEFAULT NULL = Data Yara rule matched on

  Only new information will be added to the database.
  The database is _NOT_ checked to see if old information is present.

NOTE:

  Since the Yara output can contain data that is in binary, any potential binary
  data is converted to hex. Within the string, the binary data will be
  represented as "backslash-xXX" with the XX being the hex equivalent.

  Please ensure all of your rules work in Yara before using them
  in mas.py.

"""

__version__ = "$Id: 0f0233e8220e4ca4a6677253006de25ecdb365f6 $"

import logging
import os
import sqlite3

try:
    import yara
except ImportError, error:
    print "GenYara: Could not import yara: %s" % error

import mastiff.sqlite as DB
import mastiff.plugins.category.generic as gen
import mastiff.plugins as plugins

class GenYara(gen.GenericCat):
    """Yara signature plug-in.

    Compiles every Yara rule file found beneath the configured ``yara_sigs``
    directory, runs the rules against the file being analyzed and records
    any matches in ``yara.txt`` and in the ``yara`` database table.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        # Path of the file being analyzed; set by analyze().
        self.filename = ""

    def analyze(self, config, filename):
        """
           Run all configured Yara rules against filename.

           config is the configuration object (read through get_section()
           and get_var()); filename is the path of the file to scan.

           Returns False if the plug-in is deactivated, not configured, or
           if the rules fail to compile or match. Returns True otherwise,
           including when no signature files or no matches are found.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')
        self.filename = filename

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False
        # a missing option is treated like a blank one instead of raising
        # KeyError: both mean the plug-in is disabled
        if 'yara_sigs' not in plug_opts or len(plug_opts['yara_sigs']) == 0:
            log.debug('No yara_sigs parameter. Disabling plug-in.')
            return False

        # find all yara signature files
        sig_files = self.get_sigs(plug_opts['yara_sigs'])
        if not sig_files:
            log.debug('No signature files detected. Exiting plug-in.')
            return True

        # yara.compile() takes a namespace -> path mapping; the rule file
        # path doubles as the namespace so each match reports its origin
        sig_dict = dict((path, path) for path in sig_files)

        # compile rules and run against file
        try:
            rules = yara.compile(filepaths=sig_dict)
        except yara.SyntaxError as err:
            log.error('Rule error: %s', err)
            return False

        # generate matches
        try:
            matches = rules.match(self.filename, callback=self._debug_print)
        except yara.Error as err:
            log.error('Yara error: %s', err)
            return False

        if len(matches) > 0:
            self.output_file(config.get_var('Dir', 'log_dir'), matches)
            self.output_db(config, matches)

        return True

    def get_sigs(self, sig_dir):
        """
           Recurse through a directory for Yara signature files.
           Files should end in ".yar" or ".yara".
           Returns a list of signature files, None on errors.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.get_sigs')

        # expand "~" once; isdir() is already False for a missing path
        base_dir = os.path.expanduser(sig_dir)
        if not os.path.isdir(base_dir):
            log.error('%s is not a directory or does not exist.', sig_dir)
            return None

        sig_files = list()

        # walk the directory and keep every yara sig file found
        for (dirpath, _dirnames, filenames) in os.walk(base_dir):
            for name in filenames:
                if name.endswith(('.yar', '.yara')):
                    sig_files.append(os.path.join(dirpath, name))

        return sig_files

    def _debug_print(self, data):
        """
           Yara match callback: log each matched string at debug level.
           Always returns yara.CALLBACK_CONTINUE so scanning continues.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.match')

        if data['matches']:
            for match in data['strings']:
                # match[2] is the matched data; it may be binary, so hex it
                log.debug('Match: %s: %s',
                          data['rule'], plugins.bin2hex(match[2]))

        return yara.CALLBACK_CONTINUE


    def output_file(self, outdir, matches):
        """
           Prints any Yara matches to a file named yara.txt in outdir.
           Returns True on success, False if the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_file')

        try:
            # "with" guarantees the handle is closed even if a write fails
            with open(os.path.join(outdir, 'yara.txt'), 'w') as out_file:
                if len(matches) == 0:
                    out_file.write('No Yara matches.')
                else:
                    out_file.write('Yara Matches for %s\n' % self.filename)
                    for item in matches:
                        out_file.write('\nRule Name: %s\n' % item.rule)
                        out_file.write('Yara Meta: %s\n' % item.meta)
                        out_file.write('Yara Tags: %s\n' % item.tags)
                        out_file.write('Rule File: %s\n' % item.namespace)
                        out_file.write('Match Info:\n')
                        for y_match in item.strings:
                            out_file.write('\tFile Offset: %d\n' % y_match[0])
                            out_file.write('\tString ID: %s\n' % y_match[1])
                            out_file.write('\tData: %s\n\n' %
                                           plugins.bin2hex(y_match[2]))
                        out_file.write('*'*79 + '\n')
        except IOError as err:
            log.error('Write error: %s', err)
            return False

        return True

    def output_db(self, config, matches):
        """
           Output any matches to the 'yara' database table, creating the
           table if needed. Rows already present are not inserted again.
           Returns True on success, False on any database problem.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output_db')

        db = DB.open_db_conf(config)
        if db is None:
            return False

        # the finally clause closes the connection on every exit path,
        # including the early error returns below
        try:
            # add the table 'yara' if it doesn't exist
            if DB.check_table(db, 'yara') == False:
                fields = ['id INTEGER PRIMARY KEY',
                          'sid INTEGER DEFAULT NULL',
                          'rule_name TEXT DEFAULT NULL',
                          'meta TEXT DEFAULT NULL',
                          'tag TEXT DEFAULT NULL',
                          'rule_file TEXT DEFAULT NULL',
                          'file_offset INTEGER DEFAULT NULL',
                          'string_id TEXT DEFAULT NULL',
                          'data TEXT DEFAULT NULL' ]
                if not DB.add_table(db, 'yara', fields ):
                    log.error('Unable to add "yara" database table.')
                    return False

            sqlid = DB.get_id(db, config.get_var('Misc', 'hashes'))
            sel_query = 'SELECT count(*) FROM yara '
            sel_query += 'WHERE sid=? AND rule_name=? AND meta=? AND tag=? AND '
            sel_query += 'rule_file=? AND file_offset=? AND string_id=? AND data=? '
            query = 'INSERT INTO yara '
            query += '(sid, rule_name, meta, tag, rule_file, file_offset, string_id, data) '
            query += 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)'

            cur = db.cursor()

            # go through all matches and insert into DB if needed
            try:
                for item in matches:
                    for y_match in item.strings:
                        match_insert = (sqlid, item.rule, str(item.meta),
                                        str(item.tags), item.namespace,
                                        y_match[0], y_match[1],
                                        plugins.bin2hex(y_match[2]))
                        # check to see if its already in there
                        cur.execute(sel_query, match_insert)
                        if cur.fetchone()[0] == 0:
                            # not in the db already, add it in
                            log.debug('Adding %s match to database.', item.rule)
                            cur.execute(query, match_insert)
                db.commit()
            except sqlite3.Error as err:
                log.error('SQL error when adding item to DB: %s' % err)
                return False
        finally:
            db.close()

        return True


================================================
FILE: mastiff/plugins/analysis/GEN/GEN-yara.yapsy-plugin
================================================
[Core]
Name = yara
Module = GEN-yara

[Documentation]
Description = Yara Signature Plug-in
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/GEN/__init__.py
================================================


================================================
FILE: mastiff/plugins/analysis/Office/Office-metadata.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Office MetaData Plug-in

Plugin Type: Office
Purpose:
  Extracts any metadata from an Office document using exiftool
  (http://www.sno.phy.queensu.ca/~phil/exiftool/).

Output:
   metadata.txt - Contains selected pieces of metadata.

Requirements:
  The exiftool binary is required for this plug-in. The binary can be downloaded
  from http://www.sno.phy.queensu.ca/~phil/exiftool/.

Configuration Options:
[Office Metadata]
exiftool = Path to exiftool program
"""

__version__ = "$Id: 036849ac813bffb3d941d7ec24f8911f0a5f7da0 $"

import subprocess
import logging
import os

import mastiff.plugins.category.office as office

class OfficeMetadata(office.OfficeCat):
    """Office Metadata plug-in.

    Runs exiftool against an Office document and reports a fixed selection
    of the metadata tags it prints.
    """

    def __init__(self):
        """Initialize the plugin."""
        office.OfficeCat.__init__(self)
        self.page_data.meta['filename'] = 'office-metadata'

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.

        Returns False if the plug-in is deactivated, has no options, cannot
        execute exiftool, or exiftool writes anything to stderr. Otherwise
        returns self.page_data with an 'Office Document Metadata' table.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False

        # verify external program exists and we can call it
        if not plug_opts['exiftool'] or \
           not os.path.isfile(plug_opts['exiftool']) or \
           not os.access(plug_opts['exiftool'], os.X_OK):
            log.error('%s is not accessible. Skipping.', plug_opts['exiftool'])
            return False

        # run exiftool on the document
        run = subprocess.Popen([plug_opts['exiftool'], filename],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)
        (output, error) = run.communicate()
        if error is not None and len(error) > 0:
            log.error('Error running program: {}'.format(error))
            return False

        metadata = dict()
        keywords = [ 'Author', 'Code Page', 'Comments', 'Company',
                     'Create Date', 'Current User', 'Error',
                     'File Modification Date/Time', 'File Type',
                     'Internal Version Number', 'Keywords',
                     'Last Modified By', 'Last Printed', 'MIME Type',
                     'Modify Date', 'Security', 'Software', 'Subject',
                     'Tag PID GUID', 'Template', 'Title', 'Title Of Parts',
                     'Total Edit Time', 'Warning']

        # set up output table
        new_table = self.page_data.addTable(title='Office Document Metadata')

        # grab only data we are interested in
        for line in output.split('\n'):
            # Split on the first " :" only, so a value that itself contains
            # " :" is kept whole; lines without the separator are skipped
            # instead of raising IndexError.
            fields = line.split(' :', 1)
            if len(fields) != 2:
                continue
            key = fields[0].rstrip()
            if key in keywords:
                metadata[key] = fields[1].strip()

        if len(metadata) == 0:
            # no data
            log.warning("No Office metadata detected.")
            new_table.addheader([('Message', str)], printHeader=False)
            new_table.addrow(['No Office metadata detected.' ])
        else:
            # set up output table
            new_table.addheader([('Data', str), ('Value', str)])
            # sort and add to table
            for key in sorted(metadata):
                new_table.addrow([key, metadata[key]])

        log.debug ('Successfully ran %s.', self.name)
        return self.page_data


================================================
FILE: mastiff/plugins/analysis/Office/Office-metadata.yapsy-plugin
================================================
[Core]
Name = Office Metadata
Module = Office-metadata

[Documentation]
Description = Extract Office metadata from document.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/Office/Office-pyOLEScanner.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
pyOLEScanner.py Plug-in

Plugin Type: Office
Purpose:
  This plugin runs Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py script.
  pyOLEScanner.py examines an Office document and looks for
  specific instances of malicious code.

Pre-requisites:
   - pyOLEScanner.py must be downloaded. It can be found at:
   https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip

Output:
   office-analysis.txt - File containing output from scan.
   deflated_doc/ - If Office document is an Office 2007 or later document,
                   it will be deflated and extracted into this directory.

Configuration Options:
[Office pyOLEScanner]
olecmd = Path to the pyOLEScanner.py script

NOTE:
- An Error such as "('An Error Occurred:', 'no such table: BWList')" in the
  output file is normal and can be ignored.
- For OfficeX files, an error:

     Starting Deflate Procedure
     An error occurred during deflating

  may occur when the script is unable to unzip the archive.

"""

__version__ = "$Id: 4cff51f78ebe3e9404a8c73b1a0512383d600e1d $"

import subprocess
import logging
import os
import sys

import mastiff.plugins.category.office as office

class OfficepyOLEScanner(office.OfficeCat):
    """
       Wrapper for Giuseppe 'Evilcry' Bonfa's pyOLEScanner.py office analysis
       plug-in.
    """

    def __init__(self):
        """Initialize the plugin."""
        office.OfficeCat.__init__(self)

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.

        pyOLEScanner.py is run with the log directory as its working
        directory and its standard output is saved to office-analysis.txt.

        Returns True on success, False if the plug-in is deactivated, is
        not configured, the script cannot be run, the script writes to
        stderr, or the output file cannot be written.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False

        # verify external program exists and we can call it
        olecmd = plug_opts['olecmd']
        if not olecmd or \
           not os.path.isfile(olecmd) or \
           not os.access(olecmd, os.X_OK):
            log.error('%s is not accessible. Skipping.', olecmd)
            return False

        log_dir = config.get_var('Dir', 'log_dir')

        # pyOLEScanner.py places files in the directory it runs in, so the
        # child is started in log_dir. Relative paths would then no longer
        # resolve, so anchor them to our current directory first.
        my_dir = os.getcwd()
        if not os.path.isabs(filename):
            filename = os.path.join(my_dir, filename)
        if not os.path.isabs(olecmd):
            olecmd = os.path.join(my_dir, olecmd)

        # cwd= changes directory in the child only, so our own working
        # directory never needs restoring, even if Popen raises.
        try:
            run = subprocess.Popen([sys.executable, olecmd, filename],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   close_fds=True,
                                   cwd=log_dir)
            (output, error) = run.communicate()
        except OSError as err:
            log.error('Error running program: %s' % err)
            return False

        if error is not None and len(error) > 0:
            log.error('Error running program: %s' % error)
            return False

        # ole2.sqlite is created by pyOLEScanner.py, but is not usable to us
        # so lets delete it
        ole_db = os.path.join(log_dir, 'ole2.sqlite')
        try:
            if os.path.isfile(ole_db):
                os.remove(ole_db)
                log.debug('Deleted ole2.sqlite.')
        except OSError as err:
            log.error('Unable to delete ole2.sqlite: %s', err)

        if not self.output_file(log_dir, output):
            return False
        log.debug ('Successfully ran %s.', self.name)

        return True

    def output_file(self, outdir, data):
        """
        Place the data into office-analysis.txt inside outdir.
        Returns True on success, False if the file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        try:
            # "with" closes the handle even when the write itself fails
            with open(outdir + os.sep + "office-analysis.txt", 'w') as out_file:
                out_file.write(data)
        except IOError as err:
            log.error('Write error: %s', err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/Office/Office-pyOLEScanner.yapsy-plugin
================================================
[Core]
Name = Office pyOLEScanner
Module = Office-pyOLEScanner

[Documentation]
Description = pyOLEScanner plug-in based on Giuseppe 'Evilcry' Bonfa's code.
Author = Tyler Hudak/Giuseppe 'Evilcry' Bonfa
Version = 1.0
Website = www.korelogic.com / https://github.com/Evilcry/PythonScripts/raw/master/pyOLEScanner.zip
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/Office/__init__.py
================================================


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-metadata.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
PDF MetaData Plug-in

Plugin Type: PDF
Purpose:
  Extracts any metadata from a PDF using exiftool (http://www.sno.phy.queensu.ca/~phil/exiftool/)

Output:
   metadata.txt - Contains selected pieces of extracted metadata.

Requirements:
  The exiftool binary is required for this plug-in. The binary can be downloaded
  from http://www.sno.phy.queensu.ca/~phil/exiftool/.

TODO:
  Exiftool will miss some metadata, especially if the Info object is present but
  not specified. Future versions of this plug-in will brute force the metadata,
  but PDF-parsing code needs to be written (or import pdf-parser.py).

Configuration Options:
[PDF Metadata]
exiftool = Path to exiftool program
"""

__version__ = "$Id: 0ba78966f263ce6cb3ec0447e392d8c544baa55f $"

import subprocess
import logging
import os

import mastiff.plugins.category.pdf as pdf

class PDFMetadata(pdf.PDFCat):
    """PDF Metadata plug-in.

    Runs exiftool against a PDF and reports a fixed selection of the
    metadata tags it prints.
    """

    def __init__(self):
        """Initialize the plugin."""
        pdf.PDFCat.__init__(self)
        self.page_data.meta['filename'] = 'pdf-metadata'

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.

        Returns False if the plug-in is deactivated, has no options, cannot
        execute exiftool, or exiftool writes anything to stderr. Otherwise
        returns self.page_data with a 'PDF Document Metadata' table.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False

        # verify external program exists and we can call it
        if not plug_opts['exiftool'] or \
           not os.path.isfile(plug_opts['exiftool']) or \
           not os.access(plug_opts['exiftool'], os.X_OK):
            log.error('%s is not accessible. Skipping.', plug_opts['exiftool'])
            return False

        # run exiftool on the document
        run = subprocess.Popen([plug_opts['exiftool'], filename],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)
        (output, error) = run.communicate()
        if error is not None and len(error) > 0:
            log.error('Error running program: {}'.format(error))
            return False

        metadata = dict()
        keywords = [ 'Creator', 'Create Date', 'Title', 'Author', 'Producer',
                     'Modify Date', 'Creation Date', 'Mod Date', 'Subject',
                     'Keywords', 'Metadata Date', 'Description',
                     'Creator Tool', 'Document ID', 'Instance ID', 'Warning']

        # grab only data we are interested in
        for line in output.split('\n'):
            # Split on the first " :" only, so a value that itself contains
            # " :" is kept whole; lines without the separator are skipped
            # instead of raising IndexError.
            fields = line.split(' :', 1)
            if len(fields) != 2:
                continue
            key = fields[0].rstrip()
            if key in keywords:
                # strip both sides so the value has no leading blank, in
                # line with the Office metadata plug-in
                metadata[key] = fields[1].strip()

        new_table = self.page_data.addTable(title='PDF Document Metadata')

        if len(metadata) == 0:
            # no data
            log.warning("No PDF metadata detected.")
            new_table.addheader([('Message', str)], printHeader=False)
            new_table.addrow(['No PDF metadata detected.' ])
        else:
            # set up output table
            new_table.addheader([('Data', str), ('Value', str)])
            # sort and add to table
            for key in sorted(metadata):
                new_table.addrow([key, metadata[key]])

        log.debug ('Successfully ran %s.', self.name)

        return self.page_data


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-metadata.yapsy-plugin
================================================
[Core]
Name = PDF Metadata
Module = PDF-metadata

[Documentation]
Description = Extract PDF metadata from document.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-pdfid.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
pdfid.py Plug-in

Plugin Type: PDF
Purpose:
  Run Didier Stevens' pdfid.py script against a PDF and place the results into
  a file.

Output:
   pdfid.txt - Output of pdfid.py.

Requirements:
   The pdfid.py script must be installed.

Configuration Options:

   [pdfid]
   pdfid_cmd - Path to the pdfid.py script. Must be executable.
   pdfid_opts - Options to give to the script. Can be empty.

"""

__version__ = "$Id: a83e6c90f42bdd7ada3f1393dc749b5b61668c4e $"

import subprocess
import logging
import os
import sys

import mastiff.plugins.category.pdf as pdf

class PDFid(pdf.PDFCat):
    """Run Didier Stevens pdfid.py"""

    def __init__(self):
        """Initialize the plugin."""
        pdf.PDFCat.__init__(self)
        self.page_data.meta['filename'] = 'pdf-id'

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.

        Returns False if the plug-in is deactivated, disabled (blank
        pdfid_cmd), misconfigured, or pdfid.py fails or writes to stderr.
        Otherwise returns self.page_data holding an optional 'PDF Header'
        table and a 'PDF Objects' table.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.',  self.name)
            return False

        # A blank command means the plug-in is disabled. This must be
        # tested before the accessibility check, which also rejects a
        # blank value and would otherwise hide this branch.
        pdfid_cmd = plug_opts['pdfid_cmd']
        if not pdfid_cmd:
            log.debug('Plug-in disabled.')
            return False

        # verify external program exists and we can call it
        if not os.path.isfile(pdfid_cmd) or \
           not os.access(pdfid_cmd, os.X_OK):
            log.error('%s is not accessible. Skipping.',  pdfid_cmd)
            return False

        # Build the option list in a local so the configuration section is
        # not modified in place.
        pdfid_opts = []
        if 'pdfid_opts' in plug_opts and len(plug_opts['pdfid_opts']) > 0:
            pdfid_opts = plug_opts['pdfid_opts'].split()

        # run pdfid.py here
        try:
            run = subprocess.Popen([pdfid_cmd] + pdfid_opts + [filename],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   close_fds=True)
            (output, error) = run.communicate()
        except Exception as err:
            log.error('Error executing pdfid.py: {}'.format(err))
            return False

        if error is not None and len(error) > 0:
            log.error('Error running program: {}'.format(error))
            return False

        # parse through output; the first line is skipped and everything
        # after it is data (short or empty output just yields no rows)
        data_lines = output.split('\n')[1:]

        if data_lines and 'PDF Header' in data_lines[0]:
            # By default, pdfid.py displays the PDF header as the first. This is different enough from the
            # other data extracted it should be in its own table.
            header_table = self.page_data.addTable(title='PDF Header')
            header_table.addheader([('Name', str), ('Value', str)], printHeader=False)
            (name, _sep, value) = data_lines[0].strip().partition(': ')
            header_table.addrow([name, value])
            data_lines = data_lines[1:]

        new_table = self.page_data.addTable(title='PDF Objects')
        new_table.addheader([('Object___Name', str), ('Count', int)])

        # grab the rest of the data
        for line in data_lines:
            tokens = line.split()
            # need at least a name and a count; skips blank lines too
            if len(tokens) < 2:
                continue
            # The count is the last column. Names such as "/Colors > 2^24"
            # contain spaces, so everything before it is the name.
            new_table.addrow([' '.join(tokens[:-1]), tokens[-1]])

        log.debug ('Successfully ran %s.', self.name)

        return self.page_data


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-pdfid.yapsy-plugin
================================================
[Core]
Name = pdfid
Module = PDF-pdfid

[Documentation]
Description = Run Didier Stevens' pdfid.py script
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-pdfparser.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
PDF-pdfparser

Plugin Type: PDF
Purpose:
  This plug-in uses Didier Stevens pdf-parser.py code to perform two tasks:

  - Writes an uncompressed copy of the PDF to a file named uncompressed-pdf.txt
  - Searches the PDF for keywords in objects, specified by the
    self.interesting_objects list, and writes those objects, and any they
    reference, to a file in pdf-objects/.

  All rights for pdf-parser.py belong to Didier Stevens.

Requirements:
  - Didier Stevens pdf-parser.py must be installed.
    (http://blog.didierstevens.com/programs/pdf-tools/)

Configuration Options:

[pdf-parser]
pdf_cmd = Path to pdf-parser.py

"""

__version__ = "$Id: e784c089c5df767e0b92109f46fd67ec540973a3 $"

import os
import subprocess
import logging
import re

import mastiff.queue as queue
import mastiff.plugins.category.pdf as pdf

class PDFparser(pdf.PDFCat):
    """Plug-in to run Didier Stevens pdf-parser.py script."""

    def __init__(self):
        """Initialize the plugin.

        Runs the PDF category initializer and defines the keyword list that
        get_objects() hands to pdf-parser.py through its --search option.
        """
        pdf.PDFCat.__init__(self)

        # list of objects we want to search for
        self.interesting_objects = [ 'JavaScript', 'JS', 'OpenAction', 'AA' ]

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False

        # verify external program exists and we can call it
        if not plug_opts['pdf_cmd'] or \
           not os.path.isfile(plug_opts['pdf_cmd']) or \
           not os.access(plug_opts['pdf_cmd'], os.X_OK):
            log.error('%s is not accessible. Skipping.', plug_opts['pdf_cmd'])
            return False

        self.uncompress(config, plug_opts, filename)
        self.get_objects(config, plug_opts, filename)

        log.debug ('Successfully ran %s.', self.name)
        return True

    def output_object(self, plug_opts, pdf_file, obj_num, reasons, log_dir):
        """
           Run pdf-parser to extract a given obj_num and place
           it into the log_dir directory, in the form obj-#.txt.

           plug_opts -- plug-in options; plug_opts['pdf_cmd'] is the
                        pdf-parser.py executable.
           pdf_file  -- path of the PDF being analyzed.
           obj_num   -- object id as a string, optionally followed by a
                        generation number ("12" or "12 0").
           reasons   -- iterable of strings explaining why the object was
                        flagged; written at the top of the output file.
           log_dir   -- base log directory; files go into log_dir/pdf-objects.

           Returns True on success, False if the directory cannot be
           created, pdf-parser writes anything to stderr, or the output
           file cannot be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.outobj')

        # create the dir if it doesn't exist
        log_dir = log_dir + os.sep + 'pdf-objects'
        if not os.path.exists(log_dir):
            try:
                os.makedirs(log_dir)
            except (IOError, OSError) as err:
                # os.makedirs reports failures as OSError, which is distinct
                # from IOError on Python 2.
                log.error('Unable to create dir %s: %s', log_dir, err)
                return False

        # if we get the obj_num in the form "12 0", remove the gen #
        # (split on any whitespace, not just a single space)
        pieces = obj_num.split()
        if pieces:
            obj_num = pieces[0]

        filename = log_dir + os.sep + 'obj-' + obj_num + '.txt'

        # have pdf-parser extract the object for us; the option and its
        # value are passed as separate argv entries
        command = [plug_opts['pdf_cmd'], '-o', obj_num, '-f', '-w', pdf_file]
        run = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)
        (output, error) = run.communicate()
        if error is not None and len(error) > 0:
            log.error('Unable to extract object %s.', obj_num)
            return False

        # output the file - we don't use the pdf-parser.py -d option as
        # there are times it errors out when attempting to dump an object
        try:
            with open(filename, 'w') as out_file:
                out_file.write('Object %s\n' % obj_num)
                out_file.write('Flagged due to:\n')
                for why in reasons:
                    out_file.write('\t%s\n' % why)
                out_file.write('\n')
                out_file.write(output)
        except IOError as err:
            log.error('Unable to write %s: %s', filename, err)
            return False

        return True

    def get_objects(self, config, plug_opts, filename):
        """ Search through the PDF for objects associated with malicious
            activity and extract those into their own file.

            For every keyword in self.interesting_objects, pdf-parser.py is
            run with --search=<keyword>.  Each "obj N G" line in its output
            names a matching object; each "Referencing: ..." line lists the
            objects that the most recent matching object points to.  Every
            collected object is then written out with output_object(),
            together with the reasons it was collected.

            config    -- configuration object (used for Dir/log_dir).
            plug_opts -- plug-in options; plug_opts['pdf_cmd'] is the
                         pdf-parser.py executable.
            filename  -- path of the PDF being analyzed.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.get_objects')
        log.info('Extracting interesting objects.')

        # compiled once instead of once per output line
        obj_re = re.compile(r'obj\s+([0-9]+\s+[0-9]+)')
        ref_line_re = re.compile(r'Referencing: ([0-9]+\s+[0-9\s,R]+)')
        # a single "N G R" entry inside a Referencing list; captures "N G"
        ref_item_re = re.compile(r'([0-9]+\s+[0-9]+)\s+R')

        # maps "N G" -> list of reasons the object was flagged
        objects = dict()

        for keyword in self.interesting_objects:
            # let pdf-parser.py grab the object containing our keywords
            run = subprocess.Popen([plug_opts['pdf_cmd'],
                                    '--search=' + keyword,
                                    filename],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   close_fds=True)
            (output, error) = run.communicate()
            # skip anything that gives us an error
            if error is not None and len(error) > 0:
                log.debug('pdf-parser reported an error for keyword %s. Skipping.', keyword)
                continue

            # No object has been seen yet for this keyword; a reference line
            # that shows up before any object line has no owner to record.
            cur_obj = None

            # go through pdf-parser output and grab any objects and
            # their referenced objects to dump
            for line in output.split('\n'):
                obj_match = obj_re.match(line)
                if obj_match is not None:
                    # obj # #
                    cur_obj = obj_match.group(1)
                    objects.setdefault(cur_obj, list()).append('Keyword: %s' % keyword)
                    log.debug('Adding object %s for keyword %s', cur_obj, keyword)
                    continue

                ref_match = ref_line_re.search(line)
                if ref_match is None or cur_obj is None:
                    continue

                # Referencing: object list
                reason = 'Referenced by %s' % cur_obj
                for ref_obj in ref_item_re.findall(ref_match.group(1)):
                    reasons = objects.setdefault(ref_obj, list())
                    if reason not in reasons:
                        # make sure we didn't add already
                        reasons.append(reason)
                        log.debug('Adding object %s from reference "%s"', ref_obj, cur_obj)

        # output collected objects to file
        for my_obj in objects.keys():
            self.output_object(plug_opts,
                               filename,
                               my_obj,
                               objects[my_obj],
                               config.get_var('Dir', 'log_dir'))

    def uncompress(self, config, plug_opts,  filename):
        """ Uncompress the PDF using pdf-parser.py

            Runs pdf-parser.py with -w and -f on filename and stores its
            output through output_file().  When the plug-in's 'feedback'
            option is true, the written file is appended to the MASTIFF job
            queue, unless the input itself is already an
            'uncompressed-pdf.txt' file.

            Returns True on success and False when pdf-parser.py writes to
            stderr or the output file could not be written.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.uncompress')
        log.info('Uncompressing PDF.')

        feedback = config.get_bvar(self.name,  'feedback')
        if feedback is True:
            job_queue = queue.MastiffQueue(config.config_file)
        else:
            job_queue = None

        # run pdf-parser with -w (raw) and -f (decompress) opts
        run = subprocess.Popen([plug_opts['pdf_cmd'], '-w', '-f', filename],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)

        (output, error) = run.communicate()
        if error is not None and len(error) > 0:
            log.error('Unable to uncompress PDF: %s.', filename)
            return False

        log_dir = config.get_var('Dir', 'log_dir')

        # Do not queue a file that was never written.
        if not self.output_file(log_dir, output):
            return False

        if job_queue is not None and not filename.endswith('uncompressed-pdf.txt'):
            log.info('%s' % filename)
            log.info('Adding uncompressed PDF to queue.')
            job_queue.append(log_dir + os.sep + "uncompressed-pdf.txt")

        return True

    def output_file(self, outdir, data):
        """Place the data into outdir/uncompressed-pdf.txt.

        Returns True when the file was written, False when opening or
        writing it raised IOError (the error is logged).
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        try:
            # the with-block closes the handle even if write() fails
            with open(outdir + os.sep + "uncompressed-pdf.txt", 'w') as out_file:
                out_file.write(data)
        except IOError as err:
            log.error('Write error: %s', err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/PDF/PDF-pdfparser.yapsy-plugin
================================================
[Core]
Name = pdf-parser
Module = PDF-pdfparser

[Documentation]
Description = Use Didier Stevens pdf-parser.py to uncompress PDF and find interesting objects.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/PDF/__init__.py
================================================


================================================
FILE: mastiff/plugins/analysis/ZIP/ZIP-extract.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Zip archive extract plug-in.

Plugin Type: ZIP
Purpose:
  Extract all of the files within the archive into a directory.

  If the filename contains an absolute path or '..'s, they are removed before
  extraction occurs.

Configuration Options:

  enabled = [on|off]: Whether this plug-in should extract the archive or not.

Output:
   Extracts all of the files in the archive to log_dir/zip_contents.

"""

__version__ = "$Id: ed40be29fdba1a1b71bcb47d5c5933a737f2a4b2 $"

import logging
import os
import zipfile
import struct

import mastiff.plugins.category.zip as zip
import mastiff.queue as queue

class ZIP_Extract(zip.ZipCat):
    """Zip archive extraction plug-in."""

    def __init__(self):
        """Initialize the plugin by running the ZipCat initializer."""
        zip.ZipCat.__init__(self)

    def activate(self):
        """Activate the plugin (delegates to ZipCat.activate)."""
        zip.ZipCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin (delegates to ZipCat.deactivate)."""
        zip.ZipCat.deactivate(self)

    def analyze(self, config, filename):
        """Extract every member of the zip archive into log_dir/zip_contents.

        config   -- configuration object; the plug-in reads its 'feedback',
                    'enabled' and 'password' options and Dir/log_dir.
        filename -- path of the zip archive to extract.

        Member names come from the (untrusted) archive, so each one is
        reduced to a relative path without '..', '.' or empty components
        before it is joined to the output directory.  This keeps every
        extracted file below log_dir/zip_contents.

        Returns False when the plug-in is not activated, the archive cannot
        be opened, or an extracted file cannot be written.  Returns True
        otherwise, including when the plug-in is disabled.  Members that
        cannot be read are logged and skipped.
        """

        # sanity check to make sure we can run
        if self.is_activated == False:
            return False

        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        feedback = config.get_bvar(self.name,  'feedback')

        if feedback is True:
            job_queue = queue.MastiffQueue(config.config_file)
        else:
            job_queue = None

        # make sure we are enabled
        if config.get_bvar(self.name, 'enabled') is False:
            log.info('Disabled. Exiting.')
            return True

        try:
            my_zip = zipfile.ZipFile(filename, 'r', allowZip64=True)
        except (zipfile.BadZipfile, IOError, struct.error) as err:
            log.error('Unable to open zip file: {}'.format(err))
            return False

        # from here on the archive is closed on every exit path
        try:
            log_dir = config.get_var('Dir', 'log_dir')
            log_dir += os.sep + 'zip_contents'
            try:
                os.mkdir(log_dir)
            except OSError:
                # dir already exists, skip
                pass

            # grab password if one exists
            pwd = config.get_var(self.name, 'password')
            if pwd is not None and len(pwd) > 0:
                log.info('Password \"{}\" will be used for this zip.'.format(pwd))

            # cycle through files and extract them
            for file_member in my_zip.namelist():

                # Lazy %-style logging is used for raw member names so that
                # unicode names do not break byte-string formatting.
                if file_member[0:1] == os.sep:
                    log.info('Zip member "%s" contains absolute path. Stripping.', file_member)

                # warn about the ..'s; they are removed below
                if os.pardir in file_member:
                    log.warning('File contains ..s: %s', file_member)

                # we can't just blindly extract in case there are absolute
                # paths or '..'s, so we build a safe relative name, read in
                # the file, create any directories, and write it out
                safe_name = self._safe_member_name(file_member)
                try:
                    zipfile_name = unicode(safe_name)
                except UnicodeDecodeError:
                    zipfile_name = unicode(safe_name, errors='replace')

                member_dir = os.path.dirname(zipfile_name)
                try:
                    log.debug(u'Creating directory {}.'.format(member_dir))
                    os.makedirs(log_dir + os.sep + member_dir)
                except OSError:
                    log.debug(u'Directory {} already exists.'.format(member_dir))

                if len(os.path.basename(file_member)) == 0:
                    log.debug('%s is just a directory. Not creating file.', file_member)
                    continue

                if len(zipfile_name) == 0:
                    # e.g. a member called '..': nothing usable is left
                    log.warning('%s has no usable file name. Skipping.', file_member)
                    continue

                log.info(u'Extracting {}.'.format(zipfile_name))

                try:
                    in_file = my_zip.open(file_member, 'r', pwd=pwd)
                    try:
                        data = in_file.read()
                    finally:
                        in_file.close()
                except RuntimeError as err:
                    # raised by zipfile for missing or bad passwords
                    log.error('Problem extracting: %s', err)
                    continue
                except (IOError, zipfile.BadZipfile) as err:
                    log.error('Problem extracting %s.', file_member)
                    log.error('Possible obfuscation or corruption: %s', err)
                    continue

                out_path = log_dir + os.sep + zipfile_name
                try:
                    # binary mode: member contents are arbitrary bytes
                    with open(out_path, 'wb') as outfile:
                        outfile.write(data)
                except IOError as err:
                    log.error('Could not write file: %s', err)
                    return False

                # now feed back to mastiff if asked to
                if job_queue is not None and feedback is True:
                    log.info('Adding {} to queue.'.format(zipfile_name.encode('utf-8')))
                    job_queue.append(out_path)
        finally:
            my_zip.close()

        return True

    @staticmethod
    def _safe_member_name(member):
        """Return member as a relative path that cannot leave its base dir.

        The name is normalized, any drive prefix is dropped, and every
        empty, '.' or '..' path component is removed.  The result may be
        an empty string if nothing usable remains.
        """
        normalized = os.path.splitdrive(os.path.normpath(member))[1]
        kept = [part for part in normalized.split(os.sep)
                if part not in ('', os.curdir, os.pardir)]
        return os.sep.join(kept)


================================================
FILE: mastiff/plugins/analysis/ZIP/ZIP-extract.yapsy-plugin
================================================
[Core]
Name = ZipExtract
Module = ZIP-extract

[Documentation]
Description = Extract zip archive contents.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/ZIP/ZIP-zipinfo.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Zipinfo Analysis Plug-in

Plugin Type: ZIP
Purpose:
  This plug-in extracts metadata information stored within a zip archive
  for the analysis.

  A lot of information was taken from
  http://www.pkware.com/documents/casestudies/APPNOTE.TXT.

TO DO:
  - Decode external attributes.
  - Decode extra data.

Output:
   zipinfo.txt - File containing all of the metadata.

"""

__version__ = "$Id: eabccb2f29d8d5bd52fc2fb77e8e180ed3a4e875 $"

import os
import logging
import zipfile
import codecs
import struct

import mastiff.plugins.category.zip as zip

class ZIP_Info(zip.ZipCat):
    """Class to extract zip metadata and place it into a file."""

    def __init__(self):
        """Initialize the plugin and name its output page 'zipinfo'."""
        zip.ZipCat.__init__(self)
        # replaces whatever 'filename' value the base initializer left in page_data.meta
        self.page_data.meta['filename'] = 'zipinfo'

    def activate(self):
        """Activate the plugin (delegates to ZipCat.activate)."""
        zip.ZipCat.activate(self)

    def deactivate(self):
        """Deactivate the plugin (delegates to ZipCat.deactivate)."""
        zip.ZipCat.deactivate(self)

    def analyze(self, config, filename):
        """Gather archive-level and per-member metadata for a zip file.

        Adds a 'Zip Archive Information' table (file name, archive comment)
        to self.page_data and, when the archive has members, the tables
        produced by quick_info() and full_info().

        Returns False if the plug-in is not activated or the archive cannot
        be opened or listed; otherwise returns self.page_data.
        """
        # refuse to run unless the plug-in has been activated
        if self.is_activated == False:
            return False

        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # open the archive and pull its member records
        try:
            archive = zipfile.ZipFile(filename, 'r')
            members = archive.infolist()
        except (zipfile.BadZipfile, IOError, struct.error) as err:
            log.error('Unable to open or process zip file: {}'.format(err))
            return False

        summary = self.page_data.addTable(title='Zip Archive Information')
        summary.addheader([('Data', str), ('Value', str)], printHeader=False)
        summary.addrow(['File Name', os.path.basename(filename)])

        if archive.comment:
            # drop any bytes that cannot be decoded
            comment_text = unicode("%s" % (archive.comment),  errors='ignore')
        else:
            comment_text = 'This file has no comment.'
        summary.addrow(['Comment', comment_text])

        if archive.filelist:
            self.quick_info(members)
            self.full_info(members)
        else:
            summary.addrow(['Warning', 'Zip archive has no files.'])

        archive.close()

        return self.page_data

    def quick_info(self, info_list):
        """Add a 'Quick Info' table with one short row per archive member.

        Each row holds the modification date (MM/DD/YYYY HH:MM:SS), the
        uncompressed size and the member name.  Names of encrypted members
        (flag bit 0 set) are prefixed with '* '.
        """
        listing = self.page_data.addTable('Quick Info')
        listing.addheader([('Modification___Date', str), ('File___Size', int), ('File___Name', str)])

        for entry in info_list:
            stamp = entry.date_time
            # date_time is (year, month, day, hour, minute, second)
            when = "%02d/%02d/%d %02d:%02d:%02d" % \
                   (stamp[1], stamp[2], stamp[0], stamp[3], stamp[4], stamp[5])

            # names that do not decode with the default codec are decoded
            # as UTF-8 with replacement characters
            try:
                shown_name = unicode(entry.filename)
            except UnicodeDecodeError:
                shown_name = unicode(entry.filename, 'utf-8', 'replace')

            # if file is encrypted, flag it
            encrypted = entry.flag_bits & 0x1 == 0x1
            if encrypted:
                shown_name = '* ' + shown_name

            listing.addrow([when, entry.file_size, shown_name])

    def _version_created(self, version):
        """ Return a string containing the system that created the archive.
             Taken from http://www.pkware.com/documents/casestudies/APPNOTE.TXT
        """
        sys_list = ["MS-DOS, OS/2, FAT/VFAT/FAT32", "Amiga", "OpenVMS",  "UNIX",
                    "VM/CMS",  "Atari ST",  "OS/2 H.P.F.S.",  "Macintosh",
                    "Z-System",  "CP/M",  "Windows NTFS",  "MVS (OS/390 - Z/OS)",
                    "VSE",  "Acorn Risc",  "VFAT",  "alternative MVS",  "BeOS",
                    "Tandem",  "OS/400",  "OS X Darwin",  "Unknown"]
        if version > 20:
            version = 19

        return sys_list[version]

    def _flag_bits(self, flag_bits, method):
        """ Returns a string containing the explanation of the flag bits. """

        output = ""
        if flag_bits & 0x1 == 0x1:
            output += " "*24 + "- This file is encrypted.\n"

        if method == 6:
            # Imploding
            if flag_bits & 0x2 == 0x2:
                output += " "*24 + "- 8K sliding dictionary used for compression.\n"
            else:
                output += " "*24 + "- 4K sliding dictionary used for compression.\n"
            if flag_bits & 0x4 == 0x4:
                output += " "*24 + "- 3 Shannon-Fano trees used for sliding dictionary.\n"
            else:
                output += " "*24 + "- 2 Shannon-Fano trees used for sliding dictionary.\n"
        elif method == 8 or method == 9:
            # Deflating
            if flag_bits & 0x6 == 0:
                output += " "*24 + "- Normal (-en)"
            elif flag_bits & 0x6 == 0x2:
                output += " "*24 + "- Maximum (-exx/-ex)"
            elif flag_bits & 0x6 == 0x4:
                output += " "*24 + "- Fast (-ef)"
            elif flag_bits & 0x6 == 0x6:
                output += " "*24 + "- Super Fast (-es)"
            else:
                output += " "*24 + "- UNKNOWN"
            output += " compression option was used.\n"
        elif method == 14:
            # LZMA
            if flag_bits & 0x02 == 0x02:
                output += " "*24 + "- EOS marker indicates end of compressed data stream.\n"

        if flag_bits & 8 == 8:
            output += " "*24 + "- Correct values for CRC-32 and sizes are in data descriptor.\n"

        if flag_bits & 32 == 32:
            output += " "*24 + "- File is compressed patched data.\n"

        if flag_bits & 64 == 64:
            output += " "*24 + "- Strong encryption is used.\n"

        if flag_bits & 2048 == 2048:
            output += " "*24 + "- Filename and comments must be encoded in UTF-8.\n"

        if flag_bits & 8192 == 8192:
            output += " "*24 + "- Central Directory encrypted."

        return output

    def _compression_method(self, method):
        """ Returns a string describing the compression method used. """

        methods = [ 'no compression', 'Shrunk',
                   'Reduced with compression factor 1',
                  'Reduced with compression factor 2',
                  'Reduced with compression factor 3',
                  'Reduced with compression factor 4', 'Imploded',
                  'Tokenizing compression algorithm', 'Deflated',
                  'Enhanced Deflating using Deflate64(tm)',
                  'PKWARE Data Compression Library Imploding (old IBM TERSE)',
                  'Reserved by PKWARE', 'BZIP2 algorithm', 'Reserved by PKWARE',
                  'LZMA (EFS)', 'Reserved by PKWARE', 'Reserved by PKWARE',
                  'Reserved by PKWARE', 'IBM TERSE (new)',
                  'IBM LZ77 z Architecture (PFS)', 'WavPack compressed',
                  'PPMd version I, Rev 1',  'UNKNOWN']

        if method == 97:
            method = 20
        elif method == 98:
            method = 21
        elif method > 19:
            method = 22

        return methods[method]

    def _internal_attribs(self, attrib):
        """ Returns a string describing the internal attributes."""

        output = ""
        if attrib & 0x01 == 0x01:
            output += " "*24 + "- File is apparently ASCII or text.\n"

        """ NOTE: bit 0x0002 means that a 4 byte variable record length
             field is present, but this info doesn't seem useful in this case.
        """
        return output

    def full_info(self, info_list):
        """ Obtain a full set of information for each file within the archive.

            Adds a 'Zip Archive File Info' table to self.page_data with one
            row per entry of info_list.  The table header is added together
            with the first row.  An entry whose metadata cannot be formatted
            is logged and skipped; the remaining entries are still processed.
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.fileinfo')

        full_table = self.page_data.addTable('Zip Archive File Info')

        def to_text(raw):
            # decode with the default codec, falling back to replacement
            # characters for bytes that cannot be decoded
            try:
                return unicode(raw)
            except UnicodeDecodeError:
                return unicode(raw, errors='replace')

        for file_info in info_list:
            try:
                my_headers = list()
                my_output = list()

                my_headers.append(('File___Name', str))
                my_output.append(to_text(file_info.filename))

                date_str = "%02d/%02d/%d %02d:%02d:%02d" % \
                (file_info.date_time[1], file_info.date_time[2], file_info.date_time[0], \
                file_info.date_time[3], file_info.date_time[4], file_info.date_time[5])

                my_headers.append(('Last___Modification___Date', str))
                my_output.append(date_str)

                my_headers.append(('Compression___Type', str))
                my_output.append("%d - %s" % (file_info.compress_type, self._compression_method(file_info.compress_type)))

                my_headers.append(('File___Comment', str))
                if file_info.comment is None or len(file_info.comment) == 0:
                    my_output.append('None')
                else:
                    # decode first so a non-ASCII comment cannot break the
                    # unicode formatting
                    my_output.append(u"%s\n" % to_text(file_info.comment))

                my_headers.append(('Creation___System', str))
                my_output.append("%s (%d)" % (self._version_created(file_info.create_system), file_info.create_system))

                my_headers.append(('PKZIP___creation___version', str))
                my_output.append(file_info.create_version)

                my_headers.append(('Version___to___extract', str))
                my_output.append(file_info.extract_version)

                my_headers.append(('Flag___bits', str))
                my_output.append("0x%x\n%s" % (file_info.flag_bits, self._flag_bits(file_info.flag_bits, file_info.compress_type).rstrip('\n')))

                my_headers.append(('Volume___number', str))
                my_output.append(file_info.volume)

                my_headers.append(('Internal___attributes', str))
                my_tmpstr = self._internal_attribs(file_info.internal_attr)
                if len(my_tmpstr) > 0:
                    my_output.append("0x%x\n%s" % (file_info.internal_attr, my_tmpstr))
                else:
                    my_output.append("0x%x" % (file_info.internal_attr))

                my_headers.append(('External___attributes', str))
                my_output.append("0x%x" % file_info.external_attr)

                my_headers.append(('CRC32', str))
                my_output.append(file_info.CRC)

                my_headers.append(('Header___offset', str))
                my_output.append(file_info.header_offset)

                my_headers.append(('Compressed___size', str))
                my_output.append(file_info.compress_size)

                my_headers.append(('Uncompress___size', str))
                my_output.append(file_info.file_size)

                my_headers.append(('Extra___Data', str))

                # the extra field is an empty string when absent, so test
                # for content rather than for None
                if file_info.extra:
                    my_output.append('This file entry contains extra data. Not supported yet.')
                else:
                    my_output.append('No extra data.')

            except (UnicodeError, TypeError, ValueError) as err:
                log.error('Error obtaining file information from archive for %r: %s',
                          file_info.filename, err)
                continue

            # add the header if necessary
            if full_table.header is None:
                full_table.addheader(my_headers, printVertical=True)
            full_table.addrow(my_output)

        return

    def output_file(self, outdir, data):
        """Print output from analysis to outdir/zipinfo-old.txt (UTF-8).

        Returns True when the file was written, False when opening or
        writing it raised IOError (the error is logged).
        """

        log = logging.getLogger('Mastiff.Plugins.' + self.name + '.output')

        out_path = outdir + os.sep + 'zipinfo-old.txt'
        try:
            # the with-block closes the handle even if write() fails
            with codecs.open(out_path, 'w',  encoding='utf-8') as outfile:
                outfile.write(data)
        except IOError as err:
            # report the file that was actually being written
            log.error('Could not write %s: %s', out_path, err)
            return False

        return True


================================================
FILE: mastiff/plugins/analysis/ZIP/ZIP-zipinfo.yapsy-plugin
================================================
[Core]
Name = ZipInfo
Module = ZIP-zipinfo

[Documentation]
Description = Extract zip metadata and file information.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/analysis/ZIP/__init__.py
================================================


================================================
FILE: mastiff/plugins/analysis/__init__.py
================================================


================================================
FILE: mastiff/plugins/category/EXE.yapsy-plugin
================================================
[Core]
Name = Windows Executable Category
Module = exe

[Documentation]
Description = Windows Executable Category Plugin
Author = Tyler Hudak
Website = www.korelogic.com
Version = 1.0
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/category/PDF.yapsy-plugin
================================================
[Core]
Name = Adobe PDF Category
Module = pdf

[Documentation]
Description = Adobe PDF Category Plugin
Author = Tyler Hudak
Website = www.korelogic.com
Version = 1.0
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/category/__init__.py
================================================


================================================
FILE: mastiff/plugins/category/categories.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
The base category classes for each of the file types analyzed by
mastiff.
"""

__version__ = "$Id: e7abe9b27e953709d06c590305ce0c16eaa36c34 $"

from yapsy.IPlugin import IPlugin
import mastiff.plugins.output as output

class MastiffPlugin(IPlugin):
    """Common base class that every MASTIFF category class inherits from."""

    def __init__(self, name=None):
        """Set up the yapsy plugin base and the attributes shared by plugins.

        name -- optional plugin name; it can also be set later with
                set_name().
        """
        IPlugin.__init__(self)
        self.name = name
        self.prereq = None
        self.yara_filetype = None

        # every plugin starts with an empty output page carrying a
        # placeholder file name
        page = output.page()
        page.meta['filename'] = 'CHANGEME'
        self.page_data = page

    def activate(self):
        """Activate the plugin through the yapsy base class."""
        IPlugin.activate(self)

    def analyze(self, config, filename, output=None):
        """Placeholder analysis hook; does nothing and returns None."""
        return None

    def deactivate(self):
        """Deactivate the plugin through the yapsy base class."""
        IPlugin.deactivate(self)

    def set_name(self, name=None):
        """
           Store name as this plugin's name and return the stored value.
           Yapsy offers no easy way to get or set a plugin's own name,
           hence this helper.
        """
        self.name = name
        return self.name


================================================
FILE: mastiff/plugins/category/exe.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Windows Executable File Category Plugin

File Type: Windows Executable Programs
Purpose:
  This file contains the code for the category class "exe", which
  allows plugins to be created to be run on Windows executable files.

Output:
   None

__init__(): MANDATORY: Any initialization code the category requires. It must
            also call the __init__ for the MastiffPlugin class.

is_my_filetype(id_dict, file_name): MANDATORY: This function will return
            the cat_name if the given id_dict contains one of the
            file types this category can examine, or the yara rule matches the
            file type. The file_name is also given so additional tests can be
            performed, if required. None should be returned if it does not 
            analyze this type.
"""

__version__ = "$Id: 609d6d02a651ff56ef7b7da434603e150b723876 $"

import struct
import mastiff.plugins.category.categories as categories
import mastiff.filetype as FileType

class EXECat(categories.MastiffPlugin):
    """Category class for Windows executables.

    A file is claimed by this category when its magic string mentions one of
    the known executable descriptions, when the Yara rule matches, or when a
    manual look at the MZ/PE headers succeeds.
    """

    def __init__(self, name=None):
        """Initialize the category.

        Sets the category name, the magic-string fragments that identify an
        executable and the Yara rule used as a secondary check.
        """
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'EXE'
        self.my_types = [ 'PE32 executable',
                          'MS-DOS executable',
                          'Win32 Executable',
                          'Win32 EXE'
                          ]
        self.yara_filetype = """rule isexe {
	    strings:
		    $MZ = "MZ"        
	    condition:
		    $MZ at 0 and uint32(uint32(0x3C)) == 0x00004550
        }"""

    def is_exe(self, filename):
        """Look to see if the file has the MZ and PE headers we expect.

        Input:
            - filename: Path of the file to inspect.

        Returns True when the file starts with 'MZ', the 32-bit little-endian
        value at offset 0x3c is an offset between 0 and 1024, and the two
        bytes at that offset are 'PE'. Returns False otherwise, including for
        files too short to hold those fields.

        Errors raised by open() itself (e.g. a missing file) are not caught.
        """

        with open(filename, 'rb') as exe_file:
            # bytes literals keep the comparison valid for data read in 'rb' mode
            if exe_file.read(2) != b'MZ':
                return False

            exe_file.seek(0x3c)
            raw_offset = exe_file.read(4)
            if len(raw_offset) != 4:
                # truncated file: struct.unpack would raise struct.error
                return False

            offset = struct.unpack('<i', raw_offset)[0]
            if offset < 0 or offset > 1024:
                # a negative offset cannot be seeked to, and anything past
                # 1024 seems a bit too far - we'll stop just in case
                return False

            exe_file.seek(offset)
            if exe_file.read(2) != b'PE':
                return False

        return True

    def is_my_filetype(self, id_dict, file_name):
        """Determine if the file belongs to the EXE category.

        Input:
            - id_dict: Mapping of identification results; only the 'magic'
                       entry is read here.
            - file_name: Path of the file, used for the Yara and manual checks.

        Returns the category name when any check succeeds, otherwise None.
        None is also returned, without running the later checks, when the
        magic entry is missing or is not a container.
        """

        # check magic string first
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic']]:
                return self.cat_name
        except (KeyError, TypeError):
            # no usable magic string was supplied
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        # perform a manual check
        if self.is_exe(file_name):
            return self.cat_name

        return None


================================================
FILE: mastiff/plugins/category/generic.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Generic File Category Plugin

File Type: Any files
Purpose:
  This file contains the code for the category class "generic", which
  allows plugins to be created to be run on any file.

Output:
   None

__init__(): MANDATORY: Any initialization code the category requires. It must
            also call the __init__ for the MastiffPlugin class.

is_my_filetype(id_dict, file_name): MANDATORY: This function will return
            the cat_name if the given id_dict pertains to one of the
            file types this category can examine. The file_name is also given
            so additional tests can be performed, if required. None should be
            returned if it does not analyze this type.
"""

__version__ = "$Id: 58d893fbc4b026eb0104912013663e1562446620 $"

import mastiff.plugins.category.categories as categories

class GenericCat(categories.MastiffPlugin):
    """Catch-all category: it claims every file it is asked about."""

    def __init__(self, name=None):
        """Set up the category with its name and an empty magic-type list."""
        categories.MastiffPlugin.__init__(self, name)
        # No magic strings are listed because nothing is ever filtered out.
        self.my_types = list()
        self.cat_name = 'Generic'

    def is_my_filetype(self, id_dict, file_name):
        """Return the category name unconditionally.

        Both arguments are accepted to match the other categories and are
        not examined.
        """
        return self.cat_name


if __name__ == '__main__':
    # Manual smoke test: build the category and show its name.
    category = GenericCat()
    print(category.cat_name)


================================================
FILE: mastiff/plugins/category/generic.yapsy-plugin
================================================
[Core]
Name = Generic Category
Module = generic

[Documentation]
Description = Generic Files Category Plugin
Author = Tyler Hudak
Website = www.korelogic.com
Version = 1.0
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/category/office.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Microsoft Office File Category Plugin

File Type: Microsoft Office Documents
Purpose:
  This file contains the code for the category class "office", which
  allows plugins to be created to be run on Microsoft Office documents.

Output:
   None

__init__(): MANDATORY: Any initialization code the category requires. It must
            also call the __init__ for the MastiffPlugin class.
"""

__version__ = "$Id: 55366bcaec0c51d2372ef988b3eef4141f351416 $"

import mastiff.plugins.category.categories as categories
import mastiff.filetype as FileType

class OfficeCat(categories.MastiffPlugin):
    """Category class for Microsoft Office files.

    A file is claimed when its magic string mentions one of the known Office
    descriptions or when the Yara rule for the OLE header matches.
    """

    def __init__(self, name=None):
        """Initialize the category.

        Sets the category name, the magic-string fragments that identify an
        Office document and the Yara rule used as a secondary check.
        """
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'Office'
        self.my_types = [ 'CDF V2 Document', # PPT, DOC, XLS
                          'Composite Document File V2',
                          'Microsoft Word',
                          'Microsoft Office Word',
                          'Microsoft Excel',
                          'Microsoft PowerPoint',
                          'Microsoft Office Document'
                          ]
        self.yara_filetype = """rule isOleDoc {
	    condition:
		    ( uint32(0x0) == 0xe011cfd0 and uint32(0x4) == 0xe11ab1a1 ) or
		    // some old beta versions have this signature
		    ( uint32(0x0) == 0x0dfc110e and uint32(0x4) == 0x0e11cfd0 )
        }"""

    def is_my_filetype(self, id_dict, file_name):
        """Determine if the file belongs to the Office category.

        Input:
            - id_dict: Mapping of identification results; only the 'magic'
                       entry is read here.
            - file_name: Path of the file, handed to the Yara check.

        Returns the category name when a check succeeds, otherwise None.
        None is also returned, without running the Yara check, when the
        magic entry is missing or is not a container.
        """

        # check magic string first
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic']]:
                return self.cat_name
        except (KeyError, TypeError):
            # no usable magic string was supplied
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        return None


================================================
FILE: mastiff/plugins/category/office.yapsy-plugin
================================================
[Core]
Name = Microsoft Office Category
Module = office

[Documentation]
Description = Microsoft Office Category Plugin
Author = Tyler Hudak
Website = www.korelogic.com
Version = 1.0
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/category/pdf.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Adobe PDF Category Plugin

File Type: Adobe PDF files
Purpose:
  This file contains the code for the category class "pdf", which
  allows plugins to be created to be run on Adobe PDF files.

Output:
   None

__init__(): MANDATORY: Any initialization code the category requires. It must
            also call the __init__ for the MastiffPlugin class.
"""

__version__ = "$Id: 310cf87b738bb0ecdf968865c63e94ed0af9d83a $"

import mastiff.plugins.category.categories as categories
import mastiff.filetype as FileType

class PDFCat(categories.MastiffPlugin):
    """Category class for Adobe PDFs.

    A file is claimed when its magic string mentions a PDF description, when
    the Yara rule matches, or when '%PDF-' occurs in its first 1024 bytes.
    """

    def __init__(self, name=None):
        """Initialize the category.

        Sets the category name, the magic-string fragments that identify a
        PDF and the Yara rule used as a secondary check.
        """
        categories.MastiffPlugin.__init__(self, name)
        self.cat_name = 'PDF'
        self.my_types = [ 'PDF document',
                          'Adobe Portable Document Format' ]
        self.yara_filetype = """rule ispdf {
	    strings:
		    $PDF = "%PDF-"
	    condition:
		    $PDF in (0..1024)
        }"""

    def is_my_filetype(self, id_dict, file_name):
        """Determine if the file belongs to the PDF category.

        Input:
            - id_dict: Mapping of identification results; only the 'magic'
                       entry is read here.
            - file_name: Path of the file, used for the Yara and header checks.

        Returns the category name when any check succeeds, otherwise None.
        None is also returned, without running the later checks, when the
        magic entry is missing or is not a container.
        """

        # check the magic string for our file type
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic'] ]:
                return self.cat_name
        except (KeyError, TypeError):
            # no usable magic string was supplied
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        # the PDF header may be in the first 1024 bytes of the file
        # libmagic and TrID may not pick this up
        # Read in binary mode: the content is arbitrary bytes and text mode
        # can translate or cut the data short on some platforms.
        with open(file_name, 'rb') as pdf_file:
            data = pdf_file.read(1024)

        if b'%PDF-' in data:
            return self.cat_name

        return None


================================================
FILE: mastiff/plugins/category/zip.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Zip File Category Plugin

File Type: Zip Archive
Purpose:
  This file contains the category class to analyze Zip archives.
Output:
   None

"""

__version__ = "$Id: a59af7dd53c334712c50d1d05787a63da5e448a6 $"

import zipfile
import mastiff.plugins.category.categories as categories
import mastiff.filetype as FileType

class ZipCat(categories.MastiffPlugin):
    """Category class for Zip documents.

    A file is claimed when zipfile.is_zipfile() accepts it, when its magic
    string mentions a Zip description, or when the Yara rule matches.
    """

    def __init__(self, name=None):
        """Initialize the category.

        Sets the category name, the magic-string fragments that identify a
        Zip archive and the Yara rule used as a secondary check.
        """
        categories.MastiffPlugin.__init__(self, name)

        self.cat_name = 'ZIP'
        self.my_types = [ 'Zip archive', 'ZIP compressed archive' ]
        self.yara_filetype = """rule iszip {
	    condition:
		    uint32(0x0) == 0x04034b50
        }"""

    def is_my_filetype(self, id_dict, file_name):
        """Determine if the file belongs to the ZIP category.

        Input:
            - id_dict: Mapping of identification results; only the 'magic'
                       entry is read here.
            - file_name: Path of the file, used for the zipfile and Yara checks.

        Returns the category name when any check succeeds, otherwise None.
        None is also returned, without running the later checks, when the
        zipfile check raises or when the magic entry is missing or is not a
        container.
        """

        # Use the python library first
        # NOTE(review): is_zipfile can return true for non-zipfiles; this
        # check relies on its answer alone and does not try to open the file.
        try:
            if zipfile.is_zipfile(file_name) is True:
                return self.cat_name
        except Exception:
            return None

        # check magic string next
        try:
            if [ type_ for type_ in self.my_types if type_ in id_dict['magic']]:
                return self.cat_name
        except (KeyError, TypeError):
            # no usable magic string was supplied; a missing 'magic' key is
            # treated like the other categories treat it
            return None

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        return None


================================================
FILE: mastiff/plugins/category/zip.yapsy-plugin
================================================
[Core]
Name = Zip Archive Category Plugin
Module = zip

[Documentation]
Description = Zip Archive Category Plugin
Author = Tyler Hudak
Website = www.korelogic.com
Version = 1.0
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/output/OUTPUT-raw.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Raw Output Plug-In

This output plug-in writes the output in its raw repr() state to a file.
"""

__version__ = "$Id: 4c5a3bcd2b75a26af7638c27124b544b3ce3d8f0 $"

import logging
import mastiff.plugins.output as masOutput

class OUTPUTRaw(masOutput.MastiffOutputPlugin):
    """Raw output plugin.

    Writes the repr() of the collected output to output_raw.txt in the
    configured log directory.
    """

    def __init__(self):
        """Initialize the plugin."""
        masOutput.MastiffOutputPlugin.__init__(self)

    def activate(self):
        """Activate the plugin."""
        masOutput.MastiffOutputPlugin.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        masOutput.MastiffOutputPlugin.deactivate(self)

    def output(self, config, output):
        """Write repr(output) to <log_dir>/output_raw.txt.

        Input:
            - config: Configuration object; get_bvar() is asked whether this
                      plugin is enabled and get_var() supplies the log dir.
            - output: The data to dump.

        Returns True when the file was written or the plugin is disabled,
        and False when the output file could not be opened.
        """
        log = logging.getLogger('Mastiff.Plugins.Output.' + self.name)
        if config.get_bvar(self.name, 'enabled') is False:
            log.debug('Disabled. Exiting.')
            return True

        log.info('Writing raw output.')
        try:
            raw_file = open(config.get_var('Dir', 'log_dir')+'/output_raw.txt', 'w')
        except IOError as err:
            log.error('Could not open output_raw.txt file for writing: {}'.format(err))
            return False

        # make sure the handle is released even if repr() or the write fails
        try:
            raw_file.write(repr(output))
        finally:
            raw_file.close()
        return True


================================================
FILE: mastiff/plugins/output/OUTPUT-raw.yapsy-plugin
================================================
[Core]
Name = Raw Output
Module = OUTPUT-raw

[Documentation]
Description = Dumps output in its raw structure format.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/output/OUTPUT-text.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Text Output Plug-In

This output plug-in writes the output to a text file.
"""

__version__ = "$Id: 3ba469857b0e052b44f17b90268cbfeace7145cf $"

import logging
import mastiff.plugins.output as masOutput

def renderText(page_format, logdir, filename, datastring):
    """ Places the datastring previously created into the appropriate file or files.

        Input:
            - page_format: 'single' appends to <logdir>/output_txt.txt and adds
                           a separator line; 'multiple' writes (overwriting)
                           <logdir>/<filename>.txt.
            - logdir: Directory the output file is placed in.
            - filename: Base name of the per-plugin file ('multiple' only).
            - datastring: Text to write; it is encoded as UTF-8 first.

        Returns False when page_format is not recognised or the file cannot
        be opened. Nothing is returned explicitly on success.
    """

    log = logging.getLogger('Mastiff.Plugins.Output.OUTPUTtext.renderText')
    # print out the formatted text for the plug-in
    if page_format == 'single':
        # all data is on one page, open up one file for it
        out_filename = logdir + '/output_txt.txt'
        mode = 'a'
        # add a separater between plug-in output
        datastring += '*'*80 + '\n'
    elif page_format == 'multiple':
        # data should be broken up into individual files.
        # this will be set for each file
        out_filename = logdir + '/' + filename + '.txt'
        mode = 'w'
    else:
        # report the value we were actually given
        log.error('Invalid format type for output plugin: {}'.format(page_format))
        return False

    try:
        txt_file = open(out_filename, mode)
    except IOError as err:
        log.error('Could not open {} file for writing: {}'.format(out_filename, err))
        return False

    # make sure the handle is released even if encoding or writing fails
    try:
        txt_file.write(datastring.encode('utf-8', 'replace'))
    finally:
        txt_file.close()

def _extend(data, length=0):
    """ Returns a unicode string that is left justified by the length given. """
    if data is None:
        return u""

    try:
        outstr = data.ljust(length)
    except AttributeError:
        outstr = str(data).ljust(length)
    except UnicodeEncodeError:        
        outstr = data.decode('utf-8').ljust(length)

    if isinstance(outstr, unicode):
        return outstr
    else:
        return unicode(outstr, 'utf-8', 'replace')

def processPage(plugin, page, page_format):
    """ Turn one page of tables into a single block of text.

        Input:
            - plugin: Plug-in name, used in the heading for 'single' format.
            - page: Iterable yielding (title, table, index) triples.
            - page_format: 'single' adds a results heading before the tables.

        Tables are emitted in index order. A table is laid out in columns
        unless its printVertical flag is set, in which case every row is
        written as one "header value" line per column.

        Returns the text as a unicode string.
    """

    def _label(column):
        # column names store spaces as masOutput.SPACE; show real spaces
        return column.name.replace(masOutput.SPACE, ' ')

    rendered = unicode('', 'utf-8')
    if page_format == 'single':
        rendered += '\n{} Plug-in Results\n\n'.format(plugin)

    # walk the tables in index order
    for (title, mytable, index) in sorted(page, key=lambda entry: entry[2]):

        # start with the width of every column name
        widths = [ len(_label(column)) for column in mytable.header ]

        if mytable.printVertical is False:
            # horizontal layout: widen columns to fit the data, where a
            # single value contributes at most 60 characters
            for row in mytable:
                cell_lens = list()

                for cell in row[1:]:
                    try:
                        cell_lens.append(min(60, len(cell)))
                    except TypeError:
                        # not a str or unicode value, measure its str() form
                        cell_lens.append(min(60, len(str(cell))))

                widths = map(max, zip(widths, cell_lens))

            # header line followed by a dashed underline
            if mytable.printHeader is not False:
                rendered += "  ".join(_label(column).ljust(width) for column, width in zip(mytable.header, widths)) + '\n'
                rendered += '  '.join([ '-'*width for width in widths ])

            # one line per row, every value padded to its column width + 2
            lines = [ "".join([ _extend(cell, width+2) for cell, width in zip(row[1:], widths) ])
                      for row in mytable ]

            rendered += '\n'
            rendered += "\n".join(lines)
            rendered += '\n\n'

        else:
            # vertical layout: every header is padded to the widest name + 2
            label_width = max(widths) + 2
            labels = [ _label(column).ljust(label_width) for column in mytable.header ]

            chunks = list()
            for row in mytable:
                chunks.append("\n".join([ label + _extend(value, 0) for label, value in zip(labels, row[1:]) ]))
                chunks.append("\n\n")

            rendered += "".join(chunks)
            rendered += '\n'

    return rendered

class OUTPUTtext(masOutput.MastiffOutputPlugin):
    """Text output plugin.

    Formats every plug-in page as text and writes it through renderText(),
    either into one combined file or into one file per plug-in.
    """

    def __init__(self):
        """Initialize the plugin."""
        masOutput.MastiffOutputPlugin.__init__(self)

    def activate(self):
        """Activate the plugin."""
        masOutput.MastiffOutputPlugin.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        masOutput.MastiffOutputPlugin.deactivate(self)

    def output(self, config, data):
        """Write the analysis results as text.

        Input:
            - config: Configuration object; get_bvar() is asked whether this
                      plugin is enabled, get_var() supplies the page format
                      and the log dir.
            - data: Mapping whose first key holds the results, organised as
                    category -> plug-in -> page.

        The File Information page of the Generic category is written first;
        every other page follows, grouped by category.

        Returns True when done or when the plugin is disabled, and False
        when data is empty or the File Information page is missing.
        """
        log = logging.getLogger('Mastiff.Plugins.Output.' + self.name)
        if config.get_bvar(self.name, 'enabled') is False:
            log.debug('Disabled. Exiting.')
            return True

        log.info('Writing text output.')

        txtstr = unicode('', 'utf-8')
        page_format = config.get_var(self.name, 'format')

        # only the first entry of data is written; bail out cleanly when
        # there is none instead of failing on the index lookup
        keys = data.keys()
        if not keys:
            log.error('No output data available. Aborting.')
            return False
        file_data = data[keys[0]]

        # we need to output the File Information plugin first as it contains the
        # summary information on the analyzed file
        try:
            log.debug('Writing file information.')
            info_page = file_data['Generic']['File Information']
            txtstr += processPage('File Information', info_page, page_format)
            renderText(page_format, config.get_var('Dir', 'log_dir'), info_page.meta['filename'], txtstr)
            txtstr = unicode('', 'utf-8')
        except KeyError:
            log.error('File Information plug-in data missing. Aborting.')
            return False

        # loop through category data
        for cats, catdata in file_data.iteritems():
            if page_format == 'single':
                catstr = '{} Category Analysis Results'.format(cats)
                log.debug('Writing {} results.'.format(cats))
                txtstr += '{}\n'.format(catstr) + '-'*len(catstr) + '\n'

            # loop through plugin data and generate the output text
            for plugin, pages in catdata.iteritems():
                if cats == 'Generic' and plugin == 'File Information':
                    # already written above
                    continue

                # process the page into its output string
                txtstr += processPage(plugin, pages, page_format)

                # render the text into the appropriate location
                renderText(page_format, config.get_var('Dir', 'log_dir'), pages.meta['filename'], txtstr)
                # start the next page from an empty unicode buffer, like the
                # other resets in this method
                txtstr = unicode('', 'utf-8')

        return True


================================================
FILE: mastiff/plugins/output/OUTPUT-text.yapsy-plugin
================================================
[Core]
Name = Text Output
Module = OUTPUT-text

[Documentation]
Description = Dumps output in text format.
Author = Tyler Hudak
Version = 1.0
Website = www.korelogic.com
License = Apache License, Version 2.0


================================================
FILE: mastiff/plugins/output/__init__.py
================================================
#!/usr/bin/env python

__version__ = "$Id: e4ef370e46aed6093a66918da42c5f2b1665cf83 $"

import collections
import time
from yapsy.IPlugin import IPlugin

# Column descriptor used for entries of a table header; fields: name, type.
BASEHEADER = collections.namedtuple('BASEHEADER', 'name type')
# Leading field of every row tuple; the per-table row type appends the column
# names after ROWINDEX.
BASEROW = collections.namedtuple('BASEROW', 'ROWINDEX')

# the data types we accept for rows.
# TODO: Extensive testing on time to be able to represent the multitude of time formats
#             Maybe have our own class?
DATATYPES = [int, str, float, unicode, time.struct_time]

# characters that spaces should be replaced with
# (the text output plug-in turns this back into a space when printing names)
SPACE='___'

class TableError(Exception):
    """ Table Exception class.

        Raised by table.addrow() when no header is defined or when a row's
        length does not match the header.
    """
    pass

class PageError(Exception):
    """ Page Exception class.

        Raised by page.addTable() when a table is added without a title.
    """
    pass

class table(object):
    """
        Base constructor for table of data.
        A table contains a header and rows of data.
        - The header is just a single row that contains the description of the data.
        - You may only add one row of data at a time
    """
    def __init__(self, header=None, data=None, title=None):
        """
            Initialize the table.
            self.header: List containing the column names in BASEHEADER named tuple type.
            self.rowdef: Named tuple based on BASEROW. Names are based on header def.
            self.title: String describing the contents of the table.
            self.rows: List of self.rowdef named tuples. Contains the table data.
            self.INDEX: Used for row order. Currently automatically generated.
            self.printHeader: False when the header should not be printed.
            self.printVertical: True when rows should be printed vertically.

            Input:
            - header: List containing the data definition.
            - data: List containing the initial row of data to initialize.
            - title: String containing the title for the table.
        """
        self.INDEX = 0
        self.header = None
        self.printHeader = True
        self.printVertical = False
        self.rowdef = None
        self.addheader(header)
        self.title = title
        self.rows = list()
        if data is not None:
            self.addrow(data)
        return

    def __str__(self):
        """ Return a string containing a quickly formatted view of the table. """
        outstring = ''
        if self.title is not None and self.title != '':
            outstring += self.title + '\n'
        if self.header is not None:
            # every column name is followed by a tab, then the line ends
            outstring += ''.join([ str(item.name) + '\t' for item in self.header ])
            outstring += '\n'
        if self.rows is not None and len(self.rows) > 0:
            # rows are listed by their index; the index itself is not printed
            for row in sorted(self.rows, key=lambda x: x[0]):
                outstring += '\t'.join([ str(x) for x in row[1:] ]) + '\n'

        return outstring

    def __repr__(self):
        return '<table [' + repr(self.header) + '], ' + repr(self.rows) + '>'

    def __iter__(self):
        """
            Generator to go through table rows.
            Returns the row tuple of the item.
        """
        for item in self.rows:
            yield item

    def addtitle(self, title=None):
        """ Add a title to the table. A title of None is stored as ''. """
        if title is not None:
            self.title = title
        else:
            self.title = ''

    def addheader(self, header=None, printHeader=True, printVertical=False):
        """ Add a header to the table.
            The header defines the format of the table and should be a list
            composed of the names of the fields in the table, and their type

            After created, the header is used to construct the named tuple for
            all the rows in the table.

            Input:
                - header: List of (name, type) pairs. Nothing happens when None.
                - printHeader: Pass False to stop the header being printed.
                - printVertical: Pass True to print rows vertically.

            Raises TypeError when header is not a list or when a type is not
            one of DATATYPES. The table is left unchanged in that case.
        """
        if header is not None:
            if not isinstance(header, list):
                raise TypeError('Headers must be of type list.')

            # build the new definition on the side so that a bad entry does
            # not leave a half-filled header behind
            columns = list()
            rowdef = tuple()
            for (item, itemtype) in header:

                # make sure itemtype is a valid data type
                if itemtype not in DATATYPES:
                    raise TypeError('Data type is not a valid type for MASTIFF output.')

                # record the declared type of the column, not a fixed one
                columns.append(BASEHEADER(item, itemtype))
                rowdef = rowdef + (item, )

            self.header = columns

            if printHeader is False:
                self.printHeader = False
            if printVertical is True:
                self.printVertical = True

            # if we have a rowdef, create the row def tuple
            if len(rowdef) > 0:
                self.rowdef = collections.namedtuple('ROWTUPLE', BASEROW._fields + (rowdef ))

    def addrow(self, row):
        """
            Add a row of data to the table.
            A header must be defined prior to adding any rows of data.
            Input:
                - row: Iterable containing row of data to add to the table. (best if list or tuple used)
                        Each item in the iterable will be placed into a separate column in the table.
                        Nothing is added when row is None.

            Raises TableError when no header (or an empty one) is defined or
            when the row length differs from the header length, and TypeError
            when the row cannot be measured or turned into a row tuple.
        """

        # make sure we have a header defined; an empty header list never
        # produced a row definition, so it cannot take rows either
        if self.header is None or self.rowdef is None:
            raise TableError('Header is needed before rows can be added.')

        if self.rows is None:
            self.rows = list()

        # go through the data and add to the table
        if row is not None:
            # The data should be an iterable.
            try:
                if len(row) != len(self.header):
                    raise TableError('Row length ({0}) does not equal header length ({1}).'.format(len(row), len(self.header)))

                # Currently the index (row position in the table) is by the order the data is received
                # TODO: Take in an index
                rowlist = [self.INDEX]
                self.INDEX += 1

                rowlist.extend(row)

                # create and add named tuple into self.rows
                self.rows.append(self.rowdef._make(rowlist))
            except TypeError:
                raise TypeError('Invalid type given for data.')

class page(object):
    """
        A page is a container for multiple tables of data.
        Tables will be listed in the order they are added, unless an index is specified
        when the table is added.
    """
    def __init__(self):
        """ Create an empty page.

            self.tables: Dict mapping a table title to {'table': ..., 'index': ...}.
            self.meta: Dict of page metadata; 'filename' starts as 'CHANGEME'.
            self.counter: Number of addTable() calls, used as the default index.
        """
        self.tables = dict()
        self.meta = dict()
        self.meta['filename'] = 'CHANGEME'
        self.counter = 0

    def __getitem__(self, title):
        """ Overload the getitem operator to return a specified table.

            Raises KeyError when no table has the given title.
        """
        try:
            return self.tables[title]['table']
        except KeyError:
            raise KeyError('Table {} does not exist.'.format(title))

    def __iter__(self):
        """
            Generator to go through the list of tables, sorted by index.
            Yields a list of [ title, table, index ]
        """
        # order the titles by the index stored next to each table
        for title in sorted(self.tables, key=lambda name: self.tables[name]['index']):
            entry = self.tables[title]
            yield [ title, entry['table'], entry['index'] ]

    def __str__(self):
        """ Return the str() of every table, joined in index order. """
        outstring = ''
        for mytable in sorted(self.tables.items(), key=lambda item: item[1]['index']):
            outstring += str(mytable[1]['table'])

        return outstring

    def __repr__(self):
        return '<page [' + repr(self.tables) + '] >'

    def addTable(self, title, header=None, index=None):
        """ Create a new table, store it on the page and return it.

            Input:
                - title: Title of the table; also the key it is stored under,
                         so an existing table with the same title is replaced.
                - header: Optional header definition handed to the table.
                - index: Position of the table on the page. Defaults to the
                         number of tables added so far.

            Raises PageError when title is None or empty.
        """
        if title is None or title == '':
            raise PageError('New tables must have a title.')

        if index is None:
            index = self.counter

        newTable = table(header=header, title=title)
        self.tables[title] = { 'table': newTable, 'index': index }
        self.counter += 1
        return newTable

class MastiffOutputPlugin(IPlugin):
    """The base plugin class every output plugin should inherit.

    It stores the plugin name, forwards activation and deactivation to
    IPlugin and provides a default output() that reports failure.
    """

    def __init__(self, name=None):
        """Initialize the Mastiff plugin class and remember its name."""
        IPlugin.__init__(self)
        self.name = name

    def activate(self):
        """Power rings activate! Form of Mastiff Plugin!

        Delegates to IPlugin.activate().
        """
        IPlugin.activate(self)

    def deactivate(self):
        """Deactivate plugin by delegating to IPlugin.deactivate()."""
        IPlugin.deactivate(self)

    def output(self, config, data):
        """ Output function. Should be overwritten by plugins.

            This default ignores both arguments and returns False.
        """
        return False

    def set_name(self, name=None):
        """
           Yapsy does not provide an easy way to get or set our own
           name, so here's a function to do so.

           Stores name on the instance and returns the stored value.
        """
        self.name = name
        return self.name


================================================
FILE: mastiff/queue.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
   The queue module is used to add a job queue to MASTIFF. The MastiffQueue
   class uses the MASTIFF SQLite database to keep track of any files that are
   required to be analyzed. It works as a LIFO queue and has no priorities.

   This module was originally taken from Thiago Arruda's public domain Python
   job queue at http://flask.pocoo.org/snippets/88/ and has had some minor
   modifications made to make it in-line with MASTIFF.
"""

__version__ = "$Id"

import os, sqlite3, os.path
import sys
from cPickle import loads, dumps
from time import sleep
try:
    from thread import get_ident
except ImportError:
    from dummy_thread import get_ident

import mastiff.conf as Conf
import logging

class MastiffQueue(object):
    """
       Job queue stored in a SQLite database.

       Jobs are pickled and kept as BLOBs in a table named "queue". The row
       with the lowest id (the oldest job) is always handed out first, so
       the queue is first-in/first-out; there are no priorities.

       One SQLite connection is opened per thread identifier and cached for
       the lifetime of the object.
    """

    _create = (
            'CREATE TABLE IF NOT EXISTS queue '
            '('
            '  id INTEGER PRIMARY KEY AUTOINCREMENT,'
            '  file BLOB'
            ')'
            )
    _count = 'SELECT COUNT(*) FROM queue'
    _iterate = 'SELECT id, file FROM queue'
    _append = 'INSERT INTO queue (file) VALUES (?)'
    _write_lock = 'BEGIN IMMEDIATE'
    _popleft_get = (
            'SELECT id, file FROM queue '
            'ORDER BY id LIMIT 1'
            )
    _popleft_del = 'DELETE FROM queue WHERE id = ?'
    _peek = (
            'SELECT file FROM queue '
            'ORDER BY id LIMIT 1'
            )
    _peek_all = (
            'SELECT file FROM queue '
            'ORDER BY id'
             )

    def __init__(self, config):
        """
           Locate the queue database and make sure the queue table exists.

           config is handed unchanged to mastiff.conf.Conf. The database
           path is the 'log_dir' option of the [Dir] section joined with the
           'db_file' option of the [Sqlite] section.

           The process exits with status 1 if the directory that should hold
           the database cannot be created.
        """
        log = logging.getLogger('Mastiff.Queue.init')

        # Read the config file and find where the DB is
        conf = Conf.Conf(config)
        self.path = os.path.abspath(conf.get_var('Dir', 'log_dir') + os.sep + conf.get_var('Sqlite', 'db_file'))
        log.debug('Setting up queue table at %s' % self.path)

        # create the dir if it doesn't exist
        db_dir = os.path.dirname(self.path)
        if not os.path.isdir(db_dir):
            try:
                os.makedirs(db_dir)
            except OSError as err:
                # report the directory that failed, not the database file
                log.error('Could not make %s: %s. Exiting.', db_dir, err)
                sys.exit(1)

        if not os.path.isfile(self.path):
            # does not exist, create; close the probe connection right away
            # so it is not leaked
            try:
                sqlite3.connect(self.path).close()
            except sqlite3.OperationalError as err:
                log.error('Cannot access sqlite DB: %s.', err)

        self._connection_cache = {}
        with self._get_conn() as conn:
            # create the table if required
            conn.execute(self._create)

    def __len__(self):
        """ Allows len(queue) to return the number of items to be processed. """
        with self._get_conn() as conn:
            return next(conn.execute(self._count))[0]

    def __iter__(self):
        """ Yield every queued job (unpickled) without removing it. """
        with self._get_conn() as conn:
            for _row_id, obj_buffer in conn.execute(self._iterate):
                yield loads(bytes(obj_buffer))

    def __str__(self):
        """ Return the queued jobs, one per line. """
        # str() each job so non-string jobs do not break the join
        return '\n'.join(str(item) for item in self)

    def _get_conn(self):
        """ Return the cached connection for the calling thread, opening it on first use. """
        my_id = get_ident()
        if my_id not in self._connection_cache:
            self._connection_cache[my_id] = sqlite3.Connection(self.path, timeout=60)
        return self._connection_cache[my_id]

    def append(self, obj):
        """ Add a job to the queue. obj is pickled (protocol 2) before being stored. """
        obj_buffer = sqlite3.Binary(dumps(obj, 2))
        with self._get_conn() as conn:
            conn.execute(self._append, (obj_buffer,))

    def popleft(self, sleep_wait=False):
        """
           Remove the oldest job from the queue and return it.

           Returns None if the queue is empty and sleep_wait is False (the
           default). If sleep_wait is True, the call keeps polling, sleeping
           between attempts, until a job becomes available.
        """
        wait = 0.1
        max_wait = 2
        tries = 0
        with self._get_conn() as conn:
            while True:
                # take the write lock so the select and delete are atomic
                conn.execute(self._write_lock)
                row = conn.execute(self._popleft_get).fetchone()
                if row is not None:
                    my_id, obj_buffer = row
                    conn.execute(self._popleft_del, (my_id,))
                    # leaving the with block commits the delete
                    return loads(bytes(obj_buffer))

                conn.commit() # unlock the database
                if not sleep_wait:
                    return None
                tries += 1
                sleep(wait)
                # floor division keeps the original integer back-off steps
                wait = min(max_wait, tries // 10 + wait)

    def peek(self):
        """ Return the next item in the queue, but do not remove it. Returns None if the queue is empty. """
        with self._get_conn() as conn:
            row = conn.execute(self._peek).fetchone()
        if row is None:
            return None
        return loads(bytes(row[0]))

    def clear_queue(self):
        """ Clear the job queue by popping until it is empty. """
        while len(self) > 0:
            self.popleft(sleep_wait=False)


================================================
FILE: mastiff/sqlite.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
sqlite.py

This file contains helper functions used to assist MASTIFF plug-ins in placing
data into a sqlite database.

"""

__version__ = "$Id: 1ca5305893915a251426468100fa9688b59332d7 $"

import logging
import os
import re

import sqlite3

def open_db(db_name):
    """
       Return a sqlite3 Connection object for the given database name.

       If the file does not exist, a warning is logged and sqlite3 is left
       to create it. The returned connection has its text_factory set to
       str.

       Returns None (after logging the error) if the database cannot be
       opened.
    """

    log = logging.getLogger('Mastiff.DB.open')
    if not os.path.isfile(db_name):
        log.warning('%s does not exist. Will attempt to create.', db_name)

    try:
        db = sqlite3.connect(db_name)
    except sqlite3.OperationalError as err:
        log.error('Cannot access sqlite DB: %s.', err)
        # bail out here: there is no connection to configure
        return None

    db.text_factory = str

    return db

def open_db_conf(config):
    """
       Read the DB information from a MASTIFF config object.

       Reads the 'base_dir' option of the [Dir] section and the 'db_file'
       option of the [Sqlite] section through config.get_var().

       If db_file carries a directory component that exists (after "~"
       expansion), db_file itself is opened. Otherwise it is opened
       underneath base_dir.

       Return a Sqlite Connection or None.
    """
    log = logging.getLogger('Mastiff.DB.open_db_conf')
    log_dir = config.get_var('Dir','base_dir')
    mastiff_db = config.get_var('Sqlite', 'db_file')

    if not mastiff_db or log_dir is None:
        log.error('Unable to open DB.')
        return None

    # db_file can be a full path - if it is, then use it
    dirname = os.path.expanduser(os.path.dirname(mastiff_db))
    if dirname and os.path.exists(dirname):
        # open the same expanded location that was just checked; sqlite
        # does not expand "~" on its own
        return open_db(os.path.expanduser(mastiff_db))

    return open_db(os.path.expanduser(log_dir) + os.sep + mastiff_db)

def sanitize(string):
    """
       Strip every character that is not safe to splice into a sqlite3
       statement.

       The result keeps only ASCII letters, digits, underscores and
       whitespace; everything else is removed.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9_\s]')
    return disallowed.sub('', string)

def check_table(db, table):
    """
       Return True if a table exists, False otherwise.

       db is a sqlite3 connection and table is the table name, which is
       sanitized before being used.
    """
    cursor = db.cursor()

    # Table names cannot be bound as sqlite3 parameters, so the sanitized
    # name is concatenated into the probe statement instead.
    probe = 'SELECT * FROM ' + sanitize(table)
    try:
        cursor.execute(probe)
    except sqlite3.OperationalError:
        # the select failed, so the table is treated as missing
        return False
    return True

def add_table(db, table, fields):
    """
        Add a table to a database.
        Table is a string of the table name.
        fields is a list of columns in the form 'column_name column_type'
        Returns True if successful, False otherwise.
    """
    conn = db.cursor()

    if check_table(db, table):
        # Table already exists
        return True

    query = 'CREATE TABLE ' + sanitize(table) + '('
    for item in fields:
        query = query + sanitize(item) + ','
    query = query[:-1] + ')'

    try:
        conn.execute(query)
        db.commit()
    except sqlite3.OperationalError, err:
        log = logging.getLogger('Mastiff.DB.add_table')
        log.error('Could not add table %s: %s', table, err)
        return False

    return True

def add_column(db, table, col_def):
    """
       Alter an existing table by adding a column to it.

       db is a sqlite3 db connection
       table is the table name
       col_def is the column definition

       Returns False if the table does not exist or the column could not
       be added. Returns True if the column was added or a column of that
       name already exists.
    """
    log = logging.getLogger('Mastiff.DB.add_column')
    if not check_table(db, table):
        log.error('Table %s does not exist.', table)
        return False

    conn = db.cursor()

    # check_table() looks up the sanitized name, so alter that same table.
    # NOTE(review): col_def is spliced in unchanged - callers must not pass
    # untrusted text here.
    query = 'ALTER TABLE ' + sanitize(table) + ' ADD COLUMN ' + col_def
    try:
        conn.execute(query)
        db.commit()
    except sqlite3.OperationalError as err:
        # dup column name errors are fine
        if 'duplicate column name' not in str(err):
            log.error('Could not add column: %s', err)
            return False
    else:
        log.debug('Extended %s with column def "%s".', table, col_def)

    return True

def create_mastiff_tables(db):
    """
        Create the tables in the MASTIFF database to store
        the main analysis information.

        db is a sqlite3 db connection

        Returns True if the 'mastiff' table exists afterwards, False if it
        could not be created.
    """
    if check_table(db, 'mastiff'):
        # table already exists, nothing to do
        return True

    fields = ['id INTEGER PRIMARY KEY',
              'md5 TEXT DEFAULT NULL',
              'sha1 TEXT DEFAULT NULL',
              'sha256 TEXT DEFAULT NULL',
              'type TEXT DEFAULT NULL']

    # add_table() signals failure with False (it never returns None), so
    # test its truth value rather than comparing against None
    if not add_table(db, 'mastiff', fields):
        return False
    db.commit()

    return True

def get_id(db, hashes):
    """
       Return the db id number of the given tuple of hashes.

       db is a sqlite3 connection. hashes is indexed as
       (md5, sha1, sha256); all three must match a row of the mastiff
       table.

       Returns None if no row matches or if the query could not be
       executed (the error is logged).
    """

    log = logging.getLogger('Mastiff.DB.get_id')
    cur = db.cursor()
    try:
        cur.execute('SELECT id FROM mastiff '
                    'WHERE (md5=? AND sha1=? AND sha256=?)',
                    (hashes[0], hashes[1], hashes[2]))
    except sqlite3.OperationalError as err:
        log.error('Could not execute query: %s', err)
        return None

    row = cur.fetchone()
    if row is None:
        return None
    return row[0]

def insert_mastiff_item(db, hashes, cat_list=None):
    """
       Insert info on analyzed file into database.

       db is a sqlite3 connection. hashes is indexed as
       (md5, sha1, sha256). If those hashes are not in the mastiff table
       yet, a new row is inserted for them. If cat_list is given, its
       str() form is stored in the row's type column.

       Returns the row id, or None if the row could not be inserted.
    """

    log = logging.getLogger('Mastiff.DB.Insert')

    # we'll create the tables just to be sure they exist
    create_mastiff_tables(db)

    cur = db.cursor()
    sqlid = get_id(db, hashes)

    if sqlid is not None:
        # already in there, just send back the id
        log.debug('Hashes %s are already in the database.', hashes)
    else:
        try:
            cur.execute('INSERT INTO mastiff (md5, sha1, sha256) '
                        'VALUES (?, ?, ?)',
                        (hashes[0], hashes[1], hashes[2]))
            db.commit()
        except sqlite3.OperationalError as err:
            log.error('Could not insert item into mastiff: %s', err)
            return None
        sqlid = cur.lastrowid

    if cat_list is not None and sqlid is not None:
        try:
            log.info('Adding %s', str(cat_list))
            cur.execute('UPDATE mastiff SET type=? WHERE id=?',
                        (str(cat_list), sqlid, ))
            db.commit()
        except sqlite3.OperationalError as err:
            # a failed type update is logged but the id is still returned
            log.error('Could not update file type in DB: %s', err)

    return sqlid

# testing functions
# Manual smoke test: exercises the helpers above against /tmp/test.db.
if __name__ == '__main__':

    # configure logging for Mastiff module
    format_ = '[%(asctime)s] [%(levelname)s] [%(name)s] : %(message)s'
    logging.basicConfig(format=format_)
    log = logging.getLogger("Mastiff")
    log.setLevel(logging.DEBUG)

    mysql = open_db('/tmp/test.db')
    if mysql is None:
        print("Was not created")
        # nothing below can work without a connection
        raise SystemExit(1)

    create_mastiff_tables(mysql)
    print("*** TEST: inserting items")
    insert_mastiff_item(mysql, ('123', '345', '456'), 'filename')
    insert_mastiff_item(mysql, ('135', '790', '246'), 'filename2')
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename3')
    insert_mastiff_item(mysql, ('444', '666', '888'), 'filename4')
    print("*** TEST: insert dup hashes")
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename5')
    print("*** TEST: insert dup filename")
    insert_mastiff_item(mysql, ('111', '333', '555'), 'filename3')
    print("*** TEST: add column")
    add_column(mysql, 'mastiff', 'test_col TEXT DEFAULT NULL')
    mysql.close()


================================================
FILE: mastiff.conf
================================================
# This is the configuration file for mastiff.
#
# Comments are preceded by a # or ;
#

[Dir]
# log_dir is the base directory where the logs generated will
# be placed in.
#log_dir = /usr/local/mastiff/log
log_dir = ./work/log

# plugin_dir is a list of directories test plugins may be present in.
# should be comma-separated.
# This may be left blank.
# For example:
#plugin_dir = ./plugins, /etc/mastiff
plugin_dir = 

# output_plugin_dir is a list of directories test output plugins may be present in.
# should be comma-separated.
# This may be left blank.
# For example:
#output_plugin_dir = ./plugins, /etc/mastiff
output_plugin_dir = 

[Misc]
# verbose = [on|off]
verbose = off
# Make a copy of the analyzed file in the log directory with a .VIR extension.
# copy = [on|off]
copy = on

[Sqlite]
# Sqlite database options
# db_file = Name of the database file
db_file = mastiff.db

[File ID]
# trid is the location of the TrID binary
# trid_db is the location of the TrID database
#trid = /usr/local/bin/trid
trid = 
trid_db = 

[Fuzzy Hashing]
# compare decides whether or not to correlate previous fuzzy hashes
# compare = [on|off]
compare = on

[Hex Dump]
# Options for Hex Dump plug-in
# enabled = [on|off]
enabled = off

[Embedded Strings Plugin]
# Options for the Embedded Strings Plugin.
# strcmd is the path to the strings command
# DO NOT CHANGE THE FOLLOWING OPTIONS UNLESS YOU KNOW WHAT YOU ARE DOING!
# str_opts are the options to use for all strings operations
# str_uni_opts are the options to use to obtain UNICODE strings
strcmd = /usr/bin/strings
str_opts = -a -t d
str_uni_opts = -e l

[VirusTotal]
# Options for the VirusTotal Submission Plug-in.
# api_key is your API key from virustotal.com
#   - Leave this empty if you wish to disable this plug-in
api_key = 

# submit [on|off] - submit binary to VirusTotal
submit = off

[Metascan Online]
# Options for the Metascan Online Submission Plug-in.
# api_key is your API key from metascan-online.com
#   - Leave this empty if you wish to disable this plug-in
api_key = 

# submit [on|off] - submit binary to Metascan Online
submit = off

[MASTIFF Online]
# Options for submission to MASTIFF Online
# accept_terms_of_service [true|false] - To upload samples to MASTIFF Online,
#  you agree to the terms of service and privacy policy located at 
# https://mastiff-online.korelogic.com. Set the option below to true to 
# indicate you agree to the terms.
accept_terms_of_service = false
# submit [on|off] - submit sample to MASTIFF Online
submit = off

[pdfid]
# Options to run Didier Stevens pdfid.py script
# pdfid_cmd = Path to the pdfid.py script
#   - Leave blank if you want the script disabled.
# pdfid_opts = Options for program.
#   - Do not put multiple options in quotes.
# Note: pdfid.py has bugs that may cause errors when examining
#       malformed PDFs when using the -e option.
pdfid_cmd = /usr/local/bin/pdfid.py
#pdfid_opts = -e
pdfid_opts =

[pdf-parser]
# Options to run Didier Stevens pdf-parser.py script
# pdf_cmd = Path to pdf-parser.py.
# feedback: [on|off] - Feed extracted files back into the MASTIFF queue.
pdf_cmd = /usr/local/bin/pdf-parser.py
feedback = on

[PDF Metadata]
# Options for PDF Metadata script
# exiftool = path to exiftool
exiftool = /usr/bin/exiftool

[yara]
# Options for the Yara signature plug-in
# yara_sigs = Base path to Yara signatures. This path will be recursed
#             to find additional signatures.
#             Leave blank to disable the plug-in.
yara_sigs = /usr/local/yara

[Digital Signatures]
# Options to extract the digital signatures
#
# disitool - path to disitool.py script.
# openssl - path to openssl binary
disitool = /usr/local/bin/disitool.py
openssl = /usr/bin/openssl

[Office Metadata]
# Options for Office Metadata script
# exiftool = path to exiftool
exiftool = /usr/bin/exiftool

[Single-Byte Strings]
# options for single-byte string extraction plug-in
# length - Minimum length to extract
length = 3
# raw - print raw characters instead of formatted ones (e.g. \\n vs. \n)
raw = False

[ZipExtract]
# options for Zip archive file extraction plug-in
# enabled: [on|off] - Extract files or not
# password: Password to use for zip file. OK to leave blank.
# feedback: [on|off] - Feed extracted files back into the MASTIFF queue.
enabled = on
password =
feedback = on

[Office pyOLEScanner]
# olecmd = Path to pyOLEScanner.py
olecmd=/usr/local/src/pyOLEScanner/pyOLEScanner.py

################################
# Output Plug-in Configuration
################################

[Raw Output]
# enabled: [on|off] - Dump output in raw form or not
enabled = off

[Text Output]
# enabled = [on|off] - Dump output in text form or not
# format = [multiple|single] - Put text output in individual files or one page.
enabled = on
format = multiple


================================================
FILE: pylint.rc
================================================
[MASTER]

# Specify a configuration file.
#rcfile=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Profiled execution.
profile=no

# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS

# Pickle collected data for later comparisons.
persistent=yes

# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=


[MESSAGES CONTROL]

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time.
#enable=

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once).
disable=C0301,C0326


[REPORTS]

# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html
output-format=parseable

# Include message's id in output
include-ids=no

# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file name "pylint_global.[txt|html]".
files-output=no

# Tells whether to display a full report or only the messages
reports=yes

# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors warning, statement which
# respectively contain the number of errors / warnings messages and the total
# number of statements analyzed. This is used by the global evaluation report
# (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)

# Add a comment according to your evaluation note. This is used by the global
# evaluation report (RP0004).
comment=no


[BASIC]

# Required attributes for module, separated by a comma
required-attributes=

# List of builtins function names that should not be used, separated by a comma
bad-functions=map,filter,apply,input

# Regular expression which should only match correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

# Regular expression which should only match correct module level names
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$

# Regular expression which should only match correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$

# Regular expression which should only match correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match correct method names
method-rgx=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match correct instance attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$

# Regular expression which should only match correct list comprehension /
# generator expression variable names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$

# Good variable names which should always be accepted, separated by a comma
good-names=i,j,k,ex,Run,_

# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata

# Regular expression which should only match functions or classes name which do
# not require a docstring
no-docstring-rgx=__.*__


[FORMAT]

# Maximum number of characters on a single line.
max-line-length=80

# Maximum number of lines in a module
max-module-lines=1000

# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string='    '


[TYPECHECK]

# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes

# List of classes names for which member attributes should not be checked
# (useful for classes with attributes dynamically set).
ignored-classes=SQLObject

# When zope mode is activated, add a predefined set of Zope acquired attributes
# to generated-members.
zope=no

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E0201 when accessed. Python regular
# expressions are accepted.
generated-members=REQUEST,acl_users,aq_parent


[SIMILARITIES]

# Minimum lines number of a similarity.
min-similarity-lines=4

# Ignore comments when computing similarities.
ignore-comments=yes

# Ignore docstrings when computing similarities.
ignore-docstrings=yes


[VARIABLES]

# Tells whether we should check for unused import in __init__ files.
init-import=no

# A regular expression matching the beginning of the name of dummy variables
# (i.e. not used).
dummy-variables-rgx=_|dummy

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid to define new builtins when possible.
additional-builtins=


[MISCELLANEOUS]

# List of note tags to take in consideration, separated by a comma.
notes=FIXME,XXX,TODO


[CLASSES]

# List of interface methods to ignore, separated by a comma. This is used for
# instance to not check methods defines in Zope's Interface base class.
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls


[IMPORTS]

# Deprecated modules which should not be used, separated by a comma
deprecated-modules=regsub,string,TERMIOS,Bastion,rexec

# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled)
import-graph=

# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled)
ext-import-graph=

# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled)
int-import-graph=


[DESIGN]

# Maximum number of arguments for function / method
max-args=5

# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*

# Maximum number of locals for function / method body
max-locals=15

# Maximum number of return / yield for function / method body
max-returns=6

# Maximum number of branch for function / method body
max-branchs=12

# Maximum number of statements in function / method body
max-statements=50

# Maximum number of parents for a class (see R0901).
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=7

# Minimum number of public methods for a class (see R0903).
min-public-methods=2

# Maximum number of public methods for a class (see R0904).
max-public-methods=20


[EXCEPTIONS]

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception


================================================
FILE: setup.cfg
================================================
[egg_info]
tag_build = 
tag_date = 0
tag_svn_revision = 0


================================================
FILE: setup.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
This file is the setup/install script for MASTIFF.
"""

import sys
from setuptools import setup, find_packages
from mastiff import get_release_string

# Stop before calling setup() on interpreters older than 2.6.6.
if not sys.version_info >= (2, 6, 6):
    sys.stderr.write("Mastiff requires python version 2.6.6")
    sys.exit(1)

# Package metadata, collected in one mapping and handed to setuptools below.
metadata = {
    'author': 'Tyler Hudak',
    'author_email': 'mastiff-project@korelogic.com',
    # the sample configuration is installed system-wide
    'data_files': [('/etc/mastiff', ['mastiff.conf'])],
    'description': "MASTIFF is a static analysis automation framework.",
    'install_requires': ['Yapsy == 1.10, !=1.10-python3'],
    'license': 'Apache License V2.0',
    'long_description': """MASTIFF is a static analysis framework that automates the
process of extracting key characteristics from a number of different file
formats. To ensure the framework remains flexible and extensible, a
community-driven set of plug-ins is used to perform file analysis and data
extraction. While originally designed to support malware, intrusion, and
forensic analysis, the framework is well-suited to support a broader range of
analytic needs. In a nutshell, MASTIFF allows analysts to focus on analysis
rather than figuring out how to parse files.""",
    'maintainer': 'Tyler Hudak',
    'maintainer_email': 'mastiff-project@korelogic.com',
    'name': 'mastiff',
    'packages': find_packages(),
    # ship the plug-in sources together with their Yapsy descriptor files
    'package_data': {'': ['*.py', '*.yapsy-plugin']},
    'platforms': ['Linux'],
    'scripts': ['mas.py'],
    'url': 'http://www.korelogic.com',
    'version': get_release_string(),
}

setup(**metadata)


================================================
FILE: skeleton/OUTPUT-skel.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Output plugin skeleton code

Purpose:
  This file provides the skeleton code for a plugin that formats the data 
  generated by the analysis plug-ins. This is an example that shows all 
  functions defined.
  
  __init__(): MANDATORY: Any initialization code the plugin requires. It must
            also call the __init__ for masOutput.MastiffOutputPlugin.

activate(): OPTIONAL: Activation code called by Yapsy to activate the plugin.

deactivate(): OPTIONAL: Deactivated code called by Yapsy.

output(config, data): MANDATORY: Function that formats the data from analysis
                        plug-ins into a specific format. Receives the MASTIFF configuration
                        as the config parameter, and the pages of data in the data 
                        parameter.
"""

__version__ = "$Id: 960d687e79158fbba349a472f85ff2b75d8c9bb1 $"

import logging
import mastiff.plugins.output as masOutput

class OUTPUTSkeleton(masOutput.MastiffOutputPlugin):
    """Skeleton output plugin showing every hook an output plugin can define."""

    def __init__(self):
        """Initialize the plugin."""
        masOutput.MastiffOutputPlugin.__init__(self)

    def activate(self):
        """Activate the plugin."""
        masOutput.MastiffOutputPlugin.activate(self)

    def deactivate(self):
        """Deactivate the plugin."""
        masOutput.MastiffOutputPlugin.deactivate(self)

    def output(self, config, data):
        """
        Walk the analysis results and format them.

        config is the MASTIFF configuration; the plugin is skipped when its
        'enabled' option is False.
        data holds the analysis results. Only the value stored under its
        first key is walked; that value maps each category to a mapping of
        plugin -> pages, and every page unpacks to (title, table, index).

        Returns True, including when the plugin is disabled or there is no
        data to format.
        """
        log = logging.getLogger('Mastiff.Plugins.Output.' + self.name)

        # see if we are enabled
        if config.get_bvar(self.name, 'enabled') is False:
            log.debug('Disabled. Exiting.')
            return True

        log.info('Writing FORMAT output.')

        if not data:
            # nothing was produced by the analysis plug-ins
            return True

        # loop through category data
        for cats, catdata in data[data.keys()[0]].iteritems():
            # heading for this category - use it when writing the output
            catstr = '{0} Category Analysis Results'.format(cats)
            log.debug('Writing {0} results.'.format(cats))

            # loop through plugin data and generate the output text; this
            # must stay inside the category loop so every category is
            # processed, not just the last one
            for plugin, pages in catdata.iteritems():
                # process the page data into the specific format and
                # output it to the appropriate file/files

                # loop through each table in the page, ordered by the
                # index stored as the third item of each page
                for tabledata in sorted(pages, key=lambda page: page[2]):
                    (title, mytable, index) = tabledata

                    # process table data here
                    for row in mytable:
                        # act on row data
                        # (REMOVE THE NEXT LINE)
                        pass

        return True


================================================
FILE: skeleton/OUTPUT-skel.yapsy-plugin
================================================
[Core]
Name = Generic Output Skeleton Plugin
Module = OUTPUT-skel

[Documentation]
Description = Your Description Here
Author = Your Name Here
Version = 0.1
Website = Your Website Here


================================================
FILE: skeleton/analysis-ext-skel.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Analysis Plugin using external program code

Plugin Type: Generic
Purpose:
  This file provides the skeleton code for a plugin that performs static
  analysis on any file given to the Mastiff framework using an external
  program. This is an example that shows all functions defined.

Output:
   None.

In the MASTIFF configuration file, the options for this particular plug-in
would be:

[GenSkel Ext Prog]
plugcmd = /path/to/my_prog
"""

__version__ = "$Id: 042c8a566d07d74c75251d9ab7306f4a8ab71c0d $"

import subprocess
import logging
import os

# Change the following line to import the category class for the files
# you wish to perform analysis on
import mastiff.plugins.category.generic as gen

# Change the class name and the base class
class GenSkelExt(gen.GenericCat):
    """Skeleton generic plugin that calls an external program.

    The path of the external program is read from the 'plugcmd' option in
    this plug-in's section of the MASTIFF configuration file.
    """

    def __init__(self):
        """Initialize the plugin."""
        gen.GenericCat.__init__(self)
        # placeholder value - change it for your plug-in
        self.page_data.meta['filename'] = 'CHANGEME'

    def analyze(self, config, filename):
        """
        Obtain the command and options from the config file and call the
        external program.

        config: configuration object; its get_section() is used to obtain
                this plug-in's options.
        filename: path of the file to analyze; passed as the only argument
                  to the external program.

        Returns self.page_data on success (matching the non-external
        analysis skeleton) and False if the plug-in is not activated, its
        options are unavailable, the program is not executable, or the
        program wrote anything to stderr.
        """
        # make sure we are activated
        if self.is_activated == False:
            return False
        log = logging.getLogger('Mastiff.Plugins.' + self.name)
        log.info('Starting execution.')

        # get my config options
        plug_opts = config.get_section(self.name)
        if plug_opts is None:
            log.error('Could not get %s options.', self.name)
            return False

        # *** plug_opts['plugcmd'] SHOULD BE CHANGED TO THE PLUGIN SPECIFIC OPTIONS

        # a missing option is handled like an empty one instead of raising
        try:
            plugcmd = plug_opts['plugcmd']
        except KeyError:
            plugcmd = None

        # verify external program exists and we can call it
        if not plugcmd or \
           not os.path.isfile(plugcmd) or \
           not os.access(plugcmd, os.X_OK):
            log.error('%s is not accessible. Skipping.', plugcmd)
            return False

        # run your external program here
        run = subprocess.Popen([plugcmd, filename],
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               close_fds=True)
        (output, error) = run.communicate()
        # any stderr output is treated as a failure of the external program
        if error is not None and len(error) > 0:
            log.error('Error running program: %s', error)
            return False

        self.gen_output(output)
        log.debug('Successfully ran %s.', self.name)

        return self.page_data

    def gen_output(self, output):
        """Place the results into a Mastiff Output Page.

        output: the data captured from the external program's stdout.
        Always returns True.
        """
        log = logging.getLogger('Mastiff.Plugins.' + self.name)

        # self.page_data was previously initialized
        # add a table to it
        new_table = self.page_data.addTable('ANALYSIS PLUGIN DESCRIPTION')

        # parse through data generated from output here

        # add header to table
        # example: new_table.addHeader([('Header 1', str), ('Header 2', int)])

        # add rows of data to table
        # example: new_table.addRow(['row1', 1])

        return True


================================================
FILE: skeleton/analysis-ext-skel.yapsy-plugin
================================================
[Core]
Name = GenSkel Ext Prog
Module = analysis-ext-skel

[Documentation]
Description = Your Description Here
Author = Your Name Here
Version = 0.1
Website = Your Website Here


================================================
FILE: skeleton/analysis-skel.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Analysis plugin skeleton code

Plugin Type: Generic
Purpose:
  This file provides the skeleton code for a plugin that performs static
  analysis on any file given to the Mastiff framework. This is an example that
  shows all functions defined.

Output:
   None

__init__(): MANDATORY: Any initialization code the plugin requires. It must
            also call the __init__ for its category class.

activate(): OPTIONAL: Activation code called by Yapsy to activate the plugin.

deactivate(): OPTIONAL: Deactivation code called by Yapsy.

analyze(config, filename): MANDATORY: The main body of code that performs the
                           analysis on the file.

gen_output(): Function that puts the data into self.page_data for the output
              plug-ins.
"""

__version__ = "$Id: 107798be8154ef41517034e77db3b5a95dd4fe6b $"

import logging

# Change the following line to import the category class for the files
# you wish to perform analysis on
import mastiff.plugins.category.generic as gen

# Change the class name and the base class
class GenSkeleton(gen.GenericCat):
    """Minimal generic analysis plug-in to copy and adapt."""

    def __init__(self):
        """Set up the category base class and the page placeholder."""
        gen.GenericCat.__init__(self)
        # placeholder value - change it for your plug-in
        self.page_data.meta['filename'] = 'CHANGEME'

    def activate(self):
        """Hand activation over to the category base class."""
        gen.GenericCat.activate(self)

    def deactivate(self):
        """Hand deactivation over to the category base class."""
        gen.GenericCat.deactivate(self)

    def analyze(self, config, filename):
        """Run the analysis.

        Returns False when the plug-in is not activated, otherwise the
        plug-in's page_data after gen_output() has filled it.
        """
        # refuse to run unless we have been activated
        if self.is_activated == False:
            return False

        logger = logging.getLogger('Mastiff.Plugins.' + self.name)
        logger.info('Starting execution.')

        # Analysis code goes here. Results can be written to tables directly
        # or handed to gen_output().
        self.gen_output()

        return self.page_data

    def gen_output(self):
        """Fill self.page_data with the results; always returns True."""
        logger = logging.getLogger('Mastiff.Plugins.' + self.name)

        # self.page_data already exists at this point, so just attach a table
        table = self.page_data.addTable('ANALYSIS PLUGIN DESCRIPTION')

        # define the columns
        # example: table.addHeader([('Header 1', str), ('Header 2', int)])

        # append the data rows
        # example: table.addRow(['row1', 1])

        return True


================================================
FILE: skeleton/analysis-skel.yapsy-plugin
================================================
[Core]
Name = Generic Skeleton Plugin
Module = analysis-skel

[Documentation]
Description = Your Description Here
Author = Your Name Here
Version = 0.1
Website = Your Website Here


================================================
FILE: skeleton/category-skel.py
================================================
#!/usr/bin/env python
"""
  Copyright 2012-2013 The MASTIFF Project, All Rights Reserved.

  This software, having been partly or wholly developed and/or
  sponsored by KoreLogic, Inc., is hereby released under the terms
  and conditions set forth in the project's "README.LICENSE" file.
  For a list of all contributors and sponsors, please refer to the
  project's "README.CREDITS" file.
"""

__doc__ = """
Category Skeleton Plugin

File Type: New File Type
Purpose:
  This file contains the skeleton code for a new category class to analyze
  a new file type.

Output:
   None

__init__(): MANDATORY: Any initialization code the category requires. It must
            also call the __init__ for its superclass - in this case
            categories.MastiffPlugin.
"""

__version__ = "$Id: 64ee75c4869a530a4030a50ff7add6ab87601a11 $"

import mastiff.plugins.category.categories as categories
import mastiff.filetype as FileType

# Change the class name to identify the new file type
class SkelCat(categories.MastiffPlugin):
    """Skeleton category class for a new file type."""

    def __init__(self, name=None):
        """Initialize the category."""
        categories.MastiffPlugin.__init__(self, name)

        # cat_name should be a one word description of the file type
        self.cat_name = 'SkelCat'
        # Add in strings from libmagic and TrID output
        self.my_types = [ 'libmagic string', 'TrID string' ]
        # Add in the Yara rule
        self.yara_filetype = """rule istype { } """

    def is_my_filetype(self, id_dict, file_name):
        """Determine if the file belongs to this category.

        id_dict: mapping read here under the keys 'magic' and 'trid'.
        file_name: path handed to the Yara type check.

        Three checks are tried in order: the magic string, the Yara rule,
        and the TrID results. Returns self.cat_name on the first match and
        None when nothing matches.
        """

        # check magic string first
        try:
            magic = id_dict['magic']
            if any(type_ in magic for type_ in self.my_types):
                return self.cat_name
        except (KeyError, TypeError):
            # No usable magic string. Keep going so the Yara and TrID checks
            # below still get a chance to identify the file.
            pass

        # run Yara type check
        if FileType.yara_typecheck(file_name, self.yara_filetype) is True:
            return self.cat_name

        # check TrID output, if available
        # this can likely be removed
        try:
            trid_results = id_dict['trid'] or []
        except KeyError:
            trid_results = []

        for (percent, desc) in trid_results:
            for type_ in self.my_types:
                # make sure percent is high enough and trid string matches
                if type_ in desc and percent > 50:
                    return self.cat_name

        # add your own code on additional file type determination here

        return None


================================================
FILE: skeleton/category-skel.yapsy-plugin
================================================
[Core]
Name = Category Skeleton Plug-in
Module = category-skel

[Documentation]
Description = Your Description Here
Author = Your Name Here
Website = Your Website Here
Version = 0.1


================================================
FILE: skeleton/output-skel.yapsy-plugin
================================================
[Core]
Name = Generic Output Skeleton Plugin
Module = output-skel

[Documentation]
Description = Your Description Here
Author = Your Name Here
Version = 0.1
Website = Your Website Here


================================================
FILE: tests/import-test.sh
================================================
#!/bin/bash

# $Id: 00c702350cf2edd48c2e57517593c5bce6a64781 $
#
# Find all imports from the MASTIFF python files and ensure they can be
# imported.
#
# $1 = directory to test

# Validate the single argument; exit non-zero so callers can detect misuse.
if [ $# -eq 0 ] ; then
  echo "Need a directory to scan."
  exit 1
elif [ ! -d "$1" ] ; then
  echo "$1 is not a directory."
  exit 1
fi

# Remember the starting directory. PWD itself must not be used for this:
# the shell rewrites it on every cd, so it would no longer hold the
# original location by the time we want to go back.
ORIGDIR=$(pwd)
SAVEIFS=$IFS
# Split command output on newlines only, so paths and import statements
# containing spaces stay in one piece.
IFS=$(echo -en "\n\b")

echo "Checking Python imports in $1 and below."
echo

cd "$1" || exit 1
for FILE in `find . -name "*.py"`; do
  # Collect each distinct import statement with leading whitespace removed.
  for IMPORT in `egrep "^\s*import\s+|^\s*from \S+ import" "${FILE}" | sed -e 's/^[ \t]*//' | sort -u`; do
    # Run the import by itself and keep only ImportError output
    # (anything mentioning disitool is ignored).
    ERROR=`python -c "${IMPORT}" 2>&1 | grep "ImportError" | grep -vi disitool`
    # The final grep exits 1 when nothing was left, i.e. the import is fine.
    if [ $? -ne 1 ]; then
      echo ERROR: ${FILE}: ${ERROR}
    fi
  done
done

cd "${ORIGDIR}"
IFS=${SAVEIFS}

echo
echo "Done checking imports."
echo


================================================
FILE: tests/mastiff-test.sh
================================================
#!/bin/bash

MASCMD="python ./mas.py -c ./mastiff.conf -V "

# Test mastiff by running it against various file types.
# $1 = file type
# $2 = file to test
# $3 = outfile
#
# Prints "Success." or a failure message naming the outfile. A missing test
# file is reported and skipped (return 0).
mas_test()
{
    echo -n "Testing ${1}: "
    # Quoted so an empty or space-containing path is tested as one word.
    if [ ! -f "$2" ] ; then
        echo "$2 missing. Unable to test."
        return 0
    fi

    # MASCMD is deliberately left unquoted: it must split into the
    # interpreter, the script and its options.
    ${MASCMD} "${2}" > "${3}" 2>&1
    if [ $? -ne 0 ] ; then
        OUTMSG="Failed. See ${3} for details."
    else
        OUTMSG="Success."
    fi
    echo "$OUTMSG"
}

echo "Checking for MASTIFF functionality."
echo

mas_test EXE tests/test.exe tests/test-EXE.txt
mas_test Office tests/test.doc tests/test-DOC.txt
mas_test PDF tests/test.pdf tests/test-PDF.txt
mas_test ZIP tests/test.zip tests/test-ZIP.txt

echo
echo "Done checking MASTIFF functionality."


================================================
FILE: utils/version2string
================================================
#!/usr/bin/perl -w
######################################################################
#
# $Id: 6c139ab440c14c954b44b9fad19f5c34154259f7 $
#
######################################################################
#
# Copyright 2008-2013 The WebJob Project, All Rights Reserved.
#
######################################################################
#
# Purpose: Convert version numbers to a string representation.
#
######################################################################

use strict;
use File::Basename;
use File::Path;
use Getopt::Std;

######################################################################
#
# Main Routine
#
######################################################################

  ####################################################################
  #
  # Punch in and go to work.
  #
  ####################################################################

  my $sProgram = basename(__FILE__);

  ####################################################################
  #
  # Patterns used to validate the command line values.
  #
  ####################################################################

  my $sVersionRegex  = qq(0x[0-9A-Fa-f]{8});
  my $sTypeRegex     = qq((?:cvs|program|tar));

  ####################################################################
  #
  # Parse the command line; a parse failure ends in the usage message.
  #
  ####################################################################

  my (%hOptions);

  getopts('t:v:', \%hOptions) or Usage($sProgram);

  ####################################################################
  #
  # The type, '-t', may be omitted and then defaults to "program".
  #
  ####################################################################

  my $sType = "program";

  if (exists($hOptions{'t'}))
  {
    $sType = $hOptions{'t'};
  }

  if (defined($sType) && $sType !~ /^$sTypeRegex$/)
  {
    print STDERR "$sProgram: Type='$sType' Error='Invalid version type.'\n";
    exit(2);
  }

  ####################################################################
  #
  # The version, '-v', must be given and must look like 0xHHHHHHHH.
  #
  ####################################################################

  my $sVersion = $hOptions{'v'};

  Usage($sProgram) if (!defined($sVersion));

  if ($sVersion !~ /^$sVersionRegex$/)
  {
    print STDERR "$sProgram: Version='$sVersion' Error='Invalid version.'\n";
    exit(2);
  }

  ####################################################################
  #
  # Leftover arguments are not accepted.
  #
  ####################################################################

  Usage($sProgram) if (scalar(@ARGV) > 0);

  ####################################################################
  #
  # Convert the version and print the result.
  #
  ####################################################################

  my $sVersionString = VersionToString(hex($sVersion), $sType);

  print $sVersionString, "\n";

  1;

######################################################################
#
# VersionToString
#
######################################################################

sub VersionToString
{
  my ($sVersion, $sType) = @_;

  # Unpack the numeric version: major is bits 28-31, minor bits 20-27,
  # patch bits 12-19, state bits 10-11 and build bits 0-9.
  my $sMajor = ($sVersion >> 28) & 0x0f;
  my $sMinor = ($sVersion >> 20) & 0xff;
  my $sPatch = ($sVersion >> 12) & 0xff;
  my $sBuild = $sVersion & 0x3ff;

  # The two state bits index directly into the list of state names.
  my @aStateNames = ("ds", "rc", "sr", "xs");
  my $sStateString = $aStateNames[($sVersion >> 10) & 0x03];

  # State 2 ("sr") together with build 0 is printed without a suffix.
  my $sIsPlain = (($sVersion & 0xfff) == 0x800);

  if ($sType =~ /^cvs$/)
  {
    return ($sIsPlain)
      ? sprintf("V%d_%d_%d", $sMajor, $sMinor, $sPatch)
      : sprintf("V%d_%d_%d_%s%d", $sMajor, $sMinor, $sPatch, uc($sStateString), $sBuild);
  }

  if ($sType =~ /^tar$/)
  {
    return ($sIsPlain)
      ? sprintf("%d.%d.%d", $sMajor, $sMinor, $sPatch)
      : sprintf("%d.%d.%d.%s%d", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  if ($sType =~ /^program$/)
  {
    return ($sIsPlain)
      ? sprintf("%d.%d.%d", $sMajor, $sMinor, $sPatch)
      : sprintf("%d.%d.%d (%s%d)", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  # Any other type yields an empty string.
  return "";
}


######################################################################
#
# Usage
#
######################################################################

sub Usage
{
  # Write the synopsis, framed by blank lines, to stderr and exit with 1.
  my ($sProgram) = @_;
  print STDERR "\n", "Usage: $sProgram [-t {cvs|program|tar}] -v version\n", "\n";
  exit(1);
}


================================================
FILE: utils/version_helper
================================================
#!/usr/bin/perl -w
######################################################################
#
# $Id: 40c3c9381e39f6934a485d3cde86765789e61f42 $
#
######################################################################
#
# Copyright 2006-2013 The WebJob Project, All Rights Reserved.
#
######################################################################
#
# Purpose: Manage version numbers.
#
######################################################################

use strict;
use File::Basename;
use File::Path;
use Getopt::Std;

######################################################################
#
# Main Routine
#
######################################################################

  ####################################################################
  #
  # Punch in and go to work.
  #
  ####################################################################

  my $sProgram = basename(__FILE__);

  ####################################################################
  #
  # Patterns used to validate the command line values. The counters
  # accept digits or a single '+'; the state also accepts its names.
  #
  ####################################################################

  my $sCounterRegex = qq((?:\\d+|[+]));
  my $sBuildNumberRegex = $sCounterRegex;
  my $sMajorNumberRegex = $sCounterRegex;
  my $sMinorNumberRegex = $sCounterRegex;
  my $sPatchNumberRegex = $sCounterRegex;
  my $sStateNumberRegex = qq((?:[0-3+]|[dx]s|rc|sr));

  ####################################################################
  #
  # Parse the command line; a parse failure ends in the usage message.
  #
  ####################################################################

  my (%hOptions);

  getopts('b:f:M:m:p:s:', \%hOptions) or Usage($sProgram);

  ####################################################################
  #
  # A filename is required, and can be '-' or a regular file.
  #
  ####################################################################

  my ($sFileHandle, $sFilename);

  # Usage() exits, so each check below only continues on success.
  if (!exists($hOptions{'f'}))
  {
    Usage($sProgram);
  }

  $sFilename = $hOptions{'f'};
  if (!defined($sFilename) || length($sFilename) < 1)
  {
    Usage($sProgram);
  }

  if (-f $sFilename)
  {
    # Three-argument open with a lexical handle: the name is taken
    # literally (no mode or whitespace interpretation) and no global
    # bareword handle is created.
    my $sRegularHandle;
    if (!open($sRegularHandle, "<", $sFilename))
    {
      print STDERR "$sProgram: File='$sFilename' Error='$!'\n";
      exit(2);
    }
    $sFileHandle = $sRegularHandle;
  }
  elsif ($sFilename eq '-')
  {
    # A lone dash selects standard input.
    $sFileHandle = \*STDIN;
  }
  else
  {
    print STDERR "$sProgram: File='$sFilename' Error='File must be regular.'\n";
    exit(2);
  }

  ####################################################################
  #
  # A MajorNumber, '-M', is optional.
  #
  ####################################################################

  my $sMajorNumber = $hOptions{'M'};

  if (defined($sMajorNumber) && $sMajorNumber !~ /^$sMajorNumberRegex$/)
  {
    print STDERR "$sProgram: MajorNumber='$sMajorNumber' Error='Invalid major number.'\n";
    exit(2);
  }

  ####################################################################
  #
  # A MinorNumber, '-m', is optional.
  #
  ####################################################################

  my $sMinorNumber = $hOptions{'m'};

  if (defined($sMinorNumber) && $sMinorNumber !~ /^$sMinorNumberRegex$/)
  {
    print STDERR "$sProgram: MinorNumber='$sMinorNumber' Error='Invalid minor number.'\n";
    exit(2);
  }

  ####################################################################
  #
  # A PatchNumber, '-p', is optional.
  #
  ####################################################################

  my $sPatchNumber = $hOptions{'p'};

  if (defined($sPatchNumber) && $sPatchNumber !~ /^$sPatchNumberRegex$/)
  {
    print STDERR "$sProgram: PatchNumber='$sPatchNumber' Error='Invalid patch number.'\n";
    exit(2);
  }

  ####################################################################
  #
  # A StateNumber, '-s', is optional. A state given by name is
  # translated to its number.
  #
  ####################################################################

  my $sStateNumber = $hOptions{'s'};

  if (defined($sStateNumber) && $sStateNumber !~ /^$sStateNumberRegex$/)
  {
    print STDERR "$sProgram: StateNumber='$sStateNumber' Error='Invalid state number.'\n";
    exit(2);
  }

  my %hStateNumbers = ("ds" => 0, "rc" => 1, "sr" => 2, "xs" => 3);

  if (defined($sStateNumber) && exists($hStateNumbers{$sStateNumber}))
  {
    $sStateNumber = $hStateNumbers{$sStateNumber};
  }

  ####################################################################
  #
  # A BuildNumber, '-b', is optional.
  #
  ####################################################################

  my $sBuildNumber = $hOptions{'b'};

  if (defined($sBuildNumber) && $sBuildNumber !~ /^$sBuildNumberRegex$/)
  {
    print STDERR "$sProgram: BuildNumber='$sBuildNumber' Error='Invalid build number.'\n";
    exit(2);
  }

  ####################################################################
  #
  # Leftover arguments are not accepted.
  #
  ####################################################################

  Usage($sProgram) if (scalar(@ARGV) > 0);

  ####################################################################
  #
  # Attempt to locate/identify the current version number.
  #
  ####################################################################

  my ($sOldVersion, $sVersionFmt);

  # Read until the first line that carries a version in one of the three
  # recognized layouts; the layout name is recorded alongside the value.
  while (my $sLine = <$sFileHandle>)
  {
    if ($sLine =~ /^#define VERSION (0x[0-9A-Fa-f]{8})\s*/)
    {
      ($sOldVersion, $sVersionFmt) = (hex($1), "define");
    }
    elsif ($sLine =~ /^\s*(0x[0-9A-Fa-f]{8})\s*$/)
    {
      ($sOldVersion, $sVersionFmt) = (hex($1), "string");
    }
    elsif ($sLine =~ /^\s*(?:version\s+=\s+)?(0x[0-9A-Fa-f]{8})\s*$/)
    {
      ($sOldVersion, $sVersionFmt) = (hex($1), "assign");
    }

    last if (defined($sOldVersion));
  }
  close($sFileHandle);

  if (!defined($sOldVersion))
  {
    print STDERR "$sProgram: Error='Failed to locate/identify current version number.'\n";
    exit(2);
  }

  if (!defined($sVersionFmt))
  {
    print STDERR "$sProgram: Error='Failed to determine version format.'\n";
    exit(2);
  }

  ####################################################################
  #
  # Compute the new version number.
  #
  ####################################################################

  my ($sNewVersion);

  $sNewVersion = $sOldVersion;

  # For every field, '+' adds one to that field and clears all fields
  # below it; an explicit number replaces just that field.

  # Major number: bits 28-31.
  if (defined($sMajorNumber))
  {
    if ($sMajorNumber =~ /^\+$/)
    {
      $sNewVersion += 0x10000000;
      $sNewVersion &= 0xf0000000;
    }
    else
    {
      if ($sMajorNumber < 0 || $sMajorNumber > 15)
      {
        print STDERR "$sProgram: MajorNumber='$sMajorNumber' Error='Invalid major number.'\n";
        exit(2);
      }
      $sNewVersion = (($sMajorNumber & 0xf) << 28) + ($sNewVersion & 0x0fffffff);
    }
  }

  # Minor number: bits 20-27.
  if (defined($sMinorNumber))
  {
    if ($sMinorNumber =~ /^\+$/)
    {
      $sNewVersion += 0x00100000;
      $sNewVersion &= 0xfff00000;
    }
    else
    {
      if ($sMinorNumber < 0 || $sMinorNumber > 255)
      {
        print STDERR "$sProgram: MinorNumber='$sMinorNumber' Error='Invalid minor number.'\n";
        exit(2);
      }
      $sNewVersion = (($sMinorNumber & 0xff) << 20) + ($sNewVersion & 0xf00fffff);
    }
  }

  # Patch number: bits 12-19.
  if (defined($sPatchNumber))
  {
    if ($sPatchNumber =~ /^\+$/)
    {
      $sNewVersion += 0x00001000;
      $sNewVersion &= 0xfffff000;
    }
    else
    {
      if ($sPatchNumber < 0 || $sPatchNumber > 255)
      {
        print STDERR "$sProgram: PatchNumber='$sPatchNumber' Error='Invalid patch number.'\n";
        exit(2);
      }
      $sNewVersion = (($sPatchNumber & 0xff) << 12) + ($sNewVersion & 0xfff00fff);
    }
  }

  # State number: bits 10-11, so the valid range is 0..3.
  if (defined($sStateNumber))
  {
    if ($sStateNumber =~ /^\+$/)
    {
      $sNewVersion += 0x00000400;
      $sNewVersion &= 0xfffffc00;
    }
    else
    {
      if ($sStateNumber < 0 || $sStateNumber > 3)
      {
        print STDERR "$sProgram: StateNumber='$sStateNumber' Error='Invalid state number.'\n";
        exit(2);
      }
      $sNewVersion = (($sStateNumber & 0x3) << 10) + ($sNewVersion & 0xfffff3ff);
    }
  }

  # Build number: bits 0-9, so the valid range is 0..1023.
  if (defined($sBuildNumber))
  {
    if ($sBuildNumber =~ /^\+$/)
    {
      $sNewVersion += 0x00000001;
    }
    else
    {
      if ($sBuildNumber < 0 || $sBuildNumber > 1023)
      {
        print STDERR "$sProgram: BuildNumber='$sBuildNumber' Error='Invalid build number.'\n";
        exit(2);
      }
      $sNewVersion = ($sBuildNumber & 0x3ff) + ($sNewVersion & 0xfffffc00);
    }
  }

  ####################################################################
  #
  # Generate update/commit/tag commands the user can run manually.
  #
  ####################################################################

  # Nothing is executed here: the commands are only printed so the user
  # can review and run them.
  my $sOldVersionString = VersionToString($sOldVersion, "tar");
  my $sNewVersionString = VersionToString($sNewVersion, "tar");
  my $so = sprintf("0x%08x", $sOldVersion);
  my $sn = sprintf("0x%08x", $sNewVersion);
  my $sCommand = "perl -p -i.bak ";
  # "define" is the format name recorded when the version was found on a
  # "#define VERSION" line; only that occurrence is rewritten in that case.
  if ($sVersionFmt eq "define")
  {
    $sCommand .= " -e 's/define VERSION $so/define VERSION $sn/g;' $sFilename";
  }
  else
  {
    $sCommand .= " -e 's/$so/$sn/g;' $sFilename";
  }
  print $sCommand, "\n";
  $sCommand = "cvs commit -m \"Updated version number ($sOldVersionString --> $sNewVersionString).\"";
  print $sCommand, "\n";
  $sCommand = "cvs tag " . VersionToString($sNewVersion, "vcs");
  print $sCommand, "\n";
  # For state 2 ("sr") with build 0, also print a tag without the suffix.
  if (((($sNewVersion >> 10) & 0x03) == 2) && (($sNewVersion & 0x3ff) == 0))
  {
    $sCommand = "cvs tag " . VersionToString($sNewVersion, "vcs_sr0");
    print $sCommand, "\n";
  }

  1;

######################################################################
#
# VersionToString
#
######################################################################

sub VersionToString
{
  my ($sVersion, $sType) = @_;

  # Unpack the numeric version: major is bits 28-31, minor bits 20-27,
  # patch bits 12-19, state bits 10-11 and build bits 0-9.
  my $sMajor = ($sVersion >> 28) & 0x0f;
  my $sMinor = ($sVersion >> 20) & 0xff;
  my $sPatch = ($sVersion >> 12) & 0xff;
  my $sBuild = $sVersion & 0x3ff;

  # The two state bits index directly into the list of state names.
  my @aStateNames = ("ds", "rc", "sr", "xs");
  my $sStateString = $aStateNames[($sVersion >> 10) & 0x03];

  if ($sType =~ /^vcs$/)
  {
    return sprintf("V%d_%d_%d_%s%d", $sMajor, $sMinor, $sPatch, uc($sStateString), $sBuild);
  }

  if ($sType =~ /^vcs_sr0$/)
  {
    return sprintf("V%d_%d_%d", $sMajor, $sMinor, $sPatch);
  }

  if ($sType =~ /^tar$/)
  {
    return sprintf("%d.%d.%d.%s%d", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  if ($sType =~ /^program$/)
  {
    return sprintf("%d.%d.%d (%s%d)", $sMajor, $sMinor, $sPatch, $sStateString, $sBuild);
  }

  # Any other type yields an empty string.
  return "";
}


######################################################################
#
# Usage
#
######################################################################

sub Usage
{
  # Write the synopsis, framed by blank lines, to stderr and exit with 1.
  my ($sProgram) = @_;
  print STDERR "\n", "Usage: $sProgram [-M major] [-m minor] [-p patch] [-s state] [-b build] -f {file|-}\n", "\n";
  exit(1);
}