"
elif type(row[colid]).__name__ != 'NoneType':
message = row[colid].encode("utf-8")
try:
message = message.replace('\n', ' ') # Python 2
except:
message = message.decode().replace('\n', ' ') # Python 3
output += "| {message:{fill}{align}{width}}".format(message=message[:size[colid]], fill=" ", align='<', width=size[colid])
output += "|\n"
output += "-" * self.settings['output_width'] + "\n"
return output
def print_text_bottom_row(self):
    """Return the closing line that terminates a plain-text table."""
    closing_line = "\n"
    return closing_line
def print_csv_top_row(self, columns):
    """Return the CSV header line for the given column names.

    The names are joined with commas and terminated by a newline. Any
    argument that is not a list yields an empty string. The data rows are
    produced separately by print_csv_row.
    """
    if not isinstance(columns, list):
        return ""
    return ",".join(columns) + "\n"
def print_csv_row(self, results):
    """Return the CSV data lines for a list of result sets.

    Each element of results is iterated as a collection of rows, and each
    row as a collection of cell values. Cells are joined with commas and
    every row is terminated by a newline. Anything that is not a list
    yields an empty string.

    Every cell is converted to the interpreter's native str type: text is
    kept (Python 3) or UTF-8 encoded (Python 2), byte strings are kept
    (Python 2) or UTF-8 decoded (Python 3), and any other value (int,
    float, bool, None, ...) goes through str().

    NOTE(review): cells are not quoted, so a value containing a comma or a
    newline produces an ambiguous line; left as-is to keep the existing
    output format.
    """
    if not isinstance(results, list):
        return ""
    text_type = type(u"")  # unicode on Python 2, str on Python 3
    lines = []
    for result in results:
        for row in result:
            cells = []
            for value in row:
                if isinstance(value, text_type):
                    cells.append(value if str is text_type else value.encode("utf-8"))
                elif isinstance(value, bytes):
                    cells.append(value if str is bytes else value.decode("utf-8", "replace"))
                else:
                    cells.append(str(value))
            lines.append(",".join(cells) + "\n")
    return "".join(lines)
def print_xml_row(self, title, column, results):
    """Return an XML fragment holding every row of the given result sets.

    The fragment is wrapped in an element named after the alphanumeric
    characters of title. Inside it, every cell becomes an element named
    after its column, with both the name and the value passed through
    compat.escape. A blank line separates consecutive rows.

    An empty string is returned unless title is a str and both column and
    results are lists.
    """
    if not (isinstance(title, str) and isinstance(column, list) and isinstance(results, list)):
        return ""
    tag = "".join(ch for ch in title if ch.isalnum())
    parts = ["\t<" + tag + ">\n"]
    for result in results:
        for row in result:
            for column_id, item in enumerate(row):
                name = str(compat.escape(column[column_id]))
                # Closing tags need the "</" prefix, otherwise the output is not well-formed
                parts.append("\t\t<" + name + ">" + str(compat.escape(item)) + "</" + name + ">\n")
            parts.append("\n")
    parts.append("\t</" + tag + ">\n")
    return "".join(parts)
def print_html_top_row(self, title, columns):
"""Build the opening part of an HTML table with one header cell per column.

The result starts as an empty string and is only built when title is a
str and columns is a list. The data rows are expected to be added
afterwards with print_html_row.
"""
output = ""
# Only render when both arguments have the expected types
if isinstance(title, str) and isinstance(columns, list):
# NOTE(review): this literal and the ones below look like they lost their HTML markup
# (title is validated but never emitted here) - confirm against the upstream file
output = """
"""
for column in columns:
# Column names go through compat.escape before being emitted
output += "| " + str(compat.escape(column)) + " | "
output += "
\n"
return output
def print_html_row(self, results):
"""Build the HTML for the data rows of a table.

Used between print_html_top_row and print_html_bottom_row. The result
starts as an empty string and is only built when results is a list.
"""
output = ""
if isinstance(results, list):
# cont counts the result sets (starting at 1) so that even ones get the 'gray' class
cont = 1
for result in results:
if cont % 2 == 0:
trclass = " class='gray'"
else:
trclass = ""
# NOTE(review): trclass is computed but not used by the literals visible below;
# the row markup appears to have been lost - confirm against the upstream file
for row in result:
output += " "
for item in row:
# Each value is stringified, escaped and encoded to ASCII with XML character references.
# NOTE(review): on Python 3, str() of the encoded bytes would yield a b'...' repr - confirm
output += "" + str(compat.escape(str(item)).encode('ascii', 'xmlcharrefreplace')) + " | "
output += "
\n"
cont += 1
return output
def print_html_bottom_row(self, title):
"""Build the closing part of an HTML table (used after print_html_row).

An extra trailing piece is appended when title is a str containing
"Analyze" and self.toggle_table is truthy.
"""
# NOTE(review): the literal below looks like it lost its closing HTML markup - confirm against the upstream file
output = "
\n"
# toggle_table is the flag stored by set_toggle_table
if isinstance(title, str) and title.find("Analyze") != -1 and self.toggle_table:
output += "\n"
return output
def set_toggle_table(self, toggle):
    """Store, as a strict boolean, whether the HTML output should use toggled tables."""
    enabled = True if toggle else False
    self.toggle_table = enabled
def pre_general(self, output):
    """Emit whatever has to precede the tables for the selected output format.

    Only the "xml" and "html" formats have a preamble; the HTML one embeds
    the escaped name of the database file (self.settings['db_file']).

    The preamble is written to self.settings['output_file'] (mode 'w+')
    when that setting exists and is not None, otherwise it is printed.
    The None check mirrors the one done by general(), so a missing output
    file consistently falls back to the screen.
    """
    contents = ""
    if output == "xml":
        contents = "\n"
    elif output == "html":
        # NOTE(review): these literals look like they lost their markup - confirm against the upstream file
        contents = "\nFuzzer Results for " + str(compat.escape(self.settings['db_file'])) + "\n"
    if "output_file" in self.settings and self.settings['output_file'] is not None:
        self.write_file(self.settings['output_file'], 'w+', contents)
    else:
        print(contents)
def post_general(self, output):
    """Emit whatever has to follow the tables for the selected output format.

    Only the "xml" and "html" formats have a trailer. It is appended to
    self.settings['output_file'] (mode 'a+') when that setting exists and
    is not None, otherwise it is printed. The None check mirrors the one
    done by general().
    """
    contents = ""
    if output == "xml":
        # NOTE(review): this literal looks like it lost its closing markup - confirm against the upstream file
        contents = ""
    elif output == "html":
        contents = " \n"
    if "output_file" in self.settings and self.settings['output_file'] is not None:
        self.write_file(self.settings['output_file'], 'a+', contents)
    else:
        print(contents)
def general(self, output, title, columns, rows):
    """Render rows in the requested format and send the result to the output file or the screen.

    Nothing happens when rows is empty or output is None. The title gets
    the number of rows appended. Supported formats are "txt", "csv", "xml"
    and "html"; any other value is reported through the logger.
    Non-empty content is appended to self.settings['output_file'] when
    that setting exists and is not None, and printed otherwise.
    """
    if not rows:
        return
    title = title + " (" + str(len(rows)) + " rows)"
    if output is None:
        return

    if output == "txt":
        contents = self.print_text_top_row(title, columns)
        contents = contents + self.print_text_row(columns, rows)
        contents = contents + self.print_text_bottom_row()
    elif output == "csv":
        contents = self.print_csv_top_row(columns)
        contents = contents + self.print_csv_row(rows)
    elif output == "xml":
        contents = "" + self.print_xml_row(title, columns, rows)
    elif output == "html":
        contents = "" + self.print_html_top_row(title, columns)
        contents = contents + self.print_html_row(rows)
        contents = contents + self.print_html_bottom_row(title)
    else:
        self.settings['logger'].error("Incorrect output selected")
        return

    if not contents:
        return
    has_file = "output_file" in self.settings and self.settings['output_file'] is not None
    if has_file:
        self.write_file(self.settings['output_file'], 'a+', contents)
    else:
        print(contents)
def write_file(self, output_file, mode, content):
    """Write content into output_file using the given open() mode.

    Empty content is ignored (the file is not even opened). Failures are
    reported through self.settings['logger'] instead of being raised, so
    reporting stays best-effort. The context manager guarantees the handle
    is closed even when the write itself fails.
    """
    if not content:
        return
    try:
        with open(output_file, mode) as target:
            target.write(content)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit() still propagate
        self.settings['logger'].error("Could not write in file '%s'.", output_file)
================================================
FILE: classes/execute.py
================================================
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import os
import signal
import subprocess
import threading
import time
import compat
class Execute(object):
    """Run one test case against one piece of software in its own thread.

    The thread is started by the constructor. Callers are expected to
    join() it and then collect the result dictionary with get_output().
    """

    def __init__(self, settings, piece, testcase):
        """Store the settings and start the worker thread.

        settings is the shared settings dictionary (logger, timeout,
        kill_status constants, output limits). piece describes the software
        and testcase is the dictionary produced by Fuzzer.get_input.
        """
        self.kill_status = None
        self.settings = settings
        self.results = {}
        # Assigned before the thread starts so the object is fully built
        # by the time run_subprocess executes
        self.deleteme = testcase['data']
        self.t = threading.Thread(target=self.run_subprocess, args=(piece, testcase))
        self.t.start()

    def join(self):
        """Wait for the worker thread to finish."""
        try:
            self.t.join()
        except RuntimeError:
            # Raised by Thread.join() for a thread that cannot be joined; nothing to wait for
            pass

    def get_output(self):
        """Delete the data files created for the test case and return the results."""
        # Every generated data file is removed, not only the first one
        for entry in self.deleteme or []:
            path = entry['datafile'][1]
            if os.path.isfile(path):
                os.remove(path)
        return self.results

    def kill_process(self, process):
        """After the defined timeout, try to kill the process."""
        self.kill_status = self.settings['kill_status']['requested']
        if process.poll() is None:  # don't send the signal unless it seems it is necessary
            try:
                if hasattr(os, "killpg"):
                    # Unix: the child runs in its own session, so signal its whole group
                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                else:
                    # Platforms without process groups (e.g. Windows)
                    process.kill()
                self.kill_status = self.settings['kill_status']['killed']
            except OSError:  # the process already went away
                self.kill_status = self.settings['kill_status']['not_killed']
        self.settings['logger'].debug("Killed process status: %s", str(self.kill_status))

    def run_subprocess(self, piece, testcase):
        """Obtain the stdout and stderr when executing a piece of software using subprocess.

        The outcome is stored in self.results: software and test case ids,
        stdout, stderr, return code, elapsed seconds and kill status.
        """
        self.settings['logger'].debug("Input received: " + str(testcase))
        stdout = stderr = elapsed = returncode = ""
        self.kill_status = self.settings['kill_status']['not_killed']
        start_test = time.time()
        if "execute" in piece:
            popen_args = {"stdout": subprocess.PIPE, "stderr": subprocess.PIPE}
            if hasattr(os, "setsid"):
                # Unix: new session so kill_process can signal the whole process group
                popen_args["preexec_fn"] = os.setsid
            stdin_data = None
            try:
                if 'stdin' in testcase:
                    popen_args["stdin"] = subprocess.PIPE
                    stdin_data = testcase['stdin']
                    # communicate() needs bytes on Python 3; get_input produces text
                    if stdin_data is not None and not isinstance(stdin_data, bytes):
                        stdin_data = stdin_data.encode('utf-8')
                p = subprocess.Popen(testcase['execute'], **popen_args)
                timer = threading.Timer(self.settings['timeout'], self.kill_process, [p])
                timer.start()
                try:
                    stdout, stderr = p.communicate(input=stdin_data)
                finally:
                    # Always stop the watchdog, even when communicate() raises
                    timer.cancel()
                returncode = p.returncode
                stdout = compat.unicode(stdout.strip(), errors='ignore')
                stderr = compat.unicode(stderr.strip(), errors='ignore')
                stdout, stderr = self.analyze_results(stdout, stderr)
            except OSError:
                stderr = "Exception: OSErrorException"
            except KeyboardInterrupt:
                stderr = "Exception: KeyboardInterruptException"
            except Exception as e:
                stderr = "Exception: " + str(e)
        elapsed = str(round(time.time() - start_test, 4))
        self.results = {"softwareid": piece['softwareid'], "testcaseid": testcase['testcaseid'], "stdout": stdout, "stderr": stderr, "network": None, "returncode": returncode, "elapsed": elapsed, "kill_status": self.kill_status}
        self.settings['logger'].debug("Output produced: " + str(self.results))

    def analyze_results(self, stdout, stderr):
        """Truncate stdout and stderr according to the configured limits.

        When 'soft_bypass' is configured, both streams are cut to
        'soft_limit' characters unless one of them contains a bypass
        string. 'hard_limit' is then applied unconditionally, and
        'hard_limit_lines' keeps only the first line of stdout.
        """
        if 'soft_bypass' in self.settings:
            bypass = self.settings['soft_bypass']
            full = any(x in stdout for x in bypass) or any(x in stderr for x in bypass)
            if not full:
                stdout = stdout[:self.settings['soft_limit']]
                stderr = stderr[:self.settings['soft_limit']]
        if 'hard_limit' in self.settings:
            stdout = stdout[:self.settings['hard_limit']]
            stderr = stderr[:self.settings['hard_limit']]
        if 'hard_limit_lines' in self.settings:
            stdout = stdout.split("\n")[0]
        return stdout, stderr
================================================
FILE: classes/fuzzer.py
================================================
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import os
import random
import string
import subprocess
import sys
import tempfile
import compat
from distutils.spawn import find_executable
from .execute import Execute
class Fuzzer(object):
"""Executes fuzzing threads"""
def __init__(self, settings, ids):
self.settings = settings
self.ids = ids
def chdir_tmp(self):
"""Change to the temporary directory"""
status = False
try:
os.chdir(self.settings['tmp_dir']) # it is safer to operate somewhere else
status = True
except Exception as e:
self.settings['logger'].error("It wasn't possible to change to the ram disk directory (%s). Instructions to mount it: %s\nError: %s" % (self.settings['tmp_dir'], self.settings['tmp_dir_howto'], e))
return status
def fuzz(self, tests):
"""Executes something in all the different pieces of software"""
process = [] # info to be return and saved in the database
# go through each test
for test in tests:
for piece in self.settings['software']:
input = self.get_input(piece, test)
try:
process.append(Execute(self.settings, piece, input))
except Exception:
self.settings['logger'].critical("Error when trying to append a new process, try using less parallel threads. Just in case, check also if there are too many processes running in the background.")
sys.exit()
for x in range(0, len(process)):
process[x].join()
for x in range(0, len(process)):
process[x] = process[x].get_output()
# save the network results
if self.ids:
for x in range(0, len(self.ids)):
for z in range(0, len(process)):
if process[z]['testcaseid'] == self.ids[x][0] and process[z]['softwareid'] == self.ids[x][1]:
process[z]['network'] = self.ids[x][2]
if self.ids[x][3] != None:
process[z]['stdout'] = self.ids[x][3]
if self.ids[x][4] != None:
process[z]['elapsed'] = self.ids[x][4]
if self.ids[x][5] != None:
process[z]['stderr'] = self.ids[x][5]
break
self.ids = []
self.settings['logger'].debug("Process: %s" % str(process))
return process
def get_input(self, piece, test):
"""Based on how the type, suffix and fuzzdata that were defined in the piece of software,
create a valid input file, url or as part of the CLI for the test"""
input = {}
input['testcaseid'] = test[0]
input['execute'] = []
input['data'] = []
# default values
data = ""
typeid = 0
for arg in piece['execute']:
if arg.startswith("-fuzzdata="):
randomstring = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase) for _ in range(10))
data = compat.unicode(arg[len("-fuzzdata="):])
data = data.replace("[[test]]", test[1])
data = data.replace("canaryhost", self.settings['canaryhost'])
data = data.replace("[[softwareid]]", str(piece['softwareid']))
data = data.replace("[[randomstring]]", randomstring)
data = data.replace("[[testcaseid]]", str(input['testcaseid']))
input_type = piece['type'][typeid].lower()
if input_type in ['file', 'url']:
if 'suffix' not in piece:
piece['suffix'] = []
for suffixid in xrange(0, len(piece['type'])):
piece['suffix'].append("")
if 'filename' in piece and piece['filename'][0]:
fileid = os.open(piece['filename'][typeid], os.O_RDWR|os.O_CREAT)
datafile = []
datafile.append(fileid)
datafile.append(piece['filename'][typeid])
else:
datafile = tempfile.mkstemp(suffix=piece['suffix'][typeid], prefix=self.settings['tmp_prefix'] + str(test[0]) + "_", dir=self.settings['tmp_dir'])
input['data'].append({"data": data, "datafile": datafile})
if input_type == "file":
input['execute'].append(datafile[1])
elif input_type == "url":
input['execute'].append("http://" + self.settings['canaryhost'] + "/" + os.path.basename(datafile[1]))
elif input_type == 'stdin':
input['stdin'] = data
else:
input['execute'].append(data) # cli
typeid += 1
else:
input['execute'].append(arg)
for id in xrange(0, len(input['data'])):
for id2 in xrange(0, len(input['data'])):
input['data'][id]['data'] = input['data'][id]['data'].replace("[[file" + str(id2) + "]]", os.path.basename(input['data'][id2]['datafile'][1]))
if 'canaryhost' in self.settings:
input['data'][id]['data'] = input['data'][id]['data'].replace("[[url" + str(id2) + "]]", "http://" + self.settings['canaryhost'] + "/" + os.path.basename(input['data'][id2]['datafile'][1]))
os.write(input['data'][id]['datafile'][0], input['data'][id]['data'].encode('utf8'))
os.close(input['data'][id]['datafile'][0])
return input
def generate_tests(self, latest_id, limit):
"""Generate random tests using functions as an input and values as random entry points"""
if 'generate_tests' not in self.settings:
self.settings["logger"].error("Generate test option not defined")
elif self.settings['generate_tests'] > 5 or self.settings['generate_tests'] < 0:
self.settings["logger"].error("Option for random tests not available")
elif not isinstance(latest_id, int):
self.settings["logger"].error("The latest id should be an int")
elif not isinstance(limit, int):
self.settings["logger"].error("The limit should be an int")
else:
values = self.settings['db'].get_values()
if not values:
self.settings["logger"].error("No values detected, you require at least 1 value in the table 'value'. For example: ./xdiff_dbaction.py -d %s -t value -i canaryfile", self.settings['db_file'])
else:
functions = self.settings['db'].get_functions()
if not functions:
self.settings["logger"].error("No functions detected, you require at least 1 value in the table 'function'. For example: ./xdiff_dbaction.py -d %s -t function -i [[test]]", self.settings['db_file'])
else:
self.settings['logger'].info("Testcases being generated")
count = 0
while count < (limit * self.settings['generate_multiplier']): # add more tests than necessary
for value in values:
stdout = [] # where the new random values will be stored
if self.settings['generate_tests'] in [0, 1, 2, 3]: # radamsa
if not find_executable("radamsa"):
self.settings["logger"].error("Radamsa not found within PATH")
sys.exit()
input_value = tempfile.mkstemp(suffix="File", prefix=self.settings['tmp_prefix'] + "mutate_", dir=self.settings['tmp_dir'])
if self.settings['generate_tests'] in [0, 2]: # add a newline to speed up the generation process
os.write(input_value[0], value[0] + "\n")
repeat = 1
input_count = limit
else:
os.write(input_value[0], value[0])
repeat = limit
input_count = 1
os.close(input_value[0])
for x in range(0, repeat):
stdout.append(self.execute_shell("radamsa -n " + str(input_count) + " " + input_value[1]))
os.unlink(input_value[1])
if self.settings['generate_tests'] in [0, 1, 4, 5]: # zzuf
if not find_executable("zzuf"):
self.settings["logger"].error("Zzuf not found within PATH")
sys.exit()
input_value = tempfile.mkstemp(suffix="File", prefix=self.settings['tmp_prefix'] + "mutate_", dir=self.settings['tmp_dir'])
if self.settings['generate_tests'] in [0, 4]: # add a newline to speed up the generation process
os.write(input_value[0], "\n".join([value[0]] * limit))
repeat = 1
else:
os.write(input_value[0], value[0])
repeat = limit
os.close(input_value[0])
for x in range(0, repeat):
stdout.append(self.execute_shell("zzuf -r" + str(random.uniform(0.01, 0.03)) + " -s" + str(latest_id + repeat + x) + " <" + input_value[1])) # zzuf -s 1.
#
import ctypes
import os.path
import shutil
import socket
import subprocess
import sys
try:
from urllib2 import urlopen # python 2
from urllib2 import HTTPError
from urllib2 import URLError
except ImportError:
from urllib.request import urlopen # python 3
from urllib.error import HTTPError
from urllib.error import URLError
class Monitor(object):
    """Checks that everything is looking good before the fuzzer starts, and while the fuzzer operates"""

    def __init__(self, settings):
        """Keep a reference to the shared settings dictionary used by every check"""
        self.settings = settings

    def check_once(self):
        """Run the checks that only need to happen once, followed by the per-loop checks"""
        self.check_canary_references(self.settings['canaryfile'])
        self.check_canary_references("canaryhost")
        self.check_canary_web(self.settings['canaryhost'], self.settings['canaryfile'], self.settings['canaryfileremote'])
        self.check_canary_command(self.settings['canaryexec'], self.settings['canaryexectoken'])
        self.check_ulimit()
        self.check()
        return None

    def check(self):
        """Clean the temporary directory, then check the canary file and the free space.

        Returns the sum of both check results.
        """
        self.remove_stuff()
        canary_path = os.path.join(self.settings['tmp_dir'], self.settings['canaryfile'])
        status = self.check_canary_file(canary_path, self.settings['canaryfiletoken'])
        # check_canary_file returns None on invalid arguments; count that as 0
        return (status or 0) + self.check_free_space()

    def remove_stuff(self):
        """Remove files that may affect the behaviour"""
        # delete specific files
        if sys.platform.startswith("linux"):  # "linux2" on Python 2, "linux" on Python 3
            home = os.getenv("HOME")
            if home:
                try:
                    # hhvm may fill up the disk with temp stuff
                    os.remove(os.path.join(home, '.hhvm.hhbc'))
                except OSError:
                    pass
        # delete all tmp_dir files
        for root, dirs, files in os.walk(self.settings['tmp_dir']):
            for f in files:
                path = os.path.join(root, f)
                try:
                    if os.path.isfile(path):
                        os.unlink(path)
                except OSError:
                    pass
            for d in dirs:
                path = os.path.join(root, d)
                try:
                    if os.path.isdir(path):
                        shutil.rmtree(path)
                except OSError:
                    pass

    def check_canary_file(self, filename, token):
        """Check that the canary file exists and holds the token, recreating it otherwise.

        Returns 1 when the file already holds the token, the result of
        create_canary_file when it had to be (re)created, and None when
        the arguments are not strings.
        """
        status = None
        if not isinstance(filename, str):
            self.settings['logger'].error("Filename is not a string")
        elif not isinstance(token, str):
            self.settings['logger'].error("Token is not a string")
        else:
            if os.path.isfile(filename):
                try:
                    token_file = open(filename, 'r')
                except (IOError, OSError):
                    self.settings['logger'].debug("CanaryFile could not be open, changing its permissions")
                    os.chmod(filename, 0o644)
                    token_file = open(filename, 'r')
                with token_file:
                    tmptoken = token_file.read().strip()
                if tmptoken == token:
                    return 1
                self.settings['logger'].debug("CanaryFile token differs, creating a new one")
            else:
                self.settings['logger'].debug("CanaryFile %s not found, creating a new one", str(filename))
            status = self.create_canary_file(filename, token)
        return status

    def create_canary_file(self, filename, token):
        """Create a text file with a certain token. Returns True on success, None on invalid arguments"""
        status = None
        if not isinstance(filename, str):
            self.settings['logger'].error("Filename is not a string")
        elif not isinstance(token, str):
            self.settings['logger'].error("Token is not a string")
        else:
            with open(filename, 'w') as canary_file:
                canary_file.write(token)
            self.settings['logger'].debug("CanaryFile created")
            status = True
        return status

    def check_canary_web(self, hostname, filename, token):
        """Check that the file can be retrieved over HTTP from hostname and that it holds the token.

        Returns True on a match, False on a mismatch or network error and
        None when the arguments are not strings.
        """
        status = None
        if not isinstance(hostname, str):
            self.settings['logger'].error("Hostname is not a string")
        elif not isinstance(filename, str):
            self.settings['logger'].error("Filename is not a string")
        elif not isinstance(token, str):
            self.settings['logger'].error("Token is not a string")
        else:
            url = "http://" + hostname + "/" + filename + "?monitor"
            try:
                response = urlopen(url, timeout=5)
                try:
                    data = response.read().strip()
                finally:
                    response.close()
                # The body is bytes on Python 3 while the token is text
                if isinstance(data, bytes) and not isinstance(token, bytes):
                    data = data.decode('utf-8', 'replace')
                if data == token:
                    status = True
                else:
                    self.settings['logger'].warning("CanaryWeb token mismatch: expected %s and received %s", token, data)
                    status = False
            # Most specific first: on Python 3 HTTPError and URLError are subclasses
            # of OSError, which is what socket.error is an alias of
            except HTTPError:
                self.settings['logger'].warning("CanaryWeb Filename %s not found: %s", str(filename), url)
                status = False
            except URLError:
                self.settings['logger'].warning("CanaryWeb may not work, network is unreachable")
                status = False
            except socket.error:
                self.settings['logger'].warning("CanaryWeb Hostname %s not found", str(hostname))
                status = False
        return status

    def check_canary_command(self, command, token):
        """Check that the command can be executed and returns the expected token.

        Returns True when the token is part of the command output, False
        when it is not and None when the command produced no output or
        could not be run.
        """
        stdout = None
        found = None
        try:
            stdout, stderr = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        except Exception as e:
            self.settings['logger'].warning("CanaryCommand %s not found: %s", str(command), str(e))
        if stdout:
            text = stdout.strip()
            # The pipe yields bytes on Python 3 while the token is text
            if isinstance(text, bytes) and not isinstance(token, bytes):
                text = text.decode('utf-8', 'replace')
            found = token in text
            if not found:
                self.settings['logger'].warning("CanaryCommand token (%s) differs: '%s'", token, str(text))
        return found

    def check_canary_references(self, reference):
        """Check if the reference is on any of the testcases of the database. Returns 1 if found, 0 otherwise"""
        found = 1
        if self.settings['db'].count_reference(reference) == 0:
            self.settings['logger'].warning("CanaryReferences were not found in the db for the string: %s", str(reference))
            found = 0
        return found

    def check_free_space(self):
        """Check that the current device has more than settings['lowerlimit'] megabytes free.

        Exits the program when it does not; returns 1 otherwise.
        """
        if sys.platform == "win32":
            # The API writes a 64-bit value, so a 64-bit integer is required
            free_bytes = ctypes.c_ulonglong(0)
            ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p("."), None, None, ctypes.pointer(free_bytes))
            free_mb = free_bytes.value / 1024 / 1024
        else:
            stat = os.statvfs('.')
            free_mb = stat.f_bfree * stat.f_frsize / 1024 / 1024
        if free_mb <= self.settings['lowerlimit']:
            self.settings['logger'].critical("There is not enough space on the device. The current free disk space in megabytes is: %s", str(free_mb))
            sys.exit()
        return 1

    def check_ulimit(self):
        """Check that the limit of open file descriptors is high enough for the fuzzer.

        Exits the program when the soft limit is below the minimum.
        Returns 0 when the limit could not be read and 1 otherwise
        (including on Windows, where nothing is checked).
        """
        if sys.platform != "win32":
            minimum = 1024
            try:
                # "ulimit" is a shell builtin and cannot be spawned directly, so the
                # limit of this process is read instead (Unix-only module)
                import resource
                soft_limit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
                unlimited = resource.RLIM_INFINITY
            except Exception:
                self.settings['logger'].debug("ulimit check did not work")
                return 0
            if soft_limit != unlimited and soft_limit < minimum:
                self.settings['logger'].critical("ulimit is too low (%s), you must raise ulimit (`ulimit -n %s`)", str(soft_limit), str(minimum))
                sys.exit()
        return 1
================================================
FILE: classes/queue.py
================================================
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from __future__ import absolute_import
from .fuzzer import Fuzzer
from .webserver import WebServer
class Queue(Fuzzer, WebServer):
    """Glue object combining the fuzzer and the web server so both work on the same list of ids"""

    def __init__(self, settings):
        """Create the shared id list and initialise both parent classes with the settings"""
        shared_ids = []
        self.ids = shared_ids
        Fuzzer.__init__(self, settings, shared_ids)
        WebServer.__init__(self, settings)
================================================
FILE: classes/settings.py
================================================
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from __future__ import print_function
from __future__ import absolute_import
import getpass
import logging
import os
import random
import sys
from xdiff_dbaction import Dbaction
from .queue import Queue
from .dbsqlite import DbSqlite
from .monitor import Monitor
def define_software(settings):
    """Load the software definitions of the selected category from the software file.

    settings['software'] is the path of an ini-like file: "[Name]" lines
    open a category, lines starting with OS, Type, Suffix or Filename are
    directives that apply to the following entries of that category, and
    "name = [...]" lines define a command line. Only the category equal
    to settings['fuzz_category'] is read, and only while the OS directive
    matches the current platform.

    Returns a list of dictionaries (category, type, suffix, filename,
    name, execute, softwareid) for every entry that
    settings['db'].get_software_id resolves to a truthy id.

    SECURITY NOTE: directive and command lines are evaluated with
    exec()/eval(), so the software file must come from a trusted source.
    """
    software = []
    if not ("software" in settings and settings['software'] and "fuzz_category" in settings and settings['fuzz_category']):
        return software
    if not os.path.isfile(settings['software']):
        settings['logger'].error("The settings file %s does not exist", os.path.abspath(settings['software']))
        return software

    directives = ("OS", "Type", "Suffix", "Filename")
    category = None
    state = {}
    with open(settings['software'], "r") as software_file:
        for line in software_file:
            line = line.strip()
            if line[:1] == "#":  # comment
                continue
            if line[:1] == "[" and line[-1:] == "]":  # is this a category?
                category = line[1:-1]
                state = {"OS": [], "Type": None, "Suffix": None, "Filename": None}
            if category != settings['fuzz_category']:
                continue
            if line.startswith(directives):
                # exec() cannot rebind function locals on Python 3, so the directive
                # is evaluated into an explicit namespace instead
                exec(line, globals(), state)
                if state["OS"] is not None and not _platform_matches(state["OS"]):
                    state["OS"] = None
            elif line.find('=') != -1 and state["OS"] is not None:
                if state["Type"] is None:
                    state["Type"] = ["CLI"]
                if state["Suffix"] is None:
                    state["Suffix"] = [""]
                if state["Filename"] is None:
                    state["Filename"] = [""]
                separator = line.find('=')
                command = line[separator + 1:].strip()
                item = {}
                item['category'] = category
                item['type'] = state["Type"]
                item['suffix'] = state["Suffix"]
                item['filename'] = state["Filename"]
                item['name'] = line[:separator].strip()
                if 'valgrind' in settings and settings['valgrind']:
                    item['execute'] = eval('["valgrind", "-q", ' + command[1:])
                else:
                    item['execute'] = eval(command)
                item['softwareid'] = settings['db'].get_software_id(item)
                if item['softwareid']:
                    settings['logger'].debug("Software found: %s", str(item))
                    software.append(item)
    return software


def _platform_matches(platforms):
    """Tell whether sys.platform is listed, treating "linux" (Python 3) and "linux2" (Python 2) as the same"""
    if sys.platform in platforms:
        return True
    if sys.platform.startswith("linux"):
        return any(str(name).startswith("linux") for name in platforms)
    return False
def set_logger(settings):
    """Instantiate the logging functionality.

    The root logger is configured to write to 'fuzz.log' at INFO level and
    the 'fuzzer' logger additionally gets a console handler. The level of
    the 'fuzzer' logger is raised or lowered when settings['loglevel'] is
    'debug' or 'critical'.

    The console handler is attached only once, so calling this function
    again does not duplicate every console message.
    """
    log_format = '%(asctime)s %(levelname)s %(module)s: %(message)s'
    logging.basicConfig(filename='fuzz.log', level=logging.INFO, format=log_format, datefmt='%Y-%m-%d %H.%M.%S')
    logger = logging.getLogger('fuzzer')
    # Exact type check: FileHandler is a StreamHandler subclass and must not count as the console
    if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter(log_format))
        logger.addHandler(console)
    if 'loglevel' in settings and settings['loglevel'] == 'debug':
        logger.setLevel(logging.DEBUG)
    elif 'loglevel' in settings and settings['loglevel'] == 'critical':
        logger.setLevel(logging.CRITICAL)
    return logger
def load_settings(settings):
    """Define global settings.

    Fills the settings dictionary passed by the caller with the logger,
    the limits, the platform-specific temporary directory, the database
    connection, the loaded software, the queue/monitor/dbaction helpers
    and the canary and analysis defaults. Values already provided by the
    caller (db_file, db_tests, software, timeout, generate_multiplier,
    output_type, minimum_risk) are kept.

    Returns the same dictionary.
    """
    settings['logger'] = set_logger(settings)

    # Run
    settings['version'] = "1.2.0 (HITB Edition)"
    settings['soft_limit'] = 250  # maximum limit for the output of stdout & stderr
    settings['soft_bypass'] = ["canarytoken", getpass.getuser(), "root", "/usr", "/bin", "PATH", "== "]  # exceptions for the soft_limit setting
    settings['hard_limit'] = 1024  # maximum hard limit, regardless of the soft_limit & soft_bypass
    # settings['hard_limit_lines'] = 1  # maximum line limit in the output
    settings['tmp_prefix'] = "chkF_"  # prefix for temporary files created
    if sys.platform in ["darwin"]:
        settings['tmp_dir'] = "/Volumes/ramdisk/"
        settings['tmp_dir_howto'] = "diskutil erasevolume HFS+ 'ramdisk' `hdiutil attach -nomount ram://838860`"
    elif sys.platform == "win32":
        settings['tmp_dir'] = "X:\\"
        # Backslashes are escaped explicitly; the resulting text is unchanged
        settings['tmp_dir_howto'] = "imdisk -a -s 512M -m X: -p \"/fs:ntfs /q/y\"; notepad \"C:\\Windows\\System32\\canaryfile.bat\": @echo off; echo canarytokencommand"
    elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
        # Python 2 reports "linux2" and Python 3 "linux"; FreeBSD appends its major version
        settings['tmp_dir'] = "/mnt/ramdisk/"
        settings['tmp_dir_howto'] = "mkdir /mnt/ramdisk; mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk; echo \"tmpfs /mnt/ramdisk tmpfs nodev,nosuid,noexec,nodiratime,size=512M 0 0\" >> /etc/fstab"
    settings['webserver_port'] = random.randrange(10000, 65535)  # dynamic web server port: crashes in the same port may interfere
    # settings['webserver_port'] = 8000  # sometimes you just need a fixed value
    if "db_file" not in settings:
        settings["db_file"] = None
    settings['db'] = DbSqlite(settings, settings['db_file'])
    if settings['db'].db_connection:
        settings['kill_status'] = {
            "not_killed": settings['db'].get_constant_value("kill_status", "not killed"),
            "requested": settings['db'].get_constant_value("kill_status", "requested"),
            "killed": settings['db'].get_constant_value("kill_status", "killed"),
            "not_found": settings['db'].get_constant_value("kill_status", "not found"),
        }
    if "db_tests" not in settings:
        settings['db_tests'] = 100  # save the results in the database every X tests
    if "software" not in settings:
        settings['software'] = os.path.abspath("software.ini")  # software definitions
    if "timeout" not in settings:
        settings['timeout'] = 10  # default timeout for threads in seconds
    settings['software'] = define_software(settings)  # load the software and find potential inconsistencies
    settings['queue'] = Queue(settings)  # prepare the fuzzer and the webserver to interact
    settings['monitor'] = Monitor(settings)  # instantiate the monitor object
    settings['dbaction'] = Dbaction(settings)  # instantiate the dbaction object

    # Fuzzer
    if "generate_multiplier" not in settings:
        settings['generate_multiplier'] = 100  # multiply the testcase limit by this number to generate new test cases

    # Monitor
    settings['lowerlimit'] = 200  # minimum free space in megabytes
    settings['canaryfile'] = "canaryfile"
    settings['canaryfiletoken'] = "canarytokenfilelocal"  # contents of settings['canaryfile']
    settings['canaryexec'] = "canaryfile"
    settings['canaryexectoken'] = "canarytokencommand"  # contents of settings['canaryexec']
    settings['canaryhost'] = "127.0.0.1:" + str(settings['webserver_port'])
    settings['canaryfileremote'] = "canarytokenfileremote"

    # Analyze
    settings['output_width'] = 130
    settings['testcase_limit'] = 200  # a low number will help with RAM consumption when performing queries against big databases
    if "output_type" not in settings:
        settings["output_type"] = "html"  # default output type
    settings["print_risk"] = False  # print the risk?
    if "minimum_risk" not in settings:
        settings["minimum_risk"] = 0  # default minimum risk
    settings["max_results"] = 999999999  # ridiculous high number to get all the occurrences of a function
    if settings['db_file']:
        settings['output_file'] = settings['db_file'] + "." + settings['output_type']
    settings['error_disclosure'] = ["Exception", "stack trace", "core dump", "egmentation fault", "Traceback"]
    settings['soft_bypass'].extend(settings['error_disclosure'])
    return settings
================================================
FILE: classes/webserver.py
================================================
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import threading
import os.path
import compat
try: # Python 2
from SimpleHTTPServer import SimpleHTTPRequestHandler
import BaseHTTPServer
import urlparse
except ImportError: # Python 3
from http.server import SimpleHTTPRequestHandler
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse
class BaseHandler(SimpleHTTPRequestHandler):
    """Changes a few things from SimpleHTTPServer to handle requests.

    Requests whose query string starts with ``tag0=<number>&tag1=<number>``
    are recorded in ``my_class.ids`` (once per distinct request). Every request
    is answered with a 200 response whose body is the remote canary token for
    ``/canaryfile``, or the contents of the requested file when it exists.
    """
    # Object exposing ``settings`` and ``ids``; WebServer.start_web_server() assigns itself here.
    my_class = None

    def log_message(self, format, *args):
        """Avoid SimpleHTTPServer logs"""
        pass

    def do_GET(self):
        """Handle GET requests to parse parameters and save the responses to the corresponding ids"""
        # self.my_class.settings['logger'].debug("URL: %s Query: %s", str(url), str(query))
        data = compat.unicode("GET " + str(self.path) + "\n" + str(self.headers), errors='ignore')
        self.do_REQUEST(data)

    def do_POST(self):
        """Handle POST requests to parse parameters and save the responses to the corresponding ids"""
        # self.my_class.settings['logger'].debug("URL: %s Query: %s", str(url), str(query))
        data = compat.unicode("POST " + str(self.path) + "\n" + str(self.headers), errors='ignore')
        self.do_REQUEST(data)

    def _parse_tag_id(self, tag, label):
        """Return the integer value of a ``[name, value]`` tag, or None (with a warning) when it is not a number"""
        try:
            return int(tag[1])
        except (IndexError, ValueError) as e:  # no '=value' part, or a non numeric value
            self.my_class.settings['logger'].warning("%s received, but is not a number: %s", label, e)
            return None

    def _write_body(self, payload):
        """Write the response body, encoding text first because wfile only accepts bytes on Python 3"""
        if not isinstance(payload, bytes):
            payload = payload.encode("utf-8")
        self.wfile.write(payload)

    def do_REQUEST(self, data):
        """Handle GET and POST requests to parse parameters and save the responses to the corresponding ids

        data -- textual dump of the request (method, path and headers); it is
                logged and used to detect duplicated requests.
        """
        # 'urlparse' is a module on Python 2 but the function itself on Python 3
        parse_url = getattr(urlparse, "urlparse", urlparse)
        url = parse_url(self.path)
        query = url.query.split('&')
        self.my_class.settings['logger'].debug("%s", data)
        if len(query) > 1:
            # with tag0 we can identify the testcaseid
            tag0 = query[0].split("=")
            # with tag1 we can identify the softwareid
            tag1 = query[1].split("=")
            if tag0[0] == "tag0" and tag1[0] == "tag1":
                testcaseid = self._parse_tag_id(tag0, "Tag0")
                softwareid = self._parse_tag_id(tag1, "Tag1")
                # if we found a testcaseid and a software id, we can correlate it to the results
                if testcaseid and softwareid:
                    ids = self.my_class.ids
                    # we don't want dupes, check if the request hasn't been issued before
                    duplicate = any(
                        entry[0] == testcaseid and entry[1] == softwareid and entry[2] == data
                        for entry in ids
                    )
                    if not duplicate:
                        # can we extract the stdout, elapsed and stderr data from the url?
                        captured = {'stdout': None, 'elapsed': None, 'stderr': None}
                        for parameter in query:
                            parameter = parameter.split('=')
                            if len(parameter) == 2 and parameter[0] in captured:
                                captured[parameter[0]] = parameter[1]
                        ids.append([testcaseid, softwareid, data, captured['stdout'], captured['elapsed'], captured['stderr']])
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        getfile = url[2][1:].split('?')[0]
        if url.path == "/canaryfile":
            self._write_body(self.my_class.settings['canaryfileremote'])
        elif os.path.isfile(getfile):
            # NOTE(review): getfile comes straight from the request path, so '../' or an
            # absolute path can reach files outside the working directory - confirm this is intended.
            with open(getfile, "rb") as content:  # closed deterministically; bytes are what wfile expects
                self._write_body(content.read())
class WebServer(object):
    """Used to parse HTTP connections"""

    def __init__(self, settings):
        """Store the settings; the HTTP server itself is created by start_web_server()"""
        self.settings = settings
        self.server = None
        # BaseHandler records the correlated requests in my_class.ids, so it must always exist
        self.ids = []

    def start_web_server(self):
        """Web server: load simplehttpserver as a thread and continue execution"""
        BaseHandler.my_class = self
        try:
            server_class = BaseHTTPServer.HTTPServer  # Python 2
        except NameError:
            server_class = HTTPServer  # Python 3: imported directly from http.server
        self.server = server_class(("127.0.0.1", self.settings['webserver_port']), BaseHandler)
        thread = threading.Thread(target=self.server.serve_forever)
        thread.daemon = True  # do not keep the process alive because of the web server
        self.settings['logger'].debug("Loading web server using port %s", str(self.settings['webserver_port']))
        try:
            thread.start()
        except KeyboardInterrupt:
            self.stop_web_server()

    def stop_web_server(self):
        """Web server shutdown when closing the fuzzer"""
        if self.server:
            self.settings['logger'].debug("Shutting down Web Server...")
            self.server.shutdown()
================================================
FILE: docs/1.-Install.md
================================================
Following are the instructions on how to execute XDiFF in:
* [Linux](#Linux)
* [OSX](#OSX)
* [Freebsd](#Freebsd)
* [Windows](#Windows)
---
## Linux (Ubuntu/Debian)
1. Install some utilities as root:
```
apt update; apt -y install python2.7 gcc make git sqlite3 wget
```
2. Download the latest copy of XDiFF:
```
git clone https://github.com/IOActive/XDiFF.git; cd XDiFF
```
3. Install some input fuzzers (minimum 1gb of RAM required) as root:
```
git clone https://github.com/aoh/radamsa.git; cd radamsa; make OFLAGS=-O1; make install; cd ..; rm -r radamsa/
wget https://github.com/samhocevar/zzuf/releases/download/v0.15/zzuf-0.15.tar.bz2; tar -xf zzuf-0.15.tar.bz2; cd zzuf-0.15/; ./configure; make; make install; cd ..; rm -r zzuf-0.15.tar.bz2 zzuf-0.15/
```
4. Create a ramdisk where files will be created as root:
```
mkdir /mnt/ramdisk; mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk; echo "tmpfs /mnt/ramdisk tmpfs nodev,nosuid,noexec,nodiratime,size=512M 0 0" >> /etc/fstab
```
5. Point the host *canaryhost* to *localhost* as root:
```
echo "127.0.0.1 canaryhost"|tee -a /etc/hosts
```
6. Create the *canarycommand*:
```
echo '#!/bin/sh'>/usr/local/bin/canaryfile.bat; echo 'echo canarytokencommand'>>/usr/local/bin/canaryfile.bat; chmod +x /usr/local/bin/canaryfile.bat; cp /usr/local/bin/canaryfile.bat /usr/local/bin/canaryfile
```
---
## OSX
1. Install some utilities. The following utilities are installed using brew; if you don't have it, you can install it by executing ```/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"```:
```
brew install git wget
```
2. Download the latest copy of XDiFF:
```
git clone https://github.com/IOActive/XDiFF.git; cd XDiFF
```
3. Install some input fuzzers (minimum 1gb of RAM required):
```
git clone https://github.com/aoh/radamsa.git; cd radamsa; make OFLAGS=-O1; sudo cp bin/radamsa /usr/local/bin/; cd ..
wget https://github.com/samhocevar/zzuf/releases/download/v0.15/zzuf-0.15.tar.bz2; tar -xf zzuf-0.15.tar.bz2; cd zzuf-0.15/; ./configure; make; make install; cd ..; rm -r zzuf-0.15.tar.bz2 zzuf-0.15/
```
4. Create a ramdisk where files will be created:
```
diskutil erasevolume HFS+ 'ramdisk' `hdiutil attach -nomount ram://838860`
```
5. Point the host *canaryhost* to *localhost*:
```
echo "127.0.0.1 canaryhost"|sudo tee -a /etc/hosts
```
6. Create the *canarycommand*:
```
echo '#!/bin/sh'>/usr/local/bin/canaryfile.bat; echo 'echo canarytokencommand'>>/usr/local/bin/canaryfile.bat; chmod +x /usr/local/bin/canaryfile.bat; cp /usr/local/bin/canaryfile.bat /usr/local/bin/canaryfile
```
7. Raise the ulimit
```
ulimit -n 1024
```
---
## Freebsd
1. Install some utilities:
```
pkg install git wget py27-sqlite3
```
2. Download the latest copy of XDiFF:
```
git clone https://github.com/IOActive/XDiFF.git; cd XDiFF
```
3. Install some input fuzzers (minimum 1gb of RAM required):
```
git clone https://github.com/aoh/radamsa.git; cd radamsa; make OFLAGS=-O1; sudo make install; cd ..; rm -r radamsa/
```
Pending: Zzuf compile options
4. Create a ramdisk where files will be created:
```
sudo mkdir /mnt/ramdisk; sudo mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk; sudo echo "tmpfs /mnt/ramdisk tmpfs nodev,nosuid,noexec,nodiratime,size=512M 0 0" >> /etc/fstab
```
5. Point the host *canaryhost* to *localhost*:
```
echo "127.0.0.1 canaryhost" | sudo tee -a /etc/hosts
```
6. Create the *canarycommand*:
```
echo '#\!/bin/sh' > /usr/local/bin/canaryfile.bat ; echo 'echo canarytokencommand' >> /usr/local/bin/canaryfile.bat ; chmod +x /usr/local/bin/canaryfile.bat ; cp /usr/local/bin/canaryfile.bat /usr/local/bin/canaryfile
```
---
## Windows
1. Download and install some utilities:
```
Python 2.7: https://www.python.org/ftp/python/2.7.14/python-2.7.14.amd64.msi
IMDisk: https://sourceforge.net/projects/imdisk-toolkit/files/latest/download
```
2. Download the latest copy of XDiFF:
```
https://github.com/IOActive/XDiFF/archive/master.zip
```
3. Download some input fuzzers. For Radamsa, download and put within your PATH the .dll and the .exe:
```
https://github.com/vah13/radamsa/releases
```
4. Create a ramdisk where files will be created:
```
imdisk -a -s 512M -m X: -p \"/fs:ntfs /q/y\"
```
Then, format the RAM disk once the Windows pop-up appears.
5. Point the host *canaryhost* to *localhost*. Right click on startup -> Command Prompt (Admin):
```
echo 127.0.0.1 canaryhost >> C:\Windows\System32\drivers\etc\hosts
```
6. Create the *canarycommand*. Right click on startup -> Command Prompt (Admin):
```
echo @echo off > C:\Windows\System32\canaryfile.bat & echo.echo canarytokencommand >> C:\Windows\System32\canaryfile.bat
```
---
# What's next?
You want to define [the input](https://github.com/IOActive/XDiFF/wiki/2.-The-input)
================================================
FILE: docs/2.-The-input.md
================================================
# Why do I want to use a database?
A database allows you to compare the results of how the software was executed when using different inputs, versions, implementations or operating systems. All the test cases to be evaluated are contained in one place and any issues found across multiple scenarios will be detected. There is value not only in exploiting the vulnerabilities with the highest risk, but also in the ones that affect multiple pieces of software at the same time. The performance and capabilities of SQLite for the fuzzer were proven to be better than MySQL and Redis.
## How's the database structure?
The initial analysis of the database was constructed around how to fuzz programming languages. They allow you to create any piece of software, so they will have access to all the functionalities. With this in mind, this is the basic look of a plain SQLite database used by XDiFF:
# sqlite3 dbs/plain.sqlite
SQLite version 3.11.0 2016-02-15 17:29:24
Enter ".help" for usage hints.
sqlite> .tables
function fuzz_software fuzz_testcase_result
fuzz_constants fuzz_testcase value
There are two tables where you may want to manually insert or edit some values:
* **value**: contains the items that will replace the ```[[test]]``` values in *function*. If you don't have a 'function', you can use the values in here with input fuzzers.
* **function**: contains what you want to fuzz. There is a special keyword ```[[test]]``` that gets replaced by the values contained in **value**. For example, if you would like to fuzz the print() function, you would normally want to have in here ```print([[test]])```.
The tables that start with 'fuzz_' are generated by XDiFF:
* **fuzz_testcase**: contains the combination of *function* and *value*
* **fuzz_software**: contains the software defined in *software.ini*
* **fuzz_testcase_result**: contains the result of executing the software defined in *fuzz_software* with the input defined in *fuzz_testcase*
* **fuzz_constants**: contains internal constant values used by the fuzzer
## Grab a sample database
Let's grab a copy of the plain.sqlite database:
```
cp dbs/plain.sqlite shells.sqlite
```
## Insert testcases
Data can be inserted in the database using a ***sqlite3*** parser or using the ***xdiff_dbaction.py*** script. In case your test case/s are in a file, you may want to insert it directly into the database like this for example:
echo "insert into value values (readfile('sample_file'))"|sqlite3 shells.sqlite
## Insert combinations of functions/values
If you have a certain function (or portion of code) that you want to fuzz with certain values, you can insert first the functions into the database:
./xdiff_dbaction.py -d shells.sqlite -t function -i "foo([[test]])"
Insert the values that you want to use to fuzz the piece of code within the function table:
./xdiff_dbaction.py -d shells.sqlite -t value -i "bar"
Then you can generate the permutations:
./xdiff_dbaction.py -d shells.sqlite -g 1
2017-11-20 22:06:24,901 INFO dbaction: Values: 1 - Functions: 1
2017-11-20 22:06:24,901 INFO dbaction: Testcases generated: 1
2017-11-20 22:06:24,902 INFO dbaction: Time required: 0.0 seconds
You can later confirm what everything looks like:
./xdiff_dbaction.py -d shells.sqlite -t fuzz_testcase -p
----------------------------------------------------------------------------------------------------------
| fuzz_testcase (1 rows) |
----------------------------------------------------------------------------------------------------------
| id | testcase |
----------------------------------------------------------------------------------------------------------
| 1 | foo(bar) |
----------------------------------------------------------------------------------------------------------
## Extending the detection
Part of the install process required to create a command named ```canaryfile``` (and ```canaryfile.bat```). When this file gets executed, it produces a specific output that can be later analyzed. Basically, you want the string ```canaryfile``` as part of your values.
Moreover, if the software may open network connections, you also want to define the ```canaryhost``` as part of the potential values to be used. The connections will be detected locally and be included as part of the output to be analyzed.
# What's next?
You want to define [the software](https://github.com/IOActive/XDiFF/wiki/3.-The-software)
================================================
FILE: docs/3.-The-software.md
================================================
In here you will find information about how to define pieces of software in the file *software.ini*.
This defines pieces of data in three columns:
1. The first column defines the software category between brackets. Lets suppose that you want to fuzz command shells, so we can name the software category ***shells***.
```javascript
[shells]
```
2. The second column has four predefined possibilities:
2.1. **Type**: how the information is going to be read by the programs. By default, if you don't specify anything, it is going to be ```CLI```, which means that the input to be fuzzed is grabbed from the command line. Another possibility is ```File```, which means that the contents of what's going to be fuzzed will be written into a file first. Moreover, whenever you're fuzzing files, you may want to specify the suffix of that file (please see below in 2.3). Finally, one last possibility for the input is ```Stdin```, as you would use it when piping information to another program.
2.2. **OS**: it could either be ***darwin***, ***linux2***, ***freebsd11*** or ***win32***
2.3. **Suffix**: the suffix used for files when the input type is set to ```File```. We can easily fuzz command shells without files and suffixes, but to illustrate the point let's use them:
Type = ["File"]
OS = ["darwin", "linux2", "freebsd11"]
Suffix = [".sh"]
2.4. **Filename**: if the software to be fuzzed reads information from a certain static filename, you can define it in here. Don't forget to run the fuzzer with only 1 thread when using this.
3. The third column defines the pieces of software to be fuzzed. If you want to fuzz mp3 files using mpg321 and mpg123, you can do it like this:
Bash = ["bash", "-c", "-fuzzdata=echo $(([[test]]))"]
Ksh = ["ksh", "-c", "-fuzzdata=echo $(([[test]]))"]
First we set the name of the software to be fuzzed (***bash***, ***dash***, or ***ksh***). Then, we define in an array the command and options to be executed. There is a special option named *-fuzzdata=* that tells the fuzzer that the next piece of information is where our fuzzed test case will be placed. The *[[test]]* will be replaced by a temporary file name containing a weird mp3 to fuzz the software in this example.
### Putting all the pieces together
This is how you could define the software category ***shells*** to be fuzzed using the ***CLI***:
```
# Sample fuzzing of shells
[shells]
OS = ["darwin", "linux2", "freebsd11"]
Bash = ["bash", "-c", "-fuzzdata=echo $(([[test]]))"]
Ksh = ["ksh", "-c", "-fuzzdata=echo $(([[test]]))"]
```
---
# What's next?
You want to [run the fuzzer](https://github.com/IOActive/XDiFF/wiki/4.-The-fuzzer)
================================================
FILE: docs/4.-The-fuzzer.md
================================================
## Fuzzing
The most basic execution requires defining which category and which database will be used:
```
./xdiff_run.py -c shells -d shells.sqlite
```
The output should look like this:

It includes a lot of debugging information, and the most important parts are marked. At the top is the execution, and at the bottom is the beginning of the execution along with the rate (you want this number to be as high as possible).
## Fuzzing using the input fuzzers
If you want to generate new test cases based on the currently defined test cases, you can use the input fuzzers that were installed as part of the install process.
```
./xdiff_run.py -c shells -d shells.sqlite -z 0
```
The output should now indicate, from time to time, when new inputs are being generated:

## Additional fuzzing options:
There are four additional important optional settings to be mentioned:
- [*-D*]: Print debugging information
- [*-t 100*]: The amount of threads to be executed in parallel.
- [*-T 10*]: The timeout per thread
- [*-v*]: Use valgrind to execute the software to be fuzzed.
The combination of threads and the timeout is something to be defined per category. Fuzzing a shell requires no time, while compiling and fuzzing a java program takes much more time. Pay attention to the output produced to see if the software is being properly executed (or is getting mostly killed because the timeout is too low).
---
# What's next?
You want to analyze [the output](https://github.com/IOActive/XDiFF/wiki/5.-The-output)
================================================
FILE: docs/5.-The-output.md
================================================
## Analyzing the output
The most basic form of analyzing the output is running:
```
./xdiff_analyze.py -d shells.sqlite
```
A normal analysis output looks like this:

### HTML
The previous execution creates by default an HTML file named ```shells.sqlite.html``` that for this session looks like this on a web browser:

### Text
Another possibility is to output the analysis as text when using the ```-t txt``` option:

## The analytic functions
There are multiple analytic functions that can expose information from the database. The default function that gets executed is ```report```, which includes 15 functions. The following is the whole list of functions; the ones in bold are already included as part of the ```report```:
- **```analyze_canary_file```**: Find canary filenames in the stdout or stderr, even though canary files were not part of the payload
- **```analyze_canary_token_code```**: Find canary tokens of code executed in the stdout or in the stderr
- **```analyze_canary_token_command```**: Find canary tokens of commands in the stdout or stderr
- **```analyze_canary_token_file```**: Find canary tokens of files in the stdout or in the stderr
- ```analyze_elapsed```: Analyze which was the total time required for each piece of software
- ```analyze_error_disclosure```: Analyze errors disclosed in the output taken from settings['error_disclosure']
- ```analyze_file_disclosure_without_path```: Find the tmp_prefix in the stdout or stderr without the full path
- ```analyze_file_disclosure```: Find the tmp_prefix in the stdout or in the stderr
- ```analyze_killed_differences```: Find when one piece of software was killed AND another one was not killed for the same input
- **```analyze_output_messages```**: Analyze which were the different output messages for each piece of software
- ```analyze_path_disclosure_without_file```: Find the tmp_dir in the stdout or stderr, even though the testcase did not have a temporary file
- ```analyze_path_disclosure_stdout```: Find the tmp_dir in the stdout
- ```analyze_path_disclosure_stderr```: Find the tmp_dir in the stderr
- **```analyze_remote_connection```**: Find remote connections made
- **```analyze_return_code_differences```**: Find when different return codes are received for the same input
- **```analyze_return_code_same_software_differences```**: Find when different return codes are received for the same software using different input forms
- **```analyze_return_code```**: Get the different return codes for each piece of software
- ```analyze_same_software```: Find when the same software produces different results when using different inputs
- ```analyze_same_stdout```: Finds different testcases that produce the same standard output
- ```analyze_specific_return_code```: Find specific return codes
- **```analyze_stdout```**: Find when different pieces of software produces different results
- ```analyze_top_elapsed_killed```: Find which killed tests cases required more time
- ```analyze_top_elapsed_not_killed```: Find which not killed tests cases required more time
- **```analyze_username_disclosure```**: Find when a specific username is disclosed in the stdout or in the stderr
- **```analyze_valgrind```**: Find Valgrind references in case it was used
- ```list_killed_results```: Print the killed fuzzing results
- **```list_results```**: Print the fuzzing results: valuable to see how the software worked with the testcases defined, without using any constraints
- **```list_software```**: Print the list of [active] software used with testcases from the database
- ```list_summary```: Print a quantitative summary using all the analytic functions from this class
### Working with the analytic functions
Depending on what type of software you're fuzzing, it may be convenient to enable or disable certain functions. The best way is to modify the ```xdiff_analyze.py``` script to expose the information that we need.
For other scenarios, you may just want to expose the output of a single function. Let's suppose that you only care about the analytic function ```analyze_return_code``` to see how code behaves:
./xdiff_analyze.py -d shells.sqlite -m analyze_return_code -o txt
The previous command produces the following output:
```
----------------------------------------------------------------------------------------
| Analyze Different Return Codes per Software - analyze_return_code (5 rows) |
----------------------------------------------------------------------------------------
| Software | Type | OS | Return Code | Amount |
----------------------------------------------------------------------------------------
| Bash | CLI | darwin | 1 | 499 |
----------------------------------------------------------------------------------------
| Bash | CLI | darwin | 2 | 76 |
----------------------------------------------------------------------------------------
| Ksh | CLI | darwin | 0 | 73 |
----------------------------------------------------------------------------------------
| Ksh | CLI | darwin | 1 | 495 |
----------------------------------------------------------------------------------------
| Ksh | CLI | darwin | 3 | 7 |
----------------------------------------------------------------------------------------
```
================================================
FILE: docs/Changelog.md
================================================
# Changelog
Changes are listed in time order: newer changes are at the top, older changes are at the bottom.
## Version: [1.2.0](https://github.com/IOActive/XDiFF/releases/tag/1.2)
- Changed main function names in the root directory
- Improved code, documentation, and (most of) the code is now tested. Tons of bugfixes.
- Added new analysis for error disclosure (analyze_error_disclosure) and path disclosure (analyze_path_disclosure_stderr)
- Added new compatibility class (classes.compat) to support Python 3
- Added risk value to the different analytic functions. Print functions based on their rating: ./xdiff_analyze.py -d db.sqlite -r 0/1/2/3
- Improved analysis of network connections to test browsers connections
- software.ini: added support to test non random filenames. Set on the second column: Filename = /etc/myfixedfilename
- Added -d for debug output
- Added new parameters in the settings.py class
#### Contributors:
- farnaboldi
## Version: [1.1.1](https://github.com/IOActive/XDiFF/releases/tag/1.1.1) (beta)
- Added support for Python 3 [[2]](https://github.com/IOActive/XDiFF/pull/2)
#### Contributors:
- cclauss
## Version: [1.1.0](https://github.com/IOActive/XDiFF/releases/tag/1.1.0)
- First public release for Blackhat Europe 2017
#### Contributors:
- farnaboldi
================================================
FILE: xdiff_analyze.py
================================================
#!/usr/bin/env python
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from __future__ import print_function
import datetime
import getopt
import getpass
import inspect
import os
# import profile # uncomment here for benchmarking and at the bottom
import re
import sys
import time
import classes.settings
from classes.dump import Dump
try:
reload # Python 2
except NameError: # Python 3
from importlib import reload
class Analyze(object):
"""Analyzes the fuzzing information for abnormal behaviors"""
def __init__(self, settings):
    """Keep the settings, force the ramdisk tmp_dir and create the Dump helper used for output

    settings -- settings dictionary; it is stored as-is and its 'tmp_dir'
                entry is overwritten.
    """
    reload(sys)
    try:
        sys.setdefaultencoding('utf8')
    except AttributeError:
        pass  # Python3: sys has no setdefaultencoding()
    self.settings = settings
    self.settings['tmp_dir'] = "ramdisk"  # by using this, it will work on multiple directories (ie, /Volumes/ramdisk, /mnt/ramdisk, etc)
    self.dump = Dump(self.settings)
    self.count_results = None  # filled in later by list_summary()
def check_minimum_risk(self, function_risk, title):
"""Check if the function has the minum risk required"""
check = False
if self.settings['print_risk']:
print("Function: %s, Risk: %s, Title: %s" % (inspect.stack()[1][3], function_risk, title[:title.find(" - ")]))
elif function_risk >= self.settings['minimum_risk']:
check = True
return check
def dump_results(self, method, toplimit, extra):
"""Prints the output of an internal method

method   -- name of the attribute of this class to call (looked up with getattr);
            'dump_results' itself is never resolved.
toplimit -- passed to the selected method as its second argument.
extra    -- when truthy, passed to the selected method as a third argument.

Returns True only when the selected method was called without raising.

NOTE(review): the indentation of this copy was lost, so the exact nesting of
the statements below must be confirmed against the original file.
"""
success = False
method_to_call = None
# only these four output types are accepted
if self.settings['output_type'] not in ["txt", "csv", "xml", "html"]:
self.settings['logger'].error("Incorrect output type selected. Valid outputs: txt, csv, xml, html.")
else:
# refuse to resolve dump_results itself
if method not in ['dump_results']:
try:
method_to_call = getattr(self, method)
except Exception as e:
# an unknown method name ends up here (getattr raises)
self.settings['logger'].error("Error when executing the method %s: %s", method, e)
if method_to_call:
# any method other than the full report is dumped regardless of its risk
if method != "report":
self.settings["minimum_risk"] = 0 # set the minimum risk to 0
self.dump.set_toggle_table(False)
start_time = time.time()
self.settings['logger'].info("Dumping: database %s - method %s - output %s" % (self.settings['db_file'], method, self.settings['output_type']))
self.dump.pre_general(self.settings['output_type'])
# the method is called with or without the extra argument; any error is logged, not raised
if extra:
try:
method_to_call(self.settings['output_type'], toplimit, extra)
success = True
except Exception as e:
self.settings['logger'].error("Error executing the method '%s' with parameter '%s': %s", method, extra, e)
else:
try:
method_to_call(self.settings['output_type'], toplimit)
success = True
except Exception as e:
self.settings['logger'].error("Error executing the method '%s': %s", method, e)
if success:
self.dump.post_general(self.settings['output_type'])
# suffix for the final log line: output file name and size in kb, when the file exists
size = ""
if 'output_file' in self.settings and os.path.isfile(self.settings['output_file']):
size = ", output file: " + self.settings['output_file'] + " (" + str(int(os.stat(self.settings['output_file']).st_size / 1024)) + " kb)"
elif 'output_file' in self.settings:
size = ". No information to be written into the output file."
finish_time = time.time() - start_time
self.settings['logger'].info("Time elapsed %s seconds%s" % (str(int(finish_time)), size))
return success
def report(self, output, toplimit):
"""Print several functions in the form of a report (useful for HTML)"""
# self.settings['db'].set_software(["9", "10"])
# self.list_summary(output, toplimit) # informational
self.list_software(output, self.settings["max_results"])
self.analyze_elapsed(output, toplimit) # informational
self.list_results(output, toplimit)
self.analyze_top_elapsed_killed(output, toplimit) # informational
self.analyze_top_elapsed_not_killed(output, toplimit) # informational
self.analyze_valgrind(output, toplimit)
self.analyze_username_disclosure(output, toplimit, username="root")
if getpass.getuser() != "root": # do not repeat the information if the root user was the one already used for the execution
self.analyze_username_disclosure(output, toplimit, username=getpass.getuser())
self.analyze_canary_token_file(output, toplimit)
self.analyze_canary_token_code(output, toplimit)
self.analyze_remote_connection(output, toplimit)
self.analyze_canary_token_command(output, toplimit)
self.analyze_canary_file(output, toplimit)
self.analyze_killed_differences(output, toplimit) # informational
self.analyze_return_code(output, toplimit)
self.analyze_specific_return_code(output, toplimit)
self.analyze_return_code_differences(output, toplimit)
self.analyze_return_code_same_software_differences(output, toplimit)
self.analyze_output_messages(output, toplimit, 'stderr')
self.analyze_output_messages(output, toplimit, 'stdout')
self.analyze_error_disclosure(output, toplimit)
self.analyze_same_software(output, toplimit) # low_risk
self.analyze_stdout(output, toplimit)
self.analyze_same_stdout(output, toplimit) # low_risk
self.analyze_file_disclosure(output, toplimit) # low_risk
self.analyze_file_disclosure_without_path(output, toplimit) # low_risk
self.analyze_path_disclosure_stdout(output, toplimit) # low_risk
self.analyze_path_disclosure_stderr(output, toplimit) # low_risk
self.analyze_path_disclosure_without_file(output, toplimit) # low_risk
def list_summary(self, output, toplimit):
    """Print a quantitative summary built from the analytic functions of this class.

    Each analysis is run silently (output=None) with settings["max_results"]
    as its limit, and the number of rows it returns becomes one summary line.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: only reported in the "Output Top Limit" line.

    Returns:
        False when check_minimum_risk rejects the summary, otherwise None.
    """
    title = "Summary for " + self.settings['db_file']
    columns = ["Information", "Amount"]
    if not self.check_minimum_risk(0, title):
        return False
    if output:
        self.settings['logger'].info(title)

    def amount(analysis, *extra):
        # Number of rows an analysis yields when run without producing output.
        return str(len(analysis(None, self.settings["max_results"], *extra)))

    rows = [[["Pieces of Software", amount(self.list_software)]]]
    if self.count_results is None:
        self.count_results = self.settings['db'].count_results(0, None)
    rows.append([["Amount of Testcases", str(self.count_results)]])
    rows.append([["Output Top Limit", str(toplimit)]])
    rows.append([["Valgrind References Found", amount(self.analyze_valgrind)]])
    rows.append([["Username 'root' Disclosure", amount(self.analyze_username_disclosure, "root")]])
    user_hits = amount(self.analyze_username_disclosure, getpass.getuser())
    rows.append([["Username '" + getpass.getuser() + "' Disclosure", user_hits]])

    counted = (
        ("Canary Token File Found", self.analyze_canary_token_file, ()),
        ("Canary Token Code Found", self.analyze_canary_token_code, ()),
        ("Canary Token Command Found", self.analyze_canary_token_command, ()),
        ("Canary File Found", self.analyze_canary_file, ()),
        ("Testcases Killed", self.analyze_top_elapsed_killed, ()),
        ("Testcases not Killed", self.analyze_top_elapsed_not_killed, ()),
        ("Software Killed and Not Killed", self.analyze_killed_differences, ()),
        ("Return Code", self.analyze_return_code, ()),
        ("Return Code Differences", self.analyze_return_code_differences, ()),
        ("Return Code Same Software Differences", self.analyze_return_code_same_software_differences, ()),
        ("Same Software having a Different Output", self.analyze_same_software, ()),
        ("Stdout for Different Results", self.analyze_stdout, ()),
        ("Different Stderr Messages", self.analyze_output_messages, ('stderr',)),
        ("Different Stdout Messages", self.analyze_output_messages, ('stdout',)),
        ("Analyze Error Messages for exceptions", self.analyze_error_disclosure, ()),
        ("Testcases that Produce the Same Stdout", self.analyze_same_stdout, ()),
        ("Temp File Disclosure", self.analyze_file_disclosure, ()),
        ("Temp File Disclosure (without path)", self.analyze_file_disclosure_without_path, ()),
        ("Path Disclosure Stdout", self.analyze_path_disclosure_stdout, ()),
        ("Path Disclosure Stderr", self.analyze_path_disclosure_stderr, ()),
        ("Path Disclosure (without temp file)", self.analyze_path_disclosure_without_file, ()),
        ("Remote Connections", self.analyze_remote_connection, ()),
    )
    for label, analysis, extra in counted:
        rows.append([[label, amount(analysis, *extra)]])

    # analyze_elapsed returns a total number of seconds rather than a row list
    total_seconds = self.analyze_elapsed(None, self.settings["max_results"])
    elapsed = datetime.timedelta(seconds=round(total_seconds, 0))
    rows.append([["Total Time Elapsed", str(elapsed)]])
    self.dump.general(output, title, columns, rows)
def list_software(self, output, toplimit):
    """List the [active] software that the database holds testcases for.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects this listing, otherwise the
        collected rows, each a one-element list wrapping a database record.
    """
    title = "List Software Tested - list_software "
    columns = ["ID", "Software", "Type", "OS"]
    if not self.check_minimum_risk(0, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].list_software():
        if capped and len(rows) >= toplimit:
            break
        rows.append([record])
    self.dump.general(output, title, columns, rows)
    return rows
def list_results(self, output, toplimit):
    """Print the raw fuzzing results, grouped by testcase, without applying any constraint.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of testcase groups to return; must be a number
            because it is used arithmetically for the title and the query range.

    Returns:
        False when check_minimum_risk rejects the listing, otherwise a list of
        groups; each group holds the 7-tuples of consecutive results that share
        the same testcase.
    """
    lowerlimit = 0
    title = "Analyze the Testcase Results from " + str(int(lowerlimit)) + " to " + str(lowerlimit + toplimit) + " - list_results"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr", "Kill"]
    if not self.check_minimum_risk(0, title):
        return False
    if output:
        self.settings['logger'].info(title)
    # one result row exists per piece of software, so scale the query window accordingly
    software_count = len(self.list_software(None, self.settings["max_results"]))
    results = self.settings['db'].list_results(lowerlimit, toplimit * software_count)
    rows = []
    group = []
    current = None
    for record in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        if current is None:
            current = record[0]
        if current != record[0]:
            # a new testcase starts: close the group collected so far
            current = record[0]
            rows.append(group)
            group = []
        shortened = record[0][:self.settings['testcase_limit']]
        group.append((shortened, record[1], record[2], record[3], record[4], record[5], record[6]))
    if len(rows) < toplimit and group:
        rows.append(group)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_valgrind(self, output, toplimit):
    """Find Valgrind references in the recorded output, in case Valgrind was used.

    The database is searched for the "== " marker, and a hit is kept only when
    the first 10 characters of its sixth column contain exactly four '=' signs.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 7-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Valgrind Output - analyze_valgrind"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr", "Return Code"]
    if not self.check_minimum_risk(2, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure("== "):
        if capped and len(rows) >= toplimit:
            break
        # Valgrind outputs can be detected because they have 4 equal signs in the first 10 characters
        if record[5][:10].count('=') != 4:
            continue
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5], record[6])])
    self.dump.general(output, title, columns, rows)
    return rows
def list_killed_results(self, output, toplimit):
    """Print the killed fuzzing results, grouped by testcase.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of testcase groups to return, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the listing, otherwise a list of
        groups; each group holds the 7-tuples of consecutive results sharing a
        testcase, with stdout and stderr cut to 500 characters.
    """
    title = "Analyze the Killed Testcase Results - list_killed_results"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr", "Kill"]
    function_risk = 2
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    rows = []
    testcase = None
    tmpoutput = []
    results = self.settings['db'].list_killed_results()
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        if testcase is None:
            testcase = result[0]
        if testcase != result[0]:
            # a new testcase starts: close the group collected so far
            testcase = result[0]
            rows.append(tmpoutput)
            tmpoutput = []
        tmpoutput.append((result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4][:500], result[5][:500], result[6]))
    # Flush the trailing group. toplimit=None means "no limit", exactly as in
    # the loop above; comparing an int with None would raise on Python 3.
    if tmpoutput and (toplimit is None or len(rows) < toplimit):
        rows.append(tmpoutput)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_return_code(self, output, toplimit):
    """Get the different return codes seen for each piece of software.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 5-tuple built from the database record.
    """
    title = "Analyze Different Return Codes per Software - analyze_return_code"
    columns = ["Software", "Type", "OS", "Return Code", "Amount"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].list_return_code_per_software():
        if capped and len(rows) >= toplimit:
            break
        rows.append([(record[0], record[1], record[2], record[3], record[4])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_specific_return_code(self, output, toplimit):
    """Find the results whose return code is one of -6, -9, -11 or -15.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 7-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    returncodes = ["-6", "-9", "-11", "-15"]
    title = "Analyze Specific Return Codes: " + ",".join(returncodes) + " - analyze_specific_return_code"
    columns = ["Testcase", "Software", "Type", "OS", "Returncode", "Stdout", "Stderr"]
    if not self.check_minimum_risk(2, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_specific_return_code(returncodes):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5], record[6])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_return_code_same_software_differences(self, output, toplimit):
    """Find when the same software returns different codes for one testcase through different input forms.

    The database software filter is temporarily restricted to the software
    entries whose name repeats the previous entry in list_software(), and is
    always restored afterwards, even if a query raises.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of groups to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups (two or more 6-item lists each) of consecutive results with the
        same testcase and software but a changing return code.
    """
    title = "Analyze Return Code Same Software Differences - analyze_return_code_same_software_differences"
    columns = ["Testcase", "Software", "Type", "Return Code", "Stdout", "Stderr"]
    function_risk = 2
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    db = self.settings['db']
    # First check if there is more than one type of input per software, and save the IDs
    software_ids = []
    software_name = ""
    for result in db.list_software():
        if software_name == result[1]:
            software_ids.append(str(result[0]))
        else:
            software_name = result[1]
    rows = []
    if software_ids:
        original_ids = db.get_software()
        db.set_software(software_ids)  # restrict the ids
        try:
            software = ""
            software_returncode = ""
            testcase = ""
            outputtmp = []
            for result in db.analyze_return_code_differences():
                if toplimit is not None and len(rows) >= toplimit:
                    break
                entry = [result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5]]
                if testcase == result[0] and software == result[1] and software_returncode != result[3]:
                    outputtmp.append(entry)
                else:
                    # singletons carry no difference, so only groups of 2+ are kept
                    if len(outputtmp) > 1:
                        rows.append(outputtmp)
                    outputtmp = [entry]
                # NOTE(review): trackers follow the previous result, not the group's first one - confirm
                testcase = result[0]
                software = result[1]
                software_returncode = result[3]
            # flush the group still being collected when the results ran out
            if len(outputtmp) > 1 and (toplimit is None or len(rows) < toplimit):
                rows.append(outputtmp)
        finally:
            # never leave the restricted filter active for later analyses
            db.set_software(original_ids)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_return_code_differences(self, output, toplimit):
    """Find when different return codes are received for the same input.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of groups to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups (two or more 6-item lists each) of consecutive results that
        share a testcase while the return code keeps changing.
    """
    title = "Analyze Return Code Differences - analyze_return_code_differences"
    columns = ["Testcase", "Software", "Type", "Return Code", "Stdout", "Stderr"]
    function_risk = 2
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    rows = []
    software_returncode = ""
    testcase = ""
    outputtmp = []
    results = self.settings['db'].analyze_return_code_differences()
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        entry = [result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5]]
        if testcase == result[0] and software_returncode != result[3]:
            outputtmp.append(entry)
        else:
            # singletons carry no difference, so only groups of 2+ are kept
            if len(outputtmp) > 1:
                rows.append(outputtmp)
            outputtmp = [entry]
        # NOTE(review): trackers follow the previous result, not the group's first one - confirm
        testcase = result[0]
        software_returncode = result[3]
    # flush the group still being collected when the results ran out
    if len(outputtmp) > 1 and (toplimit is None or len(rows) < toplimit):
        rows.append(outputtmp)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_username_disclosure(self, output, toplimit, username=None):
    """Find when a specific username is disclosed in the stdout or in the stderr.

    Matches that only come from settings['tmp_prefix'] are excluded by the
    database query (passed as excludeme).

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.
        username: the username to look for; required despite the None default.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    # Validate first: the title below concatenates username, so a missing value
    # used to surface as a TypeError instead of the intended usage message.
    if username is None:
        print("Error: extra parameter username has not been defined")
        help()
    title = "Analyze Username Disclosure: " + username + " - analyze_username_disclosure"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    rows = []
    results = self.settings['db'].analyze_string_disclosure(username, excludeme=self.settings['tmp_prefix'])
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        rows.append([(result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_error_disclosure(self, output, toplimit):
    """Find the error strings listed in settings['error_disclosure'] in the recorded output.

    Results whose testcase contains 'canaryfile' are skipped.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect across all error strings, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Presence of Exceptions - analyze_error_disclosure"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    rows = []
    for error in self.settings['error_disclosure']:
        # stop querying the database once the limit is reached; nothing more can be added
        if toplimit is not None and len(rows) >= toplimit:
            break
        results = self.settings['db'].analyze_string_disclosure(error)
        for result in results:
            if toplimit is not None and len(rows) >= toplimit:
                break
            if result[0].find('canaryfile') == -1:
                rows.append([(result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_canary_file(self, output, toplimit):
    """Find canary filenames in the stdout or stderr, even though canary files were not part of the payload.

    Results whose testcase contains 'canaryfile' are skipped.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Presence of Canary Files - analyze_canary_file"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(3, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_canary_file():
        if capped and len(rows) >= toplimit:
            break
        if record[0].find('canaryfile') != -1:
            # the payload itself referenced a canary file, so this is not a finding
            continue
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_canary_token_file(self, output, toplimit):
    """Find canary tokens of files ("canarytokenfile") in the stdout or in the stderr.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Presence of Canary Tokens File Local - analyze_canary_token_file"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(3, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure("canarytokenfile"):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_canary_token_code(self, output, toplimit):
    """Find canary tokens of executed code ("canarytokencode") in the stdout or in the stderr.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Presence of Canary Tokens Code - analyze_canary_token_code"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(3, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure("canarytokencode"):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_canary_token_command(self, output, toplimit):
    """Find canary tokens of commands ("canarytokencommand") in the stdout or stderr.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Presence of Canary Tokens Command - analyze_canary_token_command"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(3, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure("canarytokencommand"):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_remote_connection(self, output, toplimit):
    """Find remote connections made, grouped by testcase.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of testcase groups to return, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups; each group holds the 7-tuples of consecutive results sharing a
        testcase, with the testcase truncated to settings['testcase_limit'].
    """
    title = "Analyze Remote Connections - analyze_remote_connection"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr", "Network"]
    function_risk = 3
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    testcase = None
    outputtmp = []
    rows = []
    results = self.settings['db'].analyze_remote_connection()
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        # Close the current group only when the testcase really changes. The
        # tracker is updated for every row, so the first row of the result set
        # stays in the same group as the rows that follow it.
        if outputtmp and testcase != result[0]:
            rows.append(outputtmp)
            outputtmp = []
        testcase = result[0]
        outputtmp.append((result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5], result[6]))
    # flush the trailing group, but never beyond the requested limit
    if outputtmp and (toplimit is None or len(rows) < toplimit):
        rows.append(outputtmp)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_top_elapsed_killed(self, output, toplimit):
    """Find which killed testcases required more time.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 5-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Top Time Elapsed (and eventually killed) - analyze_top_elapsed_killed"
    columns = ["Testcase", "Software", "Type", "OS", "Elapsed"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_top_elapsed(True):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_top_elapsed_not_killed(self, output, toplimit):
    """Find which testcases that were not killed required more time.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 5-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Top Time Elapsed (but not killed) - analyze_top_elapsed_not_killed"
    columns = ["Testcase", "Software", "Type", "OS", "Elapsed"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_top_elapsed(False):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_killed_differences(self, output, toplimit):
    """Find when one piece of software was killed AND another one was not killed for the same input.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of groups to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups (two or more 7-item lists each) of consecutive results sharing a
        testcase while the kill status keeps changing. The list is empty when
        the database query fails.
    """
    title = "Analyze Killed Software vs Not Killed Software - analyze_killed_differences"
    columns = ["Testcase", "Software", "Type", "OS", "Kill", "Stdout", "Stderr"]
    function_risk = 2
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    rows = []
    testcase = kill_status = None
    outputtmp = []
    try:
        results = self.settings['db'].analyze_killed_differences()
    except Exception:
        # keep this best-effort: report the failure and carry on with an empty
        # table instead of failing later on an undefined `results`
        print("Error when requesting the killed differences")
        results = []
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        entry = [result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5], result[6]]
        same_testcase = testcase is not None and testcase == result[0]
        if same_testcase and kill_status != result[4]:
            outputtmp.append(entry)
        else:
            # singletons carry no difference, so only groups of 2+ are kept
            if len(outputtmp) > 1:
                rows.append(outputtmp)
            outputtmp = [entry]
        # NOTE(review): trackers follow the previous result, not the group's first one - confirm
        testcase = result[0]
        kill_status = result[4]
    # flush the group still being collected when the results ran out
    if len(outputtmp) > 1 and (toplimit is None or len(rows) < toplimit):
        rows.append(outputtmp)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_same_software(self, output, toplimit):
    """Find when the same software produces different results for different input forms (ie, Node CLI vs Node File Input).

    The database software filter is temporarily restricted to the software
    entries whose name repeats the previous entry in list_software(); it is
    restored after the table has been dumped, even if something raises.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of groups to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups (two or more 4-item lists each) of consecutive results with the
        same testcase and software but a changing stdout.
    """
    title = "Analyze Same Software having a Different Output - analyze_same_software"
    columns = ["Testcase", "Software", "Type", "Stdout"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    db = self.settings['db']
    # First check if there is more than one type of input per software, and save the IDs
    software_ids = []
    software_name = ""
    for result in db.list_software():
        if software_name == result[1]:
            software_ids.append(str(result[0]))
        else:
            software_name = result[1]
    rows = []
    original_ids = None
    restricted = False
    try:
        if software_ids:
            original_ids = db.get_software()
            db.set_software(software_ids)  # restrict the ids
            restricted = True
            software = ""
            software_stdout = ""
            testcase = ""
            outputtmp = []
            for result in db.analyze_same_software():
                if toplimit is not None and len(rows) >= toplimit:
                    break
                entry = [result[0][:self.settings['testcase_limit']], result[1], result[2], result[3]]
                if testcase == result[0] and software == result[1] and software_stdout != result[3]:
                    outputtmp.append(entry)
                else:
                    # singletons carry no difference, so only groups of 2+ are kept
                    if len(outputtmp) > 1:
                        rows.append(outputtmp)
                    outputtmp = [entry]
                # NOTE(review): trackers follow the previous result, not the group's first one - confirm
                testcase = result[0]
                software = result[1]
                software_stdout = result[3]
            if len(outputtmp) > 1:
                rows.append(outputtmp)
        self.dump.general(output, title, columns, rows)
    finally:
        # only undo the restriction if it was applied: original_ids is
        # meaningless when no software qualified
        if restricted:
            db.set_software(original_ids)
    return rows
def analyze_stdout(self, output, toplimit):
    """Find when different pieces of software produce different results (basic differential testing).

    Results are fetched from the database in pages of 100000; paging stops as
    soon as the limit is reached or a page comes back empty.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of testcase groups to return, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups; a group holds every result of one testcase and is kept only
        when at least one stdout differs from the first one of that testcase.
    """
    title = "Analyze Stdout for Different Results (Basic Differential Testing) - analyze_stdout"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "ID"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    testcase = ""
    stdout = ""
    tobeprinted = False
    outputtmp = []
    rows = []
    page_size = 100000
    lowerlimit = 0
    upperlimit = page_size
    # Re-check the limit before every page: once it is reached no further row
    # can be added, so fetching the remaining pages would be wasted work.
    while toplimit is None or len(rows) < toplimit:
        results = self.settings['db'].analyze_stdout(lowerlimit, upperlimit)
        if not results:
            break
        lowerlimit += page_size
        upperlimit += page_size
        for result in results:
            if toplimit is not None and len(rows) >= toplimit:
                break
            if testcase != result[0]:
                # a new testcase starts: keep the previous group only if it showed a difference
                testcase = result[0]
                stdout = result[3]
                if outputtmp and tobeprinted:
                    rows.append(outputtmp)
                tobeprinted = False
                outputtmp = []
            outputtmp.append([result[0][:self.settings['testcase_limit']], result[1], result[2], result[5], result[3], result[6]])
            if stdout != result[3]:
                tobeprinted = True
    # flush the trailing group; toplimit=None means "no limit" here as well
    if outputtmp and tobeprinted and (toplimit is None or len(rows) < toplimit):
        rows.append(outputtmp)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_same_stdout(self, output, toplimit):
    """Find different testcases that produce the same standard output, ignoring the groups where ALL the pieces of software match.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of groups to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise a list of
        groups of 5-item lists; a group is closed when the stdout column changes
        and is kept only when its size differs from the software count. The
        group still open when the results run out is not returned.
    """
    title = "Analyze Testcases that Produce the Same Stdout - analyze_same_stdout"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout"]
    if not self.check_minimum_risk(0, title):
        return False
    if output:
        self.settings['logger'].info(title)
    current_stdout = ""
    group = []
    rows = []
    software_total = self.settings['db'].count_software()
    results = self.settings['db'].analyze_same_stdout()
    for record in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        if current_stdout != record[4]:
            # the stdout changed: close the previous group unless every software produced it
            if group and len(group) != software_total:
                rows.append(group)
            group = []
            current_stdout = record[4]
        # NOTE(review): this test compares against the LAST database result and
        # indexes `results` with the size of the current group; it reads as if
        # the previously collected entry was meant. Behavior kept as is - confirm.
        keep = (
            not results
            or results[len(results) - 1][0] != record[0]
            or results[len(group) - 1][1] != record[1]
        )
        if keep:
            shortened = record[0][:self.settings['testcase_limit']]
            group.append([shortened, record[1], record[2], record[3], record[4]])
    # if outputtmp and len(outputtmp) != countsoftware and len(rows) < toplimit:
    #     rows.append(outputtmp)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_file_disclosure(self, output, toplimit):
    """Find settings['tmp_prefix'] in the stdout or in the stderr.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Temp File Disclosure (" + self.settings['tmp_prefix'] + ") - analyze_file_disclosure"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure(self.settings['tmp_prefix']):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_file_disclosure_without_path(self, output, toplimit):
    """Find settings['tmp_prefix'] in the stdout or stderr when settings['tmp_dir'] is not shown with it.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Temp File Disclosure (" + self.settings['tmp_prefix'] + ") Without Path (" + self.settings['tmp_dir'] + ") - analyze_file_disclosure_without_path"
    columns = ["Test", "Software", "Type", "OS", "Stdout", "Stderr"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    tmp_dir = self.settings['tmp_dir']
    rows = []
    results = self.settings['db'].analyze_string_disclosure(self.settings['tmp_prefix'])
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        # Per the column layout, index 4 is Stdout and index 5 is Stderr
        # (index 3 is the OS, which the path check must not look at).
        stdout = result[4] or ""
        stderr = result[5] or ""
        if stdout.find(tmp_dir) == -1 and stderr.find(tmp_dir) == -1:
            # testcase shortened like every other analysis in this class
            rows.append([(result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_path_disclosure_stdout(self, output, toplimit):
    """Find settings['tmp_dir'] in the stdout (the query is issued with where='stdout').

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Path Disclosure Stdout (" + self.settings['tmp_dir'] + ") - analyze_path_disclosure_stdout"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure(self.settings['tmp_dir'], where='stdout'):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_path_disclosure_stderr(self, output, toplimit):
    """Find settings['tmp_dir'] in the stderr (the query is issued with where='stderr').

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Path Disclosure Stderr (" + self.settings['tmp_dir'] + ") - analyze_path_disclosure_stderr"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    if not self.check_minimum_risk(1, title):
        return False
    if output:
        self.settings['logger'].info(title)
    capped = toplimit is not None
    rows = []
    for record in self.settings['db'].analyze_string_disclosure(self.settings['tmp_dir'], where='stderr'):
        if capped and len(rows) >= toplimit:
            break
        shortened = record[0][:self.settings['testcase_limit']]
        rows.append([(shortened, record[1], record[2], record[3], record[4], record[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_path_disclosure_without_file(self, output, toplimit):
    """Find settings['tmp_dir'] in the stdout or stderr when settings['tmp_prefix'] does not appear there.

    Only software of type 'CLI' is queried: the database software filter is
    temporarily restricted to those ids and restored right after the query,
    even if the query raises.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the rows,
        each a one-element list holding a 6-tuple with the testcase truncated
        to settings['testcase_limit'].
    """
    title = "Analyze Path Disclosure (" + self.settings['tmp_dir'] + ") Without Temp File (" + self.settings['tmp_prefix'] + ") - analyze_path_disclosure_without_file"
    columns = ["Testcase", "Software", "Type", "OS", "Stdout", "Stderr"]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    db = self.settings['db']
    tmp_prefix = self.settings['tmp_prefix']
    software_ids = [str(result[0]) for result in db.get_software_type('CLI')]
    rows = []
    if software_ids:
        original_ids = db.get_software()
        db.set_software(software_ids)  # restrict the ids
        try:
            results = db.analyze_string_disclosure(self.settings['tmp_dir'])
        finally:
            db.set_software(original_ids)  # set the ids to the original value
        for result in results:
            if toplimit is not None and len(rows) >= toplimit:
                break
            # Per the column layout, index 4 is Stdout and index 5 is Stderr
            # (index 3 is the OS, which the prefix check must not look at).
            stdout = result[4] or ""
            stderr = result[5] or ""
            if stdout.find(tmp_prefix) == -1 and stderr.find(tmp_prefix) == -1:
                rows.append([(result[0][:self.settings['testcase_limit']], result[1], result[2], result[3], result[4], result[5])])
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_output_messages(self, output, toplimit, messages='stderr'):
    """Analyze which were the different output messages for each piece of software.

    Each message is normalised before de-duplication: the testcase text (when
    longer than 5 characters) becomes "TESTCASE" and every whitespace-delimited
    token containing settings['tmp_prefix'] becomes "TMPFILE".

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of distinct rows to collect, or None for no limit.
        messages: which stream to analyze; passed to the database query and
            capitalised for the title and the last column name.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the
        sorted distinct rows, each a one-element list holding
        [software, type, os, return code, normalised message].
    """
    label = messages[0].upper() + messages[1:]
    title = "Analyze Different " + label + " Output Messages - analyze_output_messages"
    columns = ["Software", "Type", "OS", "Return Code", label]
    function_risk = 1
    if not self.check_minimum_risk(function_risk, title):
        return False
    if output:
        self.settings['logger'].info(title)
    tmp_prefix = self.settings['tmp_prefix']
    # Compiled once; the prefix is escaped so it is matched literally, exactly
    # like the plain substring test that guards the substitution below.
    tmpfile_regex = re.compile(r'\S*' + re.escape(tmp_prefix) + r'\S*')
    rows = []
    seen = set()  # normalised rows already collected, for O(1) duplicate detection
    results = self.settings['db'].analyze_output_messages(messages)
    for result in results:
        if toplimit is not None and len(rows) >= toplimit:
            break
        output_parsed = result[5]
        if len(result[0]) > 5:
            output_parsed = output_parsed.replace(result[0], "TESTCASE")  # if possible, remove the testcase from output
            output_parsed = output_parsed.replace(str(result[0].encode("utf-8")), "TESTCASE")  # if possible, remove the testcase from output
        if tmp_prefix and tmp_prefix in output_parsed:
            output_parsed = tmpfile_regex.sub("TMPFILE", output_parsed)
        test = [result[1], result[2], result[3], result[4], output_parsed]
        key = tuple(test)
        if key not in seen:
            seen.add(key)
            rows.append([test])
    rows = sorted(rows)
    self.dump.general(output, title, columns, rows)
    return rows
def analyze_elapsed(self, output, toplimit):
    """Analyze the total time required by each piece of software.

    self.count_results is filled from the database on first use and serves as
    the divisor for the per-testcase average.

    Args:
        output: destination handed to self.dump.general; when truthy the title is also logged.
        toplimit: maximum number of rows to collect, or None for no limit.

    Returns:
        False when check_minimum_risk rejects the analysis, otherwise the sum
        of the elapsed values of the rows that were collected.
    """
    title = "Analyze Elapsed Time - analyze_elapsed"
    columns = ["Software", "Type", "OS", "Elapsed", "Average per Testcase"]
    if not self.check_minimum_risk(0, title):
        return False
    if output:
        self.settings['logger'].info(title)
    total = 0
    rows = []
    if self.count_results is None:
        self.count_results = self.settings['db'].count_results(0, None)
    capped = toplimit is not None
    for record in self.settings['db'].analyze_elapsed():
        if capped and len(rows) >= toplimit:
            break
        seconds = record[3]
        elapsed = str(datetime.timedelta(seconds=int(seconds)))
        average = str(round(seconds / self.count_results, 5))
        rows.append([[record[0], record[1], record[2], elapsed, average]])
        total += seconds
    self.dump.general(output, title, columns, rows)
    return total
def help(err=""):
    """Print a help screen, preceded by the error message when one is given, and exit.

    Args:
        err: optional error description (anything printable with %s).

    Raises:
        SystemExit: always. The exit status is 1 when err is given, so that
            callers of the script can detect the failure, and 0 otherwise.
    """
    if err:
        print("Error: %s\n" % err)
    print("Syntax: ")
    print(os.path.basename(__file__) + " -d db.sqlite Choose the database")
    print("\t\t [-D] Debug information")
    print("\t\t [-m methodName] Method: report (default), analyze_stdout, analyze_specific_return_code, etc")
    print("\t\t [-e extra_parameter] Extra parameter used when specifying a for certain methodName (ie, analyze_username_disclosure)")
    print("\t\t [-o html] Output: html (default), txt or csv.")
    print("\t\t [-l 20] Top limit results (default: 20)")
    print("\t\t [-r 3] Minimum risk (0:informational, 1:low, 2:medium, 3:high (default)")
    sys.exit(1 if err else 0)
def main():
    """Analyze potential vulnerabilities on a database fuzzing session

    Parses the command line, requires an existing database file (-d), loads
    the settings and dumps the results of the selected analysis method.
    Any invalid parameter prints the help screen and exits.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:De:m:o:pl:r:", ["help", "database=", "extra=", "method=", "output=", "limit=", "risk="])
    except getopt.GetoptError as err:
        help(err)  # help() exits, so opts is always bound below

    settings = {}
    method = "report"  # default method name
    toplimit = 20      # default top limit
    extra = None

    for o, a in opts:
        if o in ("-d", "--database"):
            if os.path.isfile(a):
                settings['db_file'] = a
            else:
                help("Database should be a valid file.")
        elif o in ("-D",):  # one-element tuple: a bare ("-D") is a string and would do a substring test
            settings['loglevel'] = 'debug'
        elif o in ("-e", "--extra"):
            extra = a
        elif o in ("-h", "--help"):
            help()
        elif o in ("-l", "--limit"):
            try:
                toplimit = int(a)
            except ValueError:
                help("Top limit should be an integer.")
        elif o in ("-m", "--method"):
            method = a
        elif o in ("-o", "--output"):
            settings["output_type"] = a
        elif o in ("-p",):
            settings["print_risk"] = True
        elif o in ("-r", "--risk"):
            try:
                settings["minimum_risk"] = int(a)
            except ValueError:
                help("Risk should be an integer.")

    # The database is mandatory. The previous code also had an unreachable
    # branch for "no database and no -p"; it could never run because this check
    # already covers every case without a database.
    # NOTE(review): that dead branch suggests -p was meant to work without a
    # database - confirm the intended behavior before relaxing this check.
    if 'db_file' not in settings:
        help("The database was not specified.")

    settings = classes.settings.load_settings(settings)
    if settings['db'].db_connection:
        analyze = Analyze(settings)
        analyze.dump_results(method, toplimit, extra)
# Run the command line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()
# Disabled profiling call kept for reference:
# profile.run('analyze.dump_results(method, toplimit)')
================================================
FILE: xdiff_dbaction.py
================================================
#!/usr/bin/env python
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from __future__ import print_function
import getopt
import itertools
import os.path
import sys
import time
import classes.settings
import classes.compat
from classes.dump import Dump
from classes.dbsqlite import DbSqlite
class Dbaction(object):
    """Do stuff with the fuzzer's databases: copy databases, print tables, insert stuff and generate testcases"""

    def __init__(self, settings):
        """Keep the settings dictionary and fill in the testcase generation defaults.

        'max_permutation' defaults to 5 and 'generate_type' defaults to 2 when
        the caller did not provide them.
        """
        self.settings = settings
        if 'max_permutation' not in self.settings:
            self.settings['max_permutation'] = 5
        if 'generate_type' not in self.settings:
            self.settings['generate_type'] = 2

    def print_table(self, fromdb, table, output_type):
        """Print all the contents of a table

        Opens the database 'fromdb', reads the columns and rows of 'table' and
        dumps them using 'output_type'. When the table has no columns, the
        table is reported as invalid instead.
        """
        if table is None:
            self.settings['logger'].error("You must select a table.")
            return
        self.settings['output_file'] = None
        self.settings['db'] = DbSqlite(self.settings, fromdb)
        columns = self.settings['db'].get_columns(table)
        rows = self.settings['db'].get_rows(table)
        if columns:
            dump = Dump(self.settings)
            dump.general(output_type, table, columns, [rows])
        else:
            self.print_valid_tables(table)

    def insert_table(self, fromdb, table, separator, insert):
        """Insert a row into a table

        'insert' is a single string holding the values, split by 'separator'.
        The amount of values must match the amount of columns of the table;
        a leading 'id' column may be omitted.
        """
        if table is None:
            self.settings['logger'].error("You must select a table.")
            return
        if not insert:
            self.settings['logger'].error("There are no values to be inserted")
            return
        self.settings['db'] = DbSqlite(self.settings, fromdb)
        columns = self.settings['db'].get_columns(table)
        if not columns:
            self.print_valid_tables(table)
            return
        values = insert.split(separator)  # split once instead of on every use
        # If the user supplied one value less than the one required and the first column is called id, just ignore that column..
        if len(columns) == len(values) + 1 and columns[0] == 'id':
            columns = columns[1:]  # copy, so the list returned by the database layer is not mutated
        if len(columns) != len(values):
            print("The table '" + table + "' has " + str(len(columns)) + " columns: " + str(columns) + ". However, you want to insert " + str(len(values)) + " value/s: " + str(values) + ". It doesn't work like that.")
        else:
            self.settings['db'].insert_row(table, columns, values)

    def print_valid_tables(self, table=None):
        """Provide information on what are the valid tables

        With a table name, log that the table was not found. Without one, list
        the known tables as long as an output type is configured in the settings.
        """
        if table:
            self.settings['logger'].error("Error: table '%s' not found" % table)
        elif self.settings.get('output_type'):
            # The output type is kept in the settings; this class never defines
            # an 'output_type' attribute, so reading one raised AttributeError.
            for line in (
                "Valid table names:",
                "- fuzz_testcase: contains the inputs to be sent to the software. You can define an input in 'function' and potential values in 'value' and generate the combinations on this table.",
                "- function: contains what you want to fuzz. The special keyword [[test]] gets replaced by the values contained in the table 'value'. Ie, if you want to fuzz the 'print()'' function, you want to write in here 'print([[test]])'.",
                "- value: contains the items that will replace the [[test]] values in the 'function' table",
                "",
                "Valid tables generated by XDiFF:",
                "- fuzz_software: contains the software defined in software.ini",
                "- fuzz_testcase_result: contains the result of executing the software defined in 'fuzz_software' with the input defined in 'fuzz_testcase'",
                "- fuzz_constants: contains internal constant values used by the fuzzer",
            ):
                print(line)

    @staticmethod
    def _function_text(function):
        """Return the function string stored in a plain string or in a 1 or 2 item tuple."""
        if isinstance(function, tuple):
            if len(function) == 1:
                return function[0]  # when it is generated by random testcases (classes/fuzzer.py)
            if len(function) == 2:
                return function[1]  # when it is read from the database
        return function

    def permute(self, functions, values):
        """Perform a permutation between the two lists received (functions & values)

        Functions are processed by ascending amount of [[test]] placeholders,
        from 0 up to settings['max_permutation']. The generated testcases are
        stored through settings['db'] and the amount generated is returned.
        """
        total = 0
        if not functions:
            self.settings['logger'].error("There are no functions to permute")
        elif not values:
            self.settings['logger'].error("There are no values to permute")
        else:
            # Unwrap tuples only: indexing a plain string of length 1 or 2 as
            # if it were a tuple would truncate it to a single character.
            texts = [self._function_text(function) for function in functions]
            # Prioritize the lower count injections
            for count in range(0, self.settings['max_permutation'] + 1):
                matching = [text for text in texts if text is not None and count == text.count("[[test]]")]
                # Give a heads up of how many testcases will be generated
                self.settings['logger'].debug("Testcases generation: %s entry points, %s testcases to be generated." % (str(count), str(len(matching))))
                # Generate the testcases
                for function in matching:
                    testcases, total = self.permute_values(values, function, total)
                    self.settings['db'].set_testcase(testcases)
        return total

    def _replace_values(self, values, function, total, testcases):
        """Append one testcase per value, replacing every [[test]] of function, and return the new total."""
        for value in values:
            if isinstance(value, tuple):
                value = value[0]
            total += 1
            value = value.replace('[[id]]', str(total))
            testcases.append((classes.compat.unicode(function.replace('[[test]]', value)),))
        return total

    def permute_values(self, values, function, total):
        """Perform a permutation between the values and the functions received based on the generate_type received

        generate_type 1 builds every combination of values for the placeholders,
        2 replaces all the placeholders with the same value, and 3 does the same
        as 2 while also dropping ',[[test]]' parameters one at a time.
        Returns the list of testcases (one-item tuples) and the updated total.
        Any other generate_type prints an error and exits.
        """
        testcases = []
        generate_type = self.settings['generate_type']
        # There are no values, only functions:
        if not values:
            testcases.append((classes.compat.unicode(function),))
        elif generate_type == 1:
            # Permute
            for valuetuple in itertools.product(values, repeat=function.count("[[test]]")):
                total += 1
                testcase = function
                for value in valuetuple:
                    # Values read from the database are tuples. Test the value
                    # itself: the product tuple is always a tuple, which used to
                    # cut plain string values down to their first character.
                    if isinstance(value, tuple):
                        value = value[0]
                    value = value.replace('[[id]]', str(total))
                    testcase = testcase.replace("[[test]]", value, 1)
                testcases.append((classes.compat.unicode(testcase),))
        elif generate_type == 2:
            # Do not permute, just replace
            total = self._replace_values(values, function, total, testcases)
        elif generate_type == 3:
            # Do not permute, replace but also include testcases with less parameters
            # NOTE(review): functions with a single [[test]] generate nothing
            # here, unlike generate_type 2 - confirm whether that is intended.
            placeholders = function.count("[[test]]")
            if placeholders > 1:
                for _ in range(1, placeholders + 1):
                    total = self._replace_values(values, function, total, testcases)
                    function = function.replace(',[[test]]', '', 1)
        else:
            print("Error: the permutation type does not exist")
            sys.exit()
        return testcases, total

    def generate(self, fromdb):
        """Generate the testcases with a permutation of values and functions"""
        start_time = time.time()
        self.settings['db'] = DbSqlite(self.settings, fromdb)
        if self.settings['db'].db_connection:
            self.settings['db'].create_table()
            values = self.settings['db'].get_values()
            functions = self.settings['db'].get_functions()
            self.settings['logger'].info("Values: %s - Functions: %s" % (str(len(values)), str(len(functions))))
            total = self.permute(functions, values)
            self.settings['db'].commit()
            finish_time = time.time() - start_time
            self.settings['logger'].info("Testcases generated: %s" % str(total))
            self.settings['logger'].info("Time required: %s seconds" % str(round(finish_time, 2)))

    def migrate(self, fromdb, todb):
        """Migrates tables from one database ('dbfrom') to another database ('dbto')"""
        start_time = time.time()
        self.settings['dbfrom'] = DbSqlite(self.settings, fromdb)
        self.settings['dbto'] = DbSqlite(self.settings, todb)
        if self.settings['dbfrom'].db_connection and self.settings['dbto'].db_connection:
            self.settings['dbto'].create_table()
            values = self.settings['dbfrom'].get_values()
            self.settings['dbto'].set_values(values)
            functions = self.settings['dbfrom'].get_functions()
            self.settings['dbto'].set_functions(functions)
            self.settings['dbto'].commit()
            finish_time = time.time() - start_time
            # Round like generate() does: slicing the string representation
            # mangles large or exponent formatted values (ie, '9.5e-05').
            self.settings['logger'].info("Finished, time elapsed %s seconds" % str(round(finish_time, 2)))
def help(err=None):
    """Show the optional error followed by the command line syntax, then exit."""
    if err:
        print("Error: %s\n" % str(err))
    script_name = os.path.basename(__file__)
    usage = [
        "Syntax: ",
        script_name + " -d db.sqlite -D fuzz.db Migrate values and functions to another database",
        "\t\t -d fuzz.db -g 1 [-m 5] Generate testcases permuting values and functions (set to maximum 5 input test cases)",
        "\t\t -d fuzz.db -g 2 [-m 5] Generate testcases replacing values in functions (set to max..)",
        "\t\t -d fuzz.db -g 3 [-m 5] Generate testcases replacing values in functions including testcases with less parameters (set to max..)",
        "\t\t -d fuzz.db -t table -p Print a database table: fuzz_software, fuzz_testcase, value, function)",
        "\t\t -d fuzz.db -t table [-s,] -i \"foo\" Insert foo into table (optional field separator -s uses a comma)",
    ]
    print("\n".join(usage))
    sys.exit()
def main():
    """Perform multiple database actions

    Parses the command line and runs one action against the database given
    with -d: migrate (-D), generate (-g), print (-p) or insert (-i). Invalid
    parameters, a missing database or a missing action print the help screen
    and exit.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:D:g:i:m:ps:t:", ["help", "database=", "Database=", "generate=", "insert=", "maximum=", "print", "separator=", "table="])
    except getopt.GetoptError as err:
        help(err)  # help() exits, so opts is always bound below

    settings = {'output_type': 'txt'}
    fromdb = None
    todb = None
    table = None
    action = None
    insert = None  # always bound, even when -i was not supplied
    separator = ","

    for o, a in opts:
        if o in ("-h", "--help"):
            help()
        elif o in ("-d", "--database"):
            fromdb = a
            if os.path.isfile(fromdb):
                settings['db_file'] = fromdb
            else:
                help("The database selected '%s' is not a valid file." % a)
        elif o in ("-D", "--Database"):
            todb = a
            action = "migrate"
            break  # the options following -D are not processed
        elif o in ("-g", "--generate"):
            action = "generate"
            try:
                settings['generate_type'] = int(a)
            except ValueError:  # only a failed conversion is expected here
                help("The generate parameter should be a number")
        elif o in ("-i", "--insert"):
            action = "insert"
            insert = classes.compat.unicode(str(a), errors='ignore')
        elif o in ("-m", "--maximum"):
            try:
                settings['max_permutation'] = int(a)
            except ValueError:
                help("The max permutation parameter should be a number")
        elif o in ("-p", "--print"):
            action = "print"
        elif o in ("-s", "--separator"):
            separator = a
        elif o in ("-t", "--table"):
            table = a

    if not fromdb:
        help("The database was not specified.")

    settings = classes.settings.load_settings(settings)
    dbaction = Dbaction(settings)
    if action == "migrate":
        dbaction.migrate(fromdb, todb)
    elif action == "generate":
        if todb is not None:
            fromdb = todb
        dbaction.generate(fromdb)
    elif action == "print":
        dbaction.print_table(fromdb, table, settings['output_type'])
    elif action == "insert":
        dbaction.insert_table(fromdb, table, separator, insert)
    else:
        help("You must select an action: migrate, generate, print or insert.")
# Run the command line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()
================================================
FILE: xdiff_run.py
================================================
#!/usr/bin/env python
#
# Copyright (C) 2018 Fernando Arnaboldi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
from __future__ import print_function
import getopt
import os
import signal
import sys
import time
import classes.settings
def dfuzz(settings):
    """Fuzz something based on the settings received

    Loads the fuzzer settings, then repeatedly fetches a batch of testcases
    from the database, fuzzes them, stores the results and logs the progress
    until there are no more testcases. Returns False when the settings could
    not be loaded or the temporary directory could not be entered.
    """
    if 'fuzz_category' not in settings:
        help("The category was not specified.")
    settings = classes.settings.load_settings(settings)  # load the fuzzer settings
    if not settings:
        return False
    if not settings['software']:
        help("There is no software associated to the category selected")
    if not settings['queue'].chdir_tmp():
        return False

    banner = "Starting Fuzzer v%s" % str(settings['version'])
    settings['logger'].info(len(banner) * "-")
    settings['logger'].info(banner)
    settings['logger'].info(len(banner) * "-")
    # Iterating the mapping directly works on Python 2 and 3; iterkeys() is Python 2 only
    for key in sorted(settings):
        settings['logger'].debug("Setting %s: %s" % (key, str(settings[key])))

    settings['queue'].start_web_server()  # load the webserver
    settings['monitor'].check_once()  # check before start if the canaries are in place
    settings['db'].optimize()
    total_testcases = settings['db'].count_testcases()
    current_test = settings['db'].get_latest_id(settings['software'])
    settings['logger'].info("Setting testcases: %s/%s" % (str(current_test), str(total_testcases)))

    elapsed_time = 0
    test_count = 0
    while True:
        start_time = time.time()
        tests = settings['db'].get_test(current_test, settings['db_tests'])
        if not tests:
            settings['logger'].info("Terminated: no more testcases")
            break
        dbinput = settings['queue'].fuzz(tests)
        settings['monitor'].check()  # check free space before saving results
        saved, size = settings['db'].set_results(dbinput)
        finish_time = (time.time() - start_time)
        elapsed_time += finish_time  # Total time elapsed testing
        remaining_tests = total_testcases - (current_test + settings['db_tests'])  # Tests left
        test_count += settings['db_tests']
        # Guard the divisions: a coarse clock may report no elapsed time at all
        rate = test_count / elapsed_time if elapsed_time > 0 else 0.0  # Rate per second
        time_left = remaining_tests / rate / 60 if rate > 0 else 0  # How many minutes are left
        # Multiply before truncating, otherwise rates below one test per second are reported as 0
        average_rate = int(rate * len(settings['software']))
        settings['logger'].info("Tests " + str(current_test) + "-" + str(current_test + settings['db_tests']) + " - Set " + str(saved) + " (" + str(int(size / 1024)) + " kb) - Took " + str(int(finish_time)) + "s - Avg Rate " + str(average_rate) + " - ETC " + str(int(time_left)) + "'")
        current_test += settings['db_tests']
        # break  # uncomment if you want to run just one cycle of the fuzzer for debugging purposes
    settings['queue'].stop_web_server()
def help(err=""):
    """Show the optional error followed by the XDiFF command line syntax, then exit."""
    if err:
        print("Error: %s\n" % err)
    options = (
        "\t -c Python Software category to be fuzzed",
        "\t [-D] Print debugging information",
        "\t [-r 0] Random inputs: radamsa & zzuf without newlines (faster)",
        "\t [-r 1] Random inputs: radamsa & zzuf with newlines (slower)",
        "\t [-r 2] Random inputs: radamsa without newlines (faster)",
        "\t [-r 3] Random inputs: radamsa with newlines (slower)",
        "\t [-r 4] Random inputs: zzuf without newlines (faster)",
        "\t [-r 5] Random inputs: zzuf with newlines (slower)",
        "\t [-s software.ini] Configuration file for software to be fuzzed",
        "\t [-t 100] Threads executed in parallel",
        "\t [-T 10] Timeout per thread",
        "\t [-v] Use valgrind",
    )
    print("XDiFF Syntax: ")
    print(os.path.basename(__file__) + " -d db.sqlite Choose the database")
    for option in options:
        print(option)
    sys.exit()
def main():
    """Fuzz something FFS!

    Installs a SIGINT handler, parses the command line and starts the fuzzer.
    The -d and -c parameters are mandatory; invalid parameters print the help
    screen and exit.
    """
    # Bound before the handler is registered so an early SIGINT cannot hit an undefined name
    settings = {}

    def signal_handler(signum, frame):
        """Catch SIGINT and do some cleaning before termination"""
        # Only use what is available: the components are created while loading
        # the settings, so they may not exist yet when the signal arrives.
        # NOTE(review): this relies on load_settings() filling this same
        # dictionary - confirm against classes/settings.py.
        if 'monitor' in settings:
            settings['monitor'].remove_stuff()
        if 'queue' in settings:
            settings['queue'].stop_web_server()
        if 'logger' in settings:
            settings['logger'].info("Program terminated")
        sys.exit(1)

    def to_int(value, description):
        """Convert a command line value to int, or print the help screen and exit."""
        try:
            return int(value)
        except ValueError:
            help("The %s parameter should be a number" % description)

    signal.signal(signal.SIGINT, signal_handler)
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hc:d:Dr:s:t:T:v", ["help", "category=", "database=", "random=", "software=", "threads=", "timeout=", "valgrind"])
    except getopt.GetoptError as err:
        help(err)  # help() exits, so opts is always bound below

    for o, a in opts:
        if o in ("-h", "--help"):
            help()
        elif o in ("-c", "--category"):
            settings['fuzz_category'] = a
        elif o in ("-d", "--database"):
            settings['db_file'] = os.path.abspath(a)
        elif o in ("-D",):  # one-element tuple: a bare ("-D") is a string and would do a substring test
            settings['loglevel'] = 'debug'
        elif o in ("-r", "--random"):
            settings['generate_tests'] = to_int(a, "random")
        elif o in ("-s", "--software"):
            settings['software'] = os.path.abspath(a)
        elif o in ("-t", "--threads"):
            settings['db_tests'] = to_int(a, "threads")
        elif o in ("-T", "--timeout"):
            settings['timeout'] = to_int(a, "timeout")
        elif o in ("-v", "--valgrind"):
            settings['valgrind'] = True

    if "db_file" not in settings or "fuzz_category" not in settings:
        help("The -d and -c parameters are mandatory")
    else:
        dfuzz(settings)
# Run the command line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()