Repository: malus-security/sandblaster Branch: master Commit: d417bf90c9b2 Files: 31 Total size: 242.8 KB Directory structure: gitextract_1fgfkynp/ ├── .github/ │ └── workflows/ │ ├── config/ │ │ └── config.json │ ├── linter.yml │ └── rules/ │ ├── common/ │ │ ├── inlineTokenChildren.js │ │ └── wordPattern.js │ ├── md101.js │ ├── md102.js │ ├── md103.js │ ├── md104.js │ └── rules.js ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── helpers/ │ └── extract_sandbox_data.py └── reverse-sandbox/ ├── filters/ │ ├── filters_ios11.json │ ├── filters_ios12.json │ ├── filters_ios13.json │ ├── filters_ios14.json │ ├── filters_ios4.json │ ├── filters_ios5.json │ └── filters_ios6.json ├── filters.py ├── logger.config ├── operation_node.py ├── regex_parser_v1.py ├── regex_parser_v2.py ├── regex_parser_v3.py ├── reverse_sandbox.py ├── reverse_string.py ├── sandbox_filter.py └── sandbox_regex.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/config/config.json ================================================ { "default": true, "MD048": { "style": "backtick" }, "MD046": { "style": "fenced" }, "MD029": { "style": "one" }, "line-length": false, "no-hard-tabs": false } ================================================ FILE: .github/workflows/linter.yml ================================================ name: Linter on: [push, pull_request] jobs: superlinter: name: Super Linter runs-on: ubuntu-latest steps: - name: Checkout Code uses: actions/checkout@v3 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 - name: Lint Code Base uses: github/super-linter@v4 env: # Don't check already existent files VALIDATE_ALL_CODEBASE: false VALIDATE_GITHUB_ACTIONS: false LINTER_RULES_PATH: /.github/workflows/ MARKDOWN_CONFIG_FILE: config/config.json MARKDOWN_CUSTOM_RULE_GLOBS: rules/rules.js DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/rules/common/inlineTokenChildren.js ================================================ class InlineTokenChildren { constructor(token) { if (token.type === "inline") { this.root = token; this.column = -1; this.lineNumber = token.map[0]; } else { throw new TypeError("wrong argument token type"); } } *[Symbol.iterator]() { for (let token of this.root.children) { let { line, lineNumber } = token; if (this.lineNumber !== lineNumber) { this.column = -1; this.lineNumber = lineNumber; } this.column = line.indexOf(token.content, this.column + 1); yield { token, column: this.column + 1, lineNumber }; } } } module.exports = { InlineTokenChildren }; ================================================ FILE: .github/workflows/rules/common/wordPattern.js ================================================ class WordPattern { constructor(pattern, parameters) { const escapedDots = pattern.replace(/\\?\./g, "\\."); this.pattern = parameters && parameters.hasOwnProperty('noWordBoundary') ? escapedDots : "\\b" + escapedDots + "\\b"; const modifiers = parameters && parameters.hasOwnProperty('caseSensitive') && parameters.caseSensitive ? "" : "i"; this.regex = new RegExp(this.pattern, modifiers); this.suggestion = parameters && parameters.hasOwnProperty('suggestion') ? 
parameters.suggestion : pattern; this.stringRegex = new RegExp("^" + escapedDots + "$", modifiers); // To match "Category" column words in changelogs, see case-sensitive.js this.skipForUseCases = !!(parameters && parameters.hasOwnProperty('skipForUseCases')); } test(line) { return new Match(line.match(this.regex)); } } class Match { constructor(match) { this.match = match; } range() { if (this.match) { let column = this.match.index + 1; let length = this.match[0].length; if (this.match[2]) { column += this.match[1].length; length -= this.match[1].length; } return [column, length]; } return null; } toString() { return this.match ? this.match.toString() : "null"; } } module.exports = { WordPattern }; ================================================ FILE: .github/workflows/rules/md101.js ================================================ const { InlineTokenChildren } = require("./common/inlineTokenChildren"); const { WordPattern } = require("./common/wordPattern"); const keywords = [ new WordPattern("iExtractor-manager"), new WordPattern("device-info"), new WordPattern("device-name"), new WordPattern("list_apps"), new WordPattern("decrypt_kcache"), new WordPattern("decrypt_fs"), new WordPattern("curl"), new WordPattern("wget"), new WordPattern("crontab"), new WordPattern("cron"), new WordPattern("netcat"), new WordPattern("ping"), new WordPattern("traceroute"), new WordPattern("sudo"), new WordPattern("(? { var inHeading = false; var inLink = false; for (let token of params.tokens) { switch (token.type) { case "heading_open": inHeading = true; break; case "heading_close": inHeading = false; break; case "inline": let children = new InlineTokenChildren(token); for (let { token: child, column, lineNumber } of children) { let isText = child.type === "text"; switch (child.type) { case "link_open": inLink = true; break; case "link_close": inLink = false; break; } for (let k of keywords) { let anyCaseMatch = child.content.match(k.regex); if (anyCaseMatch != null) { let match = anyCaseMatch[0]; let correct = k.suggestion; if ((!inHeading && !inLink && isText) || // Bad not fenced (match !== correct)) { // Right fencing, wrong case onError({ lineNumber, detail: `Expected \`${correct}\`. 
Actual ${match}.`, range: [column + anyCaseMatch.index, match.length] }) } } } } } } } }; ================================================ FILE: .github/workflows/rules/md102.js ================================================ const http_keywords = [ "GET", "POST", "PUT", "PATCH", "DELETE", "Content-Type", "Content-Encoding", "User-Agent", "200 OK", "401 Unauthorized", "403 Forbidden", "API_DATA_READ", "API_DATA_WRITE", "API_META_READ", "API_META_WRITE", "USER", "EDITOR", "ENTITY_GROUP_ADMIN", "ADMIN" ]; const keywordsRegex = new RegExp(http_keywords.map(word => "\\b" + word + "\\b").join("|")); const { InlineTokenChildren } = require("./common/inlineTokenChildren"); module.exports = { names: ["MD102", "backtick-http"], description: "HTTP keywords must be fenced.", tags: ["backtick", "HTTP", "HTTPS"], "function": (params, onError) => { var inHeading = false; for (let token of params.tokens) { switch (token.type) { case "heading_open": inHeading = true; break; case "heading_close": inHeading = false; break; case "inline": if (!inHeading) { let children = new InlineTokenChildren(token); for (let { token: child, column, lineNumber } of children) { if (child.type === "text") { let exactCaseMatch = child.content.match(keywordsRegex); if (exactCaseMatch != null) { let match = exactCaseMatch[0]; onError({ lineNumber, detail: `Expected \`${match}\`. Actual ${match}.`, range: [column + exactCaseMatch.index, match.length] }) } } } } } } } }; ================================================ FILE: .github/workflows/rules/md103.js ================================================ "use strict"; module.exports = { "names": [ "MD103", "inline triple backticks" ], "description": "inline triple backticks", "tags": [ "backticks" ], "function": function rule(params, onError) { for (const inline of params.tokens.filter(function filterToken(token) { return token.type === "inline"; })) { const index = inline.content.toLowerCase().indexOf("```"); if (index !== -1) { onError({ "lineNumber": inline.lineNumber, "context": inline.content.substr(index - 1, 4), "detail": "Expected `. Actual ```" }); } } } }; ================================================ FILE: .github/workflows/rules/md104.js ================================================ "use strict"; module.exports = { names: ["MD104", "one line per sentence"], description: "one line (and only one line) per sentence", tags: ["sentences"], function: function rule(params, onError) { for (const inline of params.tokens.filter(function filterToken(token) { return token.type === "inline"; })) { var actual_lines = inline.content.split("\n"); actual_lines.forEach((line, index, arr) => { let outside = true; let count = 0; Array.from(line).forEach((char) => { if ((char == "." || char == "?" || char == "!" || char == ";" || char == ":") && outside) { count++; } if (char == "`") outside = !outside; if (char == "[") outside = false; if (char == "(") outside = false; if (char == "]") outside = true; if (char == ")") outside = true; }); if (count > 1) { onError({ lineNumber: inline.lineNumber + index, detail: "Expected one sentence per line. 
Multiple end of sentence punctuation signs found on one line!", }); } }); } }, }; ================================================ FILE: .github/workflows/rules/rules.js ================================================ "use strict"; const rules = [ require("./md101.js"), require("./md102.js"), require("./md103.js"), require("./md104.js"), ]; module.exports = rules; ================================================ FILE: .gitignore ================================================ *~ *.o *.zip *.rar *.tar *gz *bz2 *.obj *.a *.so *.lib *.dll *.swp *.swo tags TAGS *.exe *.class *.jar *.pyc *.log *.bin core .DS_STORE ================================================ FILE: .gitmodules ================================================ ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2016, North Carolina State University and University POLITEHNICA of Bucharest. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # SandBlaster: Reversing the Apple Sandbox SandBlaster is a tool for reversing (decompiling) binary Apple sandbox profiles. Apple sandbox profiles are written in SBPL (*Sandbox Profile Language*), a Scheme-like language, and are then compiled into an undocumented binary format and shipped. Primarily used on iOS, sandbox profiles are present on macOS as well. SandBlaster is, to our knowledge, the first tool that reverses binary sandbox profiles to their original SBPL format. SandBlaster works on iOS from version 7 onwards including iOS 11. The technical report [SandBlaster: Reversing the Apple Sandbox](https://arxiv.org/abs/1608.04303) presents extensive (though a bit outdated) information on SandBlaster internals. SandBlaster relied on previous work by [Dionysus Blazakis](https://github.com/dionthegod/XNUSandbox) and Stefan Esser's [code](https://github.com/sektioneins/sandbox_toolkit) and [slides](https://www.slideshare.net/i0n1c/ruxcon-2014-stefan-esser-ios8-containers-sandboxes-and-entitlements). 
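As a taste of the output, here is a small illustrative SBPL fragment (the rules and paths are made up, but they use the forms that SandBlaster emits, such as `allow`/`deny`, `require-any` and `require-entitlement`):

```
(deny default)
(allow file-read*
       (require-any
               (literal "/private/var/mobile")
               (regex #"^/private/var/tmp/")))
(allow mach-lookup
       (require-entitlement "com.apple.private.example"))
```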
The reverser (in the `reverse-sandbox/` folder) and the helper tool (in the `helpers/` folder) run on any platform running Python.
SandBlaster may be installed and run standalone, though we recommend installing and running it from within [iExtractor](https://github.com/malus-security/iExtractor).
Check the [iExtractor documentation](https://github.com/malus-security/iExtractor/blob/master/README.md) for information.
iExtractor is open source software released under the 3-clause BSD license.

## Installation

SandBlaster requires Python 2 for the reverser (in `reverse-sandbox/`) and Python 3 with the `lief` library for the helper script (in `helpers/`).

After cloning the SandBlaster repository, you have to install `lief` for Python 3:

```
pip3 install lief
```

If the installation of `lief` fails, you need to compile it yourself.
More information about how to compile it can be found on the [wiki page](https://lief.quarkslab.com/doc/stable/compilation.html).

## Usage

In order to use SandBlaster you need access to the binary sandbox profiles and the sandbox operations, a set of strings that define sandbox-specific actions.
Sandbox operations and sandbox profiles are extracted using the `helpers/extract_sandbox_data.py` script.
Sandbox profiles are extracted from the kernel sandbox extension (as a bundle for iOS 4 and 9-11), from the kernel cache (as a bundle for iOS 12), or from the `sandboxd` file in the iOS filesystem (for iOS 5-8).
Sandbox operations are extracted either from the kernel extension (for iOS 4-11) or from the kernel cache (for iOS 12).

So, as input data, SandBlaster requires the kernelcache, the kernel sandbox extension and the `sandboxd` file.
Information and scripts on extracting them from a publicly available IPSW (*iPhone Software*) file are provided by [iExtractor](https://github.com/malus-security/iExtractor).

Below are the steps and commands to reverse the sandbox profiles for iOS 8.4.1, assuming the sandbox kernel extension (`com.apple.security.sandbox.kext`) and the `sandboxd` file are available:

```
# Extract sandbox operations from kernelcache.
cd helpers/
./extract_sandbox_data.py -o iPad2,1_8.4.1_12H321.sb_ops iPad2,1_8.4.1_12H321.com.apple.security.sandbox.kext 8.4.1

# Extract binary sandbox profile files from sandboxd.
mkdir iPad2,1_8.4.1_12H321.sandbox_profiles
./extract_sandbox_data.py -O iPad2,1_8.4.1_12H321.sandbox_profiles/ iPad2,1_8.4.1_12H321.sandboxd 8.4.1

# Reverse all binary sandbox profiles.
cd ../reverse-sandbox/
mkdir iPad2,1_8.4.1_12H321.reversed_profiles
for i in ../helpers/iPad2,1_8.4.1_12H321.sandbox_profiles/*; do
    python reverse_sandbox.py -r 8.4.1 -o ../helpers/iPad2,1_8.4.1_12H321.sb_ops -d iPad2,1_8.4.1_12H321.reversed_profiles/ "$i"
done
```

Below are the steps and commands to reverse the sandbox profiles for iOS 9.3, assuming the sandbox kernel extension (`com.apple.security.sandbox.kext`) is available:

```
# Extract sandbox operations from kernelcache.
cd helpers/
./extract_sandbox_data.py -o iPhone5,1_9.3_13E237.sb_ops iPhone5,1_9.3_13E237.com.apple.security.sandbox.kext 9.3

# Extract sandbox profile bundle from kernel sandbox extension.
./extract_sandbox_data.py -O . iPhone5,1_9.3_13E237.com.apple.security.sandbox.kext 9.3

cd ../reverse-sandbox/

# Reverse all binary sandbox profiles in sandbox bundle.
mkdir iPhone5,1_9.3_13E237.reversed_profiles

# Print all sandbox profiles in bundle.
python reverse_sandbox.py -r 9.3 -o ../helpers/iPhone5,1_9.3_13E237.sb_ops -d iPhone5,1_9.3_13E237.reversed_profiles/ ../helpers/sandbox_bundle -psb

# Do actual reversing.
python reverse_sandbox.py -r 9.3 -o ../helpers/iPhone5,1_9.3_13E237.sb_ops -d iPhone5,1_9.3_13E237.reversed_profiles/ ../helpers/sandbox_bundle
```

The extraction of the binary sandbox profiles differs between iOS <= 8 and iOS >= 9.
Starting with iOS 9, the binary sandbox profiles are stored in a sandbox bundle in the kernel sandbox extension.
The `helpers/extract_sandbox_data.py` script extracts them appropriately depending on the iOS version.

The `-psb` option for `reverse_sandbox.py` prints out the sandbox profiles that are part of a sandbox bundle without doing the actual reversing.

The `reverse_sandbox.py` script needs to be run in its directory (`reverse-sandbox/`) since it needs the other Python modules and the `logger.config` file.

## Internals

The `helpers/` subfolder contains helper scripts that provide a nicer interface for the external tools.
The actual reverser is part of the `reverse-sandbox/` folder.
Files here can be categorized as follows:

* The main script is `reverse_sandbox.py`.
  It parses the command line arguments, does basic parsing of the input binary file (extracts sections) and calls the appropriate functions from the other modules.
* The core of the implementation is `operation_node.py`.
  It provides functions to build the rules graph corresponding to the sandbox profile and to convert the graph to SBPL.
  It is called by `reverse_sandbox.py`.
* Sandbox filters (i.e. match rules inside sandbox profiles) are handled by the implementation in `sandbox_filter.py` and the configuration in the version-specific `filters/filters_ios*.json` files loaded through `filters.py`.
  Filter-specific functions are called by `operation_node.py`.
* Regular expression reversing is handled by `sandbox_regex.py` and the version-specific back end parsers (`regex_parser_v1.py`, `regex_parser_v2.py`, `regex_parser_v3.py`) that convert the binary representation to a basic graph.
  `sandbox_regex.py` converts the graph representation (an automaton) to an actual regular expression (i.e. a string of characters and metacharacters).
  It is called by `reverse_sandbox.py` for parsing regular expressions, with the resulting regular expression list being passed to the functions exposed by `operation_node.py`; `operation_node.py` passes them on to the sandbox filter handling files.
* The new format for storing strings since iOS 10 is handled by `reverse_string.py`.
  The primary `SandboxString` class in `reverse_string.py` is used in `sandbox_filter.py`.
* Logging is configured in the `logger.config` file.
  By default, `INFO` and higher level messages are printed to the console, while `DEBUG` and higher level messages are printed to the `reverse.log` file.

## Supported iOS Versions

SandBlaster works for iOS version 4 onwards, including iOS 12.
Apple has been making updates to the binary format of the sandbox profiles: since iOS 9 sandbox profiles are stored in a bundle; since iOS 10 strings are aggregated together in a specialized binary format.
iOS 11 didn't bring any change to the format.

## Community

Join us on [Discord](https://discord.gg/m3gjuyHYw9) for live discussions.

================================================
FILE: helpers/extract_sandbox_data.py
================================================
#!/usr/bin/env python3

import sys
import argparse
import struct

import lief

CSTRING_SECTION = '__cstring'
CONST_SECTION = '__const'
DATA_SECTION = '__data'

def binary_get_word_size(binary: lief.MachO.Binary):
    """Gets the word size of the given binary

    The Mach-O binary has 'magic' bytes.
    These bytes can be used for checking whether the binary is 32bit or 64bit.
Note: iOS 4 and 5 are different to the other sandbox profiles as they have no magic values. Args: binary: A sandbox profile in its binary form. Returns: 4: for 32bit MachO binaries 8: for 64bit MachO binaries """ assert (binary.header.magic in [lief.MachO.MACHO_TYPES.MAGIC, lief.MachO.MACHO_TYPES.MAGIC_64]) return 4 if binary.header.magic == lief.MachO.MACHO_TYPES.MAGIC else 8 def unpack(bytes_list): """Unpacks bytes The information is stored as little endian so '<' is needed. For 32bit 'I' is needed and for 64bit 'Q'. Args: bytes_list: A packed list of bytes. Returns: The unpacked 'higher-order' equivalent. """ if len(bytes_list) == 4: return struct.unpack(' 0: vaddr_str = str_sect.virtual_address + strs[0] xref_vaddrs = get_xref(binary, vaddr_str) if len(xref_vaddrs) > 0: sects = [binary.section_from_virtual_address(x) for x in xref_vaddrs] sects = [s for s in sects if 'const' in s.name.lower()] assert len(sects) >= 1 and all([sects[0] == s for s in sects]) return sects[0] seg = binary.get_segment('__DATA') if seg: sects = [s for s in seg.sections if s.name == CONST_SECTION] assert len(sects) <= 1 if len(sects) == 1: return sects[0] return binary.get_section(CONST_SECTION) def is_vaddr_in_section(vaddr, section): """Checks if given virtual address is inside given section. Args: vaddr: A virtual address. section: A section of the binary. Returns: True: if the address is inside the section False: Otherwise """ return vaddr >= section.virtual_address \ and vaddr < section.virtual_address + section.size def unpack_pointer(addr_size, binary, vaddr): """Unpacks a pointer and untags it if it is necessary. Args: binary: A sandbox profile in its binary form. vaddr: A virtual address. addr_size: The size of an address (4 or 8). Returns: A pointer. """ ptr = unpack( binary.get_content_from_virtual_address(vaddr, addr_size)) if addr_size == 8: ptr = untag_pointer(ptr) return ptr def extract_data_tables_from_section(binary: lief.MachO.Binary, to_data, section): """ Generic implementation of table search. A table is formed of adjacent pointers to data. Args: binary: A sandbox profile in its binary form. to_data: Function that checks if the data is valid. This function returns None for invalid data and anything else otherwise. section: A section of the binary. Returns: An array of tables (arrays of data). """ addr_size = binary_get_word_size(binary) start_addr = section.virtual_address end_addr = section.virtual_address + section.size tables = [] vaddr = start_addr while vaddr <= end_addr - addr_size: ptr = unpack_pointer(addr_size, binary, vaddr) data = to_data(binary, ptr) if data is None: vaddr += addr_size continue table = [data] vaddr += addr_size while vaddr <= end_addr - addr_size: ptr = unpack_pointer(addr_size, binary, vaddr) data = to_data(binary, ptr) if data is None: break table.append(data) vaddr += addr_size if table not in tables: tables.append(table) vaddr += addr_size return tables def extract_string_tables(binary: lief.MachO.Binary): """Extracts string tables from the given MachO binary. Args: binary: A sandbox profile in its binary form. Returns: The string tables. """ return extract_data_tables_from_section(binary, binary_get_string_from_address, get_tables_section(binary)) def extract_separated_profiles(binary, string_tables): """Extract separated profiles from given MachO binary. It requires all string tables. This function is intended to be used for older version of iOS(<=7) because in newer versions the sandbox profiles are bundled. 
Args: binary: A sandbox profile in its binary form. string_tables: The extracted string tables. Returns: A zip object with profiles. """ def get_profile_names(): """Extracts the profile names. Returns: A list with the names of the sandbox profiles. """ def transform(arr): if len(arr) <= 3: return None ans = [] tmp = [] for val in arr: if val in ['default', '0123456789abcdef']: ans.append(tmp) tmp = [] else: tmp.append(val) ans.append(tmp) return ans def get_sol(posible): ans = [arr for arr in posible if 'com.apple.sandboxd' in arr] assert len(ans) == 1 return ans[0] profile_names_v = [transform(v) for v in string_tables] profile_names_v = [v for v in profile_names_v if v is not None] profile_names_v = [x for v in profile_names_v for x in v] return get_sol(profile_names_v) def get_profile_contents(): """Extracts the profile names. Returns: The contents of the sandbox profiles. """ def get_profile_content(binary, vaddr): addr_size = binary_get_word_size(binary) section = get_section_from_segment(binary, "__DATA", DATA_SECTION) if not is_vaddr_in_section(vaddr, section): return None data = binary.get_content_from_virtual_address(vaddr, 2 * addr_size) if len(data) != 2 * addr_size: return None data_vaddr = unpack(data[:addr_size]) size = unpack(data[addr_size:]) if not is_vaddr_in_section(vaddr, section): return None data = binary.get_content_from_virtual_address(data_vaddr, size) if len(data) != size: return None return bytes(data) contents_v = [v for v in extract_data_tables_from_section(binary, get_profile_content, get_tables_section(binary)) if len(v) > 3] assert len(contents_v) == 1 return contents_v[0] profile_names = get_profile_names() profile_contents = get_profile_contents() assert len(profile_names) == len(profile_contents) return zip(profile_names, profile_contents) def extract_sbops(string_tables): """ Extracts sandbox operations from a given MachO binary. If the sandbox profiles are stored either in sandboxd or sandbox kernel extension, the operations are stored always in the kernel extension. The sandbox operations are stored similar to the separated sandbox profiles but this time we have only one table: the name table. Args: string_tables: The binary's string tables. Returns: The sandbox operations. """ def transform(arr): if len(arr) <= 3: return None idxs = [] for idx, val in enumerate(arr): if val == 'default': idxs.append(idx) return [arr[idx:] for idx in idxs] def get_sol(possible): assert len(possible) >= 1 sol = [] if len(possible) > 1: cnt = min(len(arr) for arr in possible) for vals in zip(*[val[:cnt] for val in possible]): if not all(val == vals[0] for val in vals): break sol.append(vals[0]) else: sol.append(possible[0][0]) for pos in possible[0][1:]: if pos in ['HOME', 'default']: break sol.append(pos) return sol sbops_v = [transform(v) for v in string_tables] sbops_v = [v for v in sbops_v if v is not None and v != []] sbops_v = [x for v in sbops_v for x in v] return get_sol(sbops_v) def get_ios_major_version(version: str): """Extracts the major iOS version from a given version. Args: version: A string with the 'full' version. Returns: An integer with the major iOS version. """ return int(version.split('.')[0]) def findall(searching, pattern): """Finds all the substring in the given string. Args: searching: A string. pattern: A pattern that needs to be searched in the searching string. Returns: The indexes of all substrings equal to pattern inside searching string. 
""" i = searching.find(pattern) while i != -1: yield i i = searching.find(pattern, i + 1) def check_regex(data: bytes, base_index: int, ios_version: int): """ Checks if the regular expression (from sandbox profile) at offset base_index from data is valid for newer versions of iOS(>=8). Args: data: An array of bytes. base_index: The starting index. ios_version: An integer representing the iOS version. Returns: True: if the regular expression is valid for iOS version >= 8. False: otherwise. """ if base_index + 0x10 > len(data): return False if ios_version >= 13: size = struct.unpack('I', data[base_index + 0x2: base_index + 0x6])[0] else: size = struct.unpack('I', data[base_index + 0x4: base_index + 0x8])[0] if size > 0x1000 or size < 0x8 or base_index + size + 4 > len(data): return False if version != 3: return False if ios_version >= 13: sub_size = struct.unpack('= 13). Args: base_index: The starting index. count: Bundle size. data: An array of bytes. Returns: The new base index and an offset. """ re_offset = base_index + 12 op_nodes_count = struct.unpack('=8). Args: data: An array of bytes. base_index: The starting index. ios_version: An integer representing the iOS version. Returns: True: if the sandbox profile bundle is valid. False: otherwise. """ if len(data) - base_index < 50: return False re_offset, aux = struct.unpack('<2H', data[base_index + 2:base_index + 6]) if ios_version >= 13: count = struct.unpack('= 12: count = (aux - re_offset) * 4 # bundle should be big if count < 0x10: return False else: count = aux if count > 0x1000 or re_offset < 0x10: return False if ios_version >= 13: base_index, re_offset = unpack_for_newer_ios(base_index, count, data) else: re_offset = base_index + re_offset * 8 if len(data) - re_offset < count * 2: return False for off_index in range(re_offset, re_offset + 2 * count, 2): index = struct.unpack('\n" (name, argument) = self.value.values() if argument == None: result_str += (level+1)*"\t" + "\n" else: arg = str(argument).replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += (level+1)*"\t" + "\n" result_str += level*"\t" + "\n" else: (name, argument) = self.value.values() if argument == None: result_str += level*"\t" + "\n" else: arg = str(argument).replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += level*"\t" + "\n" elif self.is_type_require_entitlement(): if self.is_not: result_str += level*"\t" + "\n" level += 1 result_str += level*"\t" + "', '>') result_str += " value=\"" + _tmp + "\" />\n" else: _tmp = str(n.value)[21:-1].replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += " value=\"" + _tmp + "\">\n" result_str += i.recursive_xml_str(level+1, self.is_not) result_str += level*"\t" + "\n" if self.is_not: level -= 1 result_str += level*"\t" + "\n" else: result_str += level*"\t" + "\n" for i, v in enumerate(self.value): result_str += v.recursive_xml_str(level+1, recursive_is_not) result_str += level*"\t" + "\n" return result_str def __str__(self): return self.recursive_str(1, False) def str_debug(self): return self.recursive_str_debug(1, False) def str_simple(self): if self.is_type_single(): return self.value.str_debug() elif self.is_type_require_any(): return "require-any" elif self.is_type_require_all(): return "require-all" elif self.is_type_require_entitlement(): return self.value.str_debug()[1:-1] elif self.is_type_start(): return "start" else: return "unknown-type" def 
str_print_debug(self): if self.is_type_single(): return (self.value.str_debug(), None) elif self.is_type_require_any(): return ("(require-any", ")") elif self.is_type_require_all(): return ("(require-all", ")") elif self.is_type_require_entitlement(): return (self.value.str_debug()[:-1], ")") elif self.is_type_start(): return (None, None) else: return ("unknown-type", None) def str_print(self): if self.is_type_single(): return (str(self.value), None) elif self.is_type_require_any(): return ("(require-any", ")") elif self.is_type_require_all(): return ("(require-all", ")") elif self.is_type_require_entitlement(): return (str(self.value)[:-1], ")") elif self.is_type_start(): return (None, None) else: return ("unknown-type", None) def str_print_not(self): result_str = "" if self.is_type_single(): if self.is_not: value = str(self.value) if "(require-any" in value: result_str = self.value.str_not() else: result_str += "(require-not " + str(self.value) + ")" return result_str def xml_str(self): return self.recursive_xml_str(3, False) class ReducedEdge(): start = None end = None def __init__(self, start=None, end=None): self.start = start self.end = end def str_debug(self): return self.start.str_debug() + " -> " + self.end.str_debug() def str_simple(self): #print "start: %s" % (self.start.str_simple()) #print "end: %s" % (self.end.str_simple()) return "%s -----> %s" % (self.start.str_simple(), self.end.str_simple()) def __str__(self): return str(self.start) + " -> " + str(self.end) class ReducedGraph(): vertices = [] edges = [] final_vertices = [] reduce_changes_occurred = False def __init__(self): self.vertices = [] self.edges = [] self.final_vertices = [] self.reduce_changes_occurred = False def add_vertice(self, v): self.vertices.append(v) def add_edge(self, e): self.edges.append(e) def add_edge_by_vertices(self, v_start, v_end): e = ReducedEdge(v_start, v_end) self.edges.append(e) def set_final_vertices(self): self.final_vertices = [] for v in self.vertices: is_final = True for e in self.edges: if v == e.start: is_final = False break if is_final: self.final_vertices.append(v) def contains_vertice(self, v): return v in self.vertices def contains_edge(self, e): return e in self.edges def contains_edge_by_vertices(self, v_start, v_end): for e in self.edges: if e.start == v_start and e.end == v_end: return True return False def get_vertice_by_value(self, value): for v in self.vertices: if v.is_type_single(): if v.value == value: return v def get_edge_by_vertices(self, v_start, v_end): for e in self.edges: if e.start == v_start and e.end == v_end: return e return None def remove_vertice(self, v): edges_copy = list(self.edges) for e in edges_copy: if e.start == v or e.end == v: self.edges.remove(e) if v in self.vertices: self.vertices.remove(v) def remove_vertice_update_decision(self, v): edges_copy = list(self.edges) for e in edges_copy: if e.start == v: self.edges.remove(e) if e.end == v: e.start.decision = v.decision self.edges.remove(e) if v in self.vertices: self.vertices.remove(v) def remove_edge(self, e): if e in self.edges: self.edges.remove(e) def remove_edge_by_vertices(self, v_start, v_end): e = self.get_edge_by_vertices(v_start, v_end) if e: self.edges.remove(e) def replace_vertice_in_edge_start(self, old, new): global replace_occurred for e in self.edges: if e.start == old: e.start = new replace_occurred = True else: if isinstance(e.start.value, list): e.start.replace_in_list(old, new) if replace_occurred: e.start.decision = new.decision def replace_vertice_in_edge_end(self, old, new): 
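        # Counterpart of replace_vertice_in_edge_start(): re-point at `new` every
        # edge whose end is `old`, patch list-valued edge endpoints in place, and
        # track whether a direct swap happened via the module-level flag.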
global replace_occurred for e in self.edges: if e.end == old: e.end = new replace_occurred = True else: if isinstance(e.end.value, list): e.end.replace_in_list(old, new) if replace_occurred: e.end.decision = new.decision def replace_vertice_in_single_vertices(self, old, new): for v in self.vertices: if len(self.get_next_vertices(v)) == 0 and len(self.get_prev_vertices(v)) == 0: if isinstance(v.value, list): v.replace_in_list(old, new) def replace_vertice_list(self, old, new): for v in self.vertices: if isinstance(v.value, list): v.replace_sublist_in_list(old, new) if set(self.get_next_vertices(v)) == set(old): for n in old: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(v, new) if set(self.get_prev_vertices(v)) == set(old): for n in old: self.remove_edge_by_vertices(n, v) self.add_edge_by_vertices(new, v) def get_next_vertices(self, v): next_vertices = [] for e in self.edges: if e.start == v: next_vertices.append(e.end) return next_vertices def get_prev_vertices(self, v): prev_vertices = [] for e in self.edges: if e.end == v: prev_vertices.append(e.start) return prev_vertices def get_start_vertices(self): start_vertices = [] for v in self.vertices: if not self.get_prev_vertices(v): start_vertices.append(v) return start_vertices def get_end_vertices(self): end_vertices = [] for v in self.vertices: if not self.get_next_vertices(v): end_vertices.append(v) return end_vertices def reduce_next_vertices(self, v): next_vertices = self.get_next_vertices(v) if len(next_vertices) <= 1: return self.reduce_changes_occurred = True new_vertice = ReducedVertice("require-any", next_vertices, next_vertices[0].decision) add_to_final = False for n in next_vertices: self.remove_edge_by_vertices(v, n) self.replace_vertice_list(next_vertices, new_vertice) for n in next_vertices: if n in self.final_vertices: self.final_vertices.remove(n) add_to_final = True # If no more next vertices, remove vertice. if not self.get_next_vertices(n): if n in self.vertices: self.vertices.remove(n) self.add_edge_by_vertices(v, new_vertice) self.add_vertice(new_vertice) if add_to_final: self.final_vertices.append(new_vertice) def reduce_prev_vertices(self, v): prev_vertices = self.get_prev_vertices(v) if len(prev_vertices) <= 1: return self.reduce_changes_occurred = True new_vertice = ReducedVertice("require-any", prev_vertices, v.decision) for p in prev_vertices: self.remove_edge_by_vertices(p, v) self.replace_vertice_list(prev_vertices, new_vertice) for p in prev_vertices: # If no more prev vertices, remove vertice. 
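            # (a predecessor folded into the require-any node that has no other
            # incoming edges is unreachable and can be dropped)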
            if not self.get_prev_vertices(p):
                if p in self.vertices:
                    self.vertices.remove(p)
        self.add_vertice(new_vertice)
        self.add_edge_by_vertices(new_vertice, v)

    def reduce_vertice_single_prev(self, v):
        global replace_occurred
        prev = self.get_prev_vertices(v)
        if len(prev) != 1:
            logger.debug("not a single prev for node")
            return
        p = prev[0]
        nexts = self.get_next_vertices(p)
        if len(nexts) > 1 or nexts[0] != v:
            logger.debug("multiple nexts for prev")
            return
        require_all_vertices = []
        if p.is_type_require_all():
            require_all_vertices.extend(p.value)
        else:
            require_all_vertices.append(p)
        if v.is_type_require_all():
            require_all_vertices.extend(v.value)
        else:
            require_all_vertices.append(v)
        new_vertice = ReducedVertice("require-all", require_all_vertices, v.decision)
        self.remove_edge_by_vertices(p, v)
        replace_occurred = False
        self.replace_vertice_in_edge_start(v, new_vertice)
        self.replace_vertice_in_edge_end(p, new_vertice)
        self.replace_vertice_in_single_vertices(p, new_vertice)
        self.replace_vertice_in_single_vertices(v, new_vertice)
        self.remove_vertice(p)
        self.remove_vertice(v)
        if not replace_occurred:
            self.add_vertice(new_vertice)
        if v in self.final_vertices:
            self.final_vertices.remove(v)
            self.final_vertices.append(new_vertice)

    def reduce_vertice_single_next(self, v):
        global replace_occurred
        next = self.get_next_vertices(v)
        if len(next) != 1:
            return
        n = next[0]
        prevs = self.get_prev_vertices(n)
        if len(prevs) > 1 or prevs[0] != v:
            return
        require_all_vertices = []
        if v.is_type_require_all():
            require_all_vertices.extend(v.value)
        else:
            require_all_vertices.append(v)
        if n.is_type_require_all():
            require_all_vertices.extend(n.value)
        else:
            require_all_vertices.append(n)
        new_vertice = ReducedVertice("require-all", require_all_vertices, n.decision)
        self.remove_edge_by_vertices(v, n)
        replace_occurred = False
        self.replace_vertice_in_edge_start(n, new_vertice)
        self.replace_vertice_in_edge_end(v, new_vertice)
        self.replace_vertice_in_single_vertices(v, new_vertice)
        self.replace_vertice_in_single_vertices(n, new_vertice)
        self.remove_vertice(v)
        self.remove_vertice(n)
        if not replace_occurred:
            self.add_vertice(new_vertice)
        if n in self.final_vertices:
            self.final_vertices.remove(n)
            self.final_vertices.append(new_vertice)

    def reduce_graph(self):
        self.set_final_vertices()
        logger.debug("before everything:\n" + self.str_simple())
        # Do until no more changes.
        while True:
            self.reduce_changes_occurred = False
            copy_vertices = list(self.vertices)
            for v in copy_vertices:
                self.reduce_next_vertices(v)
            if self.reduce_changes_occurred == False:
                break
        logger.debug("after next:\n" + self.str_simple())
        # Do until no more changes.
        while True:
            self.reduce_changes_occurred = False
            copy_vertices = list(self.vertices)
            for v in copy_vertices:
                self.reduce_prev_vertices(v)
            if self.reduce_changes_occurred == False:
                break
        logger.debug("after next/prev:\n" + self.str_simple())
        # Reduce graph starting from final vertices. Keep going until
        # final vertices don't change during an iteration.
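        # Fixed-point iteration: merging a vertex with its single predecessor can
        # expose further require-all merges, so repeat until the final-vertex set
        # stabilizes.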
while True: copy_final_vertices = list(self.final_vertices) for v in copy_final_vertices: logger.debug("reducing single prev vertex: " + v.str_debug()) self.reduce_vertice_single_prev(v) logger.debug("### new graph is:") logger.debug(self.str_simple()) if set(copy_final_vertices) == set(self.final_vertices): break for e in self.edges: v = e.end logger.debug("reducing single prev vertex: " + v.str_debug()) self.reduce_vertice_single_prev(v) logger.debug("after everything:\n" + self.str_simple()) def reduce_graph_with_metanodes(self): # Add require-any metanode if current node has multiple successors. copy_vertices = list(self.vertices) for v in copy_vertices: nlist = self.get_next_vertices(v) if len(nlist) >= 2: new_node = ReducedVertice("require-any", None, None) self.add_vertice(new_node) self.add_edge_by_vertices(v, new_node) for n in nlist: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(new_node, n) start_list = self.get_start_vertices() new_node = ReducedVertice("start", None, None) self.add_vertice(new_node) for s in start_list: self.add_edge_by_vertices(new_node, s) # Add require-all metanode if current node has a require-any as a predecessor and is followed by another node. copy_vertices = list(self.vertices) for v in copy_vertices: prev_vertices = list(self.get_prev_vertices(v)) next_vertices = list(self.get_next_vertices(v)) for p in prev_vertices: if (p.is_type_require_any() or p.is_type_start()) and next_vertices: # Except for when a require-entitlement ending block. if v.is_type_require_entitlement(): has_next_nexts = False for n in next_vertices: if n.is_type_require_any(): for n2 in self.get_next_vertices(n): if self.get_next_vertices(n2): has_next_nexts = True break else: if self.get_next_vertices(n): has_next_nexts = True break if not has_next_nexts: continue new_node = ReducedVertice("require-all", None, None) self.add_vertice(new_node) self.remove_edge_by_vertices(p, v) self.add_edge_by_vertices(p, new_node) self.add_edge_by_vertices(new_node, v) def str_simple_with_metanodes(self): logger.debug("==== vertices:\n") for v in self.vertices: logger.debug(v.str_simple()) logger.debug("==== edges:\n") for e in self.edges: logger.debug(e.str_simple()) def str_simple(self): message = "==== vertices:\n" for v in self.vertices: message += "decision: " + str(v.decision) + "\t" + v.str_debug() + "\n" message += "==== final vertices:\n" for v in self.final_vertices: message += "decision: " + str(v.decision) + "\t" + v.str_debug() + "\n" message += "==== edges:\n" for e in self.edges: message += "\t" + e.str_debug() + "\n" return message def __str__(self): result_str = "" for v in self.vertices: result_str += "(" + str(v.decision) + " " if len(self.get_next_vertices(v)) == 0 and len(self.get_next_vertices(v)) == 0: if v in self.final_vertices: result_str += str(v) + "\n" result_str += ")\n" for e in self.edges: result_str += str(e) + "\n" result_str += "\n" return result_str def remove_builtin_filters(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("###\$\$\$\*\*\*", str(v)): self.remove_vertice_update_decision(v) def reduce_integrated_vertices(self, integrated_vertices): if len(integrated_vertices) == 0: return (None, None) if len(integrated_vertices) > 1: return (ReducedVertice("require-any", integrated_vertices, integrated_vertices[0].decision), integrated_vertices[0].decision) require_all_vertices = [] v = integrated_vertices[0] decision = None while True: if not re.search("entitlement-value #t", str(v)): require_all_vertices.append(v) 
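            # Advance along the single-successor chain, consuming each visited
            # vertex from the graph.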
next_vertices = self.get_next_vertices(v) if decision == None and v.decision != None: decision = v.decision self.remove_vertice(v) if v in self.final_vertices: self.final_vertices.remove(v) if next_vertices: v = next_vertices[0] else: break if len(require_all_vertices) == 0: return (None, v.decision) if len(require_all_vertices) == 1: return (ReducedVertice(value=require_all_vertices[0].value, decision=require_all_vertices[0].decision, is_not=require_all_vertices[0].is_not), v.decision) return (ReducedVertice("require-all", require_all_vertices, require_all_vertices[len(require_all_vertices)-1].decision), v.decision) def aggregate_require_entitlement(self, v): next_vertices = [] prev_vertices = self.get_prev_vertices(v) integrated_vertices = [] for n in self.get_next_vertices(v): if not re.search("entitlement-value", str(n)): next_vertices.append(n) break integrated_vertices.append(n) current_list = [ n ] while current_list: current = current_list.pop() for n2 in self.get_next_vertices(current): if not re.search("entitlement-value", str(n2)): self.remove_edge_by_vertices(current, n2) next_vertices.append(n2) else: current_list.append(n2) new_vertice = ReducedVertice(type="require-entitlement", value=(v, None), decision=None, is_not=v.is_not) for p in prev_vertices: self.remove_edge_by_vertices(p, v) self.add_edge_by_vertices(p, new_vertice) for n in next_vertices: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(new_vertice, n) for i in integrated_vertices: self.remove_edge_by_vertices(v, i) self.remove_vertice(v) self.add_vertice(new_vertice) if v in self.final_vertices: self.final_vertices.remove(v) self.final_vertices.append(new_vertice) (new_integrate, decision) = self.reduce_integrated_vertices(integrated_vertices) for i in integrated_vertices: self.remove_vertice(i) if i in self.final_vertices: self.final_vertices.remove(i) new_vertice.set_integrated_vertice(new_integrate) new_vertice.set_decision(decision) def aggregate_require_entitlement_nodes(self): copy_vertices = list(self.vertices) idx = 0 while idx < len(copy_vertices): v = copy_vertices[idx] if re.search("require-entitlement", str(v)): self.aggregate_require_entitlement(v) idx += 1 def cleanup_filters(self): self.remove_builtin_filters() self.aggregate_require_entitlement_nodes() def remove_builtin_filters_with_metanodes(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("###\$\$\$\*\*\*", v.str_simple()): self.remove_vertice(v) elif re.search("entitlement-value #t", v.str_simple()): self.remove_vertice(v) elif re.search("entitlement-value-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" elif re.search("global-name-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" elif re.search("local-name-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" def replace_require_entitlement_with_metanodes(self, v): prev_list = self.get_prev_vertices(v) next_list = self.get_next_vertices(v) new_node = ReducedVertice(type="require-entitlement", value=v.value, decision=None, is_not=v.is_not) self.add_vertice(new_node) self.remove_vertice(v) for p in prev_list: self.add_edge_by_vertices(p, new_node) for n in next_list: self.add_edge_by_vertices(new_node, n) def aggregate_require_entitlement_with_metanodes(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("require-entitlement", str(v)): self.replace_require_entitlement_with_metanodes(v) def cleanup_filters_with_metanodes(self): 
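        # Two passes: drop the synthetic builtin filter vertices and normalize the
        # catch-all #"." regex filters, then wrap require-entitlement vertices in
        # metanodes.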
self.remove_builtin_filters_with_metanodes() self.aggregate_require_entitlement_with_metanodes() def print_vertices_with_operation(self, operation, out_f): allow_vertices = [v for v in self.vertices if v.decision == "allow"] deny_vertices = [v for v in self.vertices if v.decision == "deny"] if allow_vertices: out_f.write("(allow %s " % (operation)) if len(allow_vertices) > 1: for v in allow_vertices: out_f.write("\n" + 8*" " + str(v)) else: out_f.write(str(allow_vertices[0])) out_f.write(")\n") if deny_vertices: out_f.write("(deny %s " % (operation)) if len(deny_vertices) > 1: for v in deny_vertices: out_f.write("\n" + 8*" " + str(v)) else: out_f.write(str(deny_vertices[0])) out_f.write(")\n") def print_vertices_with_operation_metanodes(self, operation, default_is_allow, out_f): # Return if only start node in list. if len(self.vertices) == 1 and self.vertices[0].is_type_start(): return # Use reverse of default rule. if default_is_allow: out_f.write("(deny %s" % (operation)) else: out_f.write("(allow %s" % (operation)) vlist = [] start_list = self.get_start_vertices() start_list.reverse() vlist.insert(0, (None, 0)) for s in start_list: vlist.insert(0, (s, 1)) while True: if not vlist: break (cnode, indent) = vlist.pop(0) if not cnode: out_f.write(")") continue (first, last) = cnode.str_print() if first: if cnode.is_not: if cnode.str_print_not() != "": out_f.write("\n" + indent * "\t" + cnode.str_print_not()) else: out_f.write("\n" + indent * "\t" + "(require-not " + first) if cnode.is_type_require_any() or cnode.is_type_require_all() or cnode.is_type_require_entitlement(): vlist.insert(0, (None, indent)) else: out_f.write(")") else: out_f.write("\n" + indent * "\t" + first) if last: vlist.insert(0, (None, indent)) next_vertices_list = self.get_next_vertices(cnode) if next_vertices_list: if cnode.is_type_require_any() or cnode.is_type_require_all() or cnode.is_type_require_entitlement(): indent += 1 next_vertices_list.reverse() if cnode.is_type_require_entitlement(): pos = 0 for n in next_vertices_list: if (n.is_type_single() and not re.search("entitlement-value", n.str_simple())) or \ n.is_type_require_entitlement(): vlist.insert(pos + 1, (n, indent-1)) else: vlist.insert(0, (n, indent)) pos += 1 else: for n in next_vertices_list: vlist.insert(0, (n, indent)) out_f.write("\n") def dump_xml(self, operation, out_f): allow_vertices = [v for v in self.vertices if v.decision == "allow"] deny_vertices = [v for v in self.vertices if v.decision == "deny"] if allow_vertices: out_f.write("\t\n" % (operation)) out_f.write("\t\t\n") for v in allow_vertices: out_f.write(v.xml_str()) out_f.write("\t\t\n") out_f.write("\t\n") if deny_vertices: out_f.write("\t\n" % (operation)) out_f.write("\t\t\n") for v in deny_vertices: out_f.write(v.xml_str()) out_f.write("\t\t\n") out_f.write("\t\n") def reduce_operation_node_graph(g): # Create reduced graph. rg = ReducedGraph() for node_iter in g.keys(): rv = ReducedVertice(value=node_iter, decision=g[node_iter]["decision"], is_not=g[node_iter]["not"]) rg.add_vertice(rv) for node_iter in g.keys(): rv = rg.get_vertice_by_value(node_iter) for node_next in g[node_iter]["list"]: rn = rg.get_vertice_by_value(node_next) rg.add_edge_by_vertices(rv, rn) # Handle special case for require-not (require-enitlement (...)). 
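    # That is, require-not (require-entitlement ...): entitlement-value vertices
    # sharing the negated node's predecessors are re-attached under it so they
    # nest inside the same require-not block.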
l = len(g.keys()) for idx, node_iter in enumerate(g.keys()): rv = rg.get_vertice_by_value(node_iter) if not re.search("require-entitlement", str(rv)): continue if not rv.is_not: continue c_idx = idx while True: c_idx += 1 if c_idx >= l: break rn = rg.get_vertice_by_value(list(g.keys())[c_idx]) if not re.search("entitlement-value", str(rn)): break prevs_rv = rg.get_prev_vertices(rv) prevs_rn = rg.get_prev_vertices(rn) if sorted(prevs_rv) != sorted(prevs_rn): continue for pn in prevs_rn: rg.remove_edge_by_vertices(rn, pn) rg.add_edge_by_vertices(rv, rn) rg.cleanup_filters_with_metanodes() for node_iter in g.keys(): rv = rg.get_vertice_by_value(node_iter) rg.reduce_graph_with_metanodes() return rg def main(): if len(sys.argv) != 4: print >> sys.stderr, "Usage: %s binary_sandbox_file operations_file ios_version" % (sys.argv[0]) sys.exit(-1) ios_major_version = int(sys.argv[3].split('.')[0]) # Read sandbox operations. sb_ops = [l.strip() for l in open(sys.argv[2])] num_sb_ops = len(sb_ops) logger.info("num_sb_ops:", num_sb_ops) f = open(sys.argv[1], "rb") operation_nodes = build_operation_nodes(f, num_sb_ops, ios_major_version) global num_regex f.seek(4) num_regex = struct.unpack("I', ''.join([chr(x) for x in re[i:i+4]]))[0] node_transition = struct.unpack('>I', ''.join([chr(x) for x in re[i+4:i+8]]))[0] node_arg = struct.unpack('>I', ''.join([chr(x) for x in re[i+8:i+12]]))[0] i += 12 logger.debug('node idx:{:#010x} type: {:#02x} arg: {:#010x}' \ ' transition: {:#010x}'.format(node_idx, node_type,node_arg, node_transition)) assert(node_type in node_type_dispatch_table) regex_list.append( node_type_dispatch_table[node_type]( node_type, node_arg, node_transition, node_idx)) return i def class_parse(re, i, classes, class_idx): def transform(x): c = chr(x) if c in '[]-': return '\\' + c else: return c class_size = struct.unpack('>I', ''.join([chr(x) for x in re[i:i+4]]))[0] i += 0x4 content = struct.unpack('>{}I'.format(class_size), ''.join([chr(x) for x in re[i:i+4*class_size]])) i += 0x4 * class_size assert(class_size % 2 == 0) cls = '' for idx in range(0, class_size, 2): start = content[idx] end = content[idx+1] if start != end: cls += '{}-{}'.format(transform(start), transform(end)) else: cls += transform(start) logger.debug('class idx = {:#x} size = {:#x} content=[{}]'.format( class_idx, class_size, cls)) classes.append(cls) return i class RegexParser(object): @staticmethod def parse(re, i, regex_list): node_count = struct.unpack('>I', ''.join([chr(x) for x in re[i:i+0x4]]))[0] logger.debug('node count = {:#x}'.format(node_count)) start_node = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0] logger.debug('start node = {:#x}'.format(start_node)) end_node = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0] logger.debug('end node = {:#x}'.format(end_node)) cclass_count = struct.unpack('>I', ''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0] logger.debug('character class count = {:#x}'.format(cclass_count)) submatch_count = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0] i += 0x14 logger.debug('submatch count = {:#x}'.format(submatch_count)) for node_idx in range(node_count): i = node_parse(re, i, regex_list, node_idx) classes = [] for class_idx in range(cclass_count): i = class_parse(re, i, classes, class_idx) for node in regex_list: if node['type'] == 'class': node['value'] = '[{}]'.format(classes[node['value']]) elif node['type'] == 'class_exclude': node['value'] = '[{}]'.format(classes[node['value']]) regex_list[start_node]['start_node'] = 
True ================================================ FILE: reverse-sandbox/regex_parser_v2.py ================================================ import logging import struct logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) def parse_character(node_type, node_arg, node_transition, node_idx): value = chr(node_arg & 0xff) if value == ".": value = "[.]" return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": value} def parse_end(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "end", "value": 0} def parse_jump_forward(node_type, node_arg, node_transition, node_idx): jump_to = node_arg return { "pos": node_idx, "nextpos": node_transition, "type": "jump_forward", "value": jump_to} def parse_jump_backward(node_type, node_arg, node_transition, node_idx): jump_to = node_transition return { "pos": node_idx, "nextpos": node_transition, "type": "jump_backward", "value": jump_to} def parse_beginning_of_line(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "^"} def parse_end_of_line(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "$"} def parse_dot(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "."} def parse_character_class(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "class", "value": node_arg} def parse_character_neg_class(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "class_exclude", "value": node_arg} def parse_parantheses_open(node_type, node_arg, node_transition, node_idx): return parse_jump_backward(node_type, node_arg, node_transition, node_idx) ''' return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "("} ''' def parse_parantheses_close(node_type, node_arg, node_transition, node_idx): return parse_jump_backward(node_type, node_arg, node_transition, node_idx) ''' return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": ")"} ''' node_type_dispatch_table = { 0x10: parse_character, 0x22: parse_end, 0x25: parse_jump_forward, 0x26: parse_jump_forward, 0x27: parse_jump_forward, 0x28: parse_jump_forward, 0x30: parse_dot, 0x31: parse_jump_backward, 0x32: parse_beginning_of_line, 0x33: parse_end_of_line, 0x34: parse_character_class, 0x35: parse_character_neg_class, } def node_parse(re, i, regex_list, node_idx): node_type = struct.unpack('> 4) i = i+1 logger.debug("i: %d, num: %d", i, num) values = [] value = "[" for j in range(0, num): values.append(re[i+2*j]) values.append(re[i+2*j+1]) first = values[0] last = values[2*num-1] # In case of excludes. 
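    # An exclude class is encoded with its first bound greater than its last:
    # rotate the bounds and nudge each by one so the emitted [^...] class covers
    # the gaps between the original ranges.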
if (first > last): node_type = "class_exclude" value += "^" for j in range(len(values)-1, 0, -1): values[j] = values[j-1] values[0] = last for j in range(0, len(values)): if j % 2 == 0: values[j] = values[j]+1 else: values[j] = values[j]-1 else: node_type = "class" for j in range(0, len(values), 2): if values[j] < values[j+1]: value += "%s-%s" % (chr(values[j]), chr(values[j+1])) else: value += "%s" % (chr(values[j])) value += "]" regex_list.append({ "pos": i-6-1, "nextpos": i + 2 * num - 6, "type": node_type, "value": value }) message = "values: [", ", ".join([hex(j) for j in values]), "]" logger.debug(message) return i + 2 * num - 1 def parse_end(re, i, regex_list): regex_list.append({ "pos": i-6, "nextpos": i+2-6, "type": "end", "value": 0 }) return i + 1 def parse(re, i, regex_list): # Actual character. if re[i] == 0x02: i = parse_character(re, i, regex_list) # Beginning of line. elif re[i] == 0x19: parse_beginning_of_line(i, regex_list) # End of line. elif re[i] == 0x29: parse_end_of_line(i, regex_list) # Any character. elif re[i] == 0x09: parse_any_character(i, regex_list) # Jump forward. elif re[i] == 0x2f: i = parse_jump_forward(re, i, regex_list) # Jump backward. elif re[i] & 0xf == 0xa: i = parse_jump_backward(re, i, regex_list) # Character class. elif re[i] & 0xf == 0xb: i = parse_character_class(re, i, regex_list) elif re[i] & 0xf == 0x5: i = parse_end(re, i, regex_list) else: logger.warning("##########unknown", hex(re[i])) return i + 1 class RegexParser(object): @staticmethod def parse(re, i, regex_list): length = struct.unpack('= 13: f.seek(get_base_addr(f, ios_version) + offset * 8) len = struct.unpack("\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write(']>\n') outfile_xml.write("\n") # Extract node for 'default' operation (index 0). default_node = operation_node.find_operation_node_by_offset(operation_nodes, op_table[0]) outfile.write("(%s default)\n" % (default_node.terminal)) outfile_xml.write("\t\n" % (default_node.terminal)) # For each operation expand operation node. for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] # Go past operations not in list, in case list is not empty. if ops_to_reverse: if operation not in ops_to_reverse: continue logger.info("parsing operation %s (index %d)", operation, idx) node = operation_node.find_operation_node_by_offset(operation_nodes, offset) if not node: logger.info("operation %s (index %d) has no operation node", operation, idx) continue g = operation_node.build_operation_node_graph(node, default_node) if g: rg = operation_node.reduce_operation_node_graph(g) rg.str_simple_with_metanodes() rg.print_vertices_with_operation_metanodes(operation, default_node.terminal.is_allow(), outfile) #rg.dump_xml(operation, outfile_xml) else: logger.info("no graph for operation %s (index %d)", operation, idx) if node.terminal and default_node.terminal: if node.terminal.type != default_node.terminal.type: outfile.write("(%s %s)\n" % (node.terminal, operation)) outfile_xml.write("\t\n" % (operation, node.terminal)) outfile.close() outfile_xml.write("\n") outfile_xml.close() def get_ios_major_version(release): """ Returns major version of release """ return int(release.split('.')[0]) def is_ios_more_than_10_release(release): """ Returns True if release is using newer (iOS >= 10) binary sandbox profile format. 
""" major_version = get_ios_major_version(release) if major_version < 10: return False return True def display_sandbox_profiles(f, re_table_offset, num_sb_ops, ios_version): logger.info("Printing sandbox profiles from bundle") if ios_version >= 13: f.seek(6) elif ios_version >= 12: f.seek(12) elif ios_version >= 10: f.seek(10) else: f.seek(6) num_profiles = struct.unpack("= 13: f.seek(2) num_operation_nodes = struct.unpack("= 12: f.seek(14 + (num_sb_ops + 2) * 2 * num_profiles) elif ios_version >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * num_profiles) else: f.seek(8 + (num_sb_ops + 2) * 2 * num_profiles) while True: word = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i) elif ios_version >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i) else: f.seek(8 + (num_sb_ops + 2) * 2 * i) name_offset = struct.unpack(" 0: f.seek(vars_offset + i*2) else: f.seek(vars_offset*8 + i*2) current_offset = struct.unpack(" 0: len = struct.unpack("= 13: # extract operation node table count f.seek(2) op_nodes_count = struct.unpack('= 6: header = struct.unpack("= 13: re_table_offset = 12 else: re_table_offset = struct.unpack("= 12: f.seek(8) re_table_count = struct.unpack(" 0: if get_ios_major_version(args.release) >= 13: f.seek(re_table_offset) else: f.seek(re_table_offset * 8) re_offsets_table = struct.unpack("<%dH" % re_table_count, f.read(2 * re_table_count)) for offset in re_offsets_table: if get_ios_major_version(args.release) >= 13: f.seek(get_base_addr(f, get_ios_major_version(args.release)) + offset * 8) re_length = struct.unpack("= 13: # get the regex table entries f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(4) vars_offset = struct.unpack("= 10: f.seek(6) vars_offset = struct.unpack("= 13: f.seek(2) num_operation_nodes = struct.unpack("= 12: f.seek(14 + (num_sb_ops + 2) * 2 * num_profiles) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * num_profiles) else: f.seek(8 + (num_sb_ops + 2) * 2 * num_profiles) while True: word = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i) else: f.seek(8 + (num_sb_ops + 2) * 2 * i) name_offset = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i + 4) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i + 4) else: f.seek(8 + (num_sb_ops + 2) * 2 * i + 4) op_table = struct.unpack("<%dH" % num_sb_ops, f.read(2 * num_sb_ops)) for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] logger.info("operation %s (index %u) starts at node offset %u (0x%x)", operation, idx, offset, offset) out_fname = os.path.join(out_dir, name + ".sb") process_profile(f, out_fname, sb_ops, ops_to_reverse, op_table, operation_nodes) else: if get_ios_major_version(args.release) >= 12: f.seek(4) vars_offset = struct.unpack("= 10: f.seek(6) vars_offset = struct.unpack("= 6: f.seek(6) else: f.seek(4) op_table = struct.unpack("<%dH" % num_sb_ops, f.read(2 * num_sb_ops)) for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] logger.info("operation %s (index %u) starts at node offset %u (0x%x)", operation, idx, offset, offset) # Place file pointer to start of operation nodes area. 
while True: word = struct.unpack("= 0x80: self.update_state_split_byte_read() elif b == 0x00 or b == 0x07: self.update_state_unknown() elif b == 0x05: self.update_state_reset_string() elif b == 0x08: self.update_state_concat_save_byte_read() # XXX: Read two bytes. I don't know what they do. self.get_next_byte() self.get_next_byte() elif b >= 0x10 and b < 0x3f: self.update_state_constant_read() elif b == 0x0b: self.update_state_range_byte_read() elif b == 0x02: self.update_state_plus_read() elif b == 0x06: self.update_state_reset_string() else: self.update_state_token_byte_read() def get_next_byte(self): if self.is_end(): return 0x00 b = struct.unpack("= len(self.binary_string): return True return False def reset_base(self): if len(self.base_stack) >= 1: self.base = self.base_stack.pop() def reset_base_full(self): self.base_stack = [] self.base = "" class SandboxString: rss_stack = [] def parse_byte_string(self, s, global_vars): rss = ReverseStringState(s) base = "" reset_base = False tokens = [] token = "" while True: if rss.state == rss.STATE_UNKNOWN: logger.debug("state is STATE_UNKNOWN") b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_TOKEN_READ: logger.debug("state is STATE_TOKEN_READ") b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_TOKEN_BYTE_READ: logger.debug("state is STATE_TOKEN_BYTE_READ") # String starts with length. prev_state = rss.state_stack[len(rss.state_stack)-1] if prev_state != rss.STATE_TOKEN_READ: token_len = rss.get_length_minus_1() rss.read_token(token_len) rss.update_state_token_read() else: logger.warn("read token byte from token state") break elif rss.state == rss.STATE_CONSTANT_READ: logger.debug("state is STATE_CONSTANT_READ") b = rss.get_last_byte() if b >= 0x10 and b < 0x3f: rss.token = "${" + global_vars[b-0x10] + "}" b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_CONCAT_BYTE_READ: logger.debug("state is STATE_CONCAT_BYTE_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_TOKEN_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONSTANT_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_RANGE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_SINGLE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_PLUS_READ: rss.update_base() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_CONCAT_SAVE_BYTE_READ: logger.debug("state is STATE_CONCAT_SAVE_BYTE_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_TOKEN_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONSTANT_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_RANGE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_SINGLE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_PLUS_READ: rss.update_base_stack() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_END_BYTE_READ: logger.debug("state is STATE_END_BYTE_READ") rss.end_current_token() rss.reset_base() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_RANGE_BYTE_READ: logger.debug("state is STATE_RANGE_BYTE_READ") rss.update_base_stack() b = rss.get_next_byte() b_array = [] all_ascii = True token = "" for i in range(0, b+1): b1 = rss.get_next_byte() b2 = rss.get_next_byte() if b1 < 0x20 or b1 > 0x7f or b2 < 0x20 or b2 > 0x7f: all_ascii = False b_array.append((b1,b2)) if all_ascii == False: (b1, b2) = b_array[0] (b3, b4) = b_array[1] if b2 == 0xff and b3 == 0x00: if b1-1 == b4+1: # 
single char exclude token = "[^{:c}]".format(b1-1) else: # range exclude token = "[^{:c}-{:c}]".format(b4+1, b1-1) else: token = "[TODO]" else: token = "[" for (b1, b2) in b_array: token += "{:c}-{:c}".format(b1, b2) token += "]" rss.token = token b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_SPLIT_BYTE_READ: logger.debug("state is STATE_SPLIT_BYTE_READ") substr_len = rss.get_last_byte() - 0x7f substr = rss.get_substring(substr_len) subtokens = self.parse_byte_string(substr, global_vars) rss.end_with_subtokens(subtokens) b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_SINGLE_BYTE_READ: logger.debug("state is STATE_SINGLE_BYTE_READ") rss.read_token(1) b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_RESET_STRING: logger.debug("state is STATE_RESET_STRING") rss.end_current_token() rss.reset_base_full() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_PLUS_READ: logger.debug("state is STATE_PLUS_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONCAT_BYTE_READ: rss.token = "+" rss.update_base() else: logger.warn("previous state is not concat") rss.read_token(1) b = rss.get_next_byte() rss.update_state(b) else: logger.warn("unknown state ({:d})".format(rss.state)) break if rss.is_end(): break # String must end in a STATE_END_BYTE_READ byte. if rss.state == rss.STATE_END_BYTE_READ: logger.debug("state is STATE_END_BYTE_READ") rss.end_current_token() elif rss.state == rss.STATE_UNKNOWN or rss.state == rss.STATE_CONCAT_BYTE_READ: pass elif rss.state_stack[len(rss.state_stack)-1] == rss.STATE_END_BYTE_READ: pass else: logger.warn("last state is not STATE_END_BYTE_READ ({:d})".format(rss.state)) logger.warn("previous state ({:d})".format(rss.state_stack[len(rss.state_stack)-1])) logger.info("initial string: " + " ".join("0x{:02x}".format(ord(c)) for c in s)) logger.info("output_strings (num: {:d}): {:s}".format(len(rss.output_strings), ",".join('"{:s}"'.format(s) for s in rss.output_strings))) return rss.output_strings def __init__(self): self.rss_stack = [] def main(): s = sys.stdin.read() ss = SandboxString() my_global_vars = ["FRONT_USER_HOME", "HOME", "PROCESS_TEMP_DIR"] l = ss.parse_byte_string(s[4:], my_global_vars) print(list(set(l))) if __name__ == "__main__": sys.exit(main()) ================================================ FILE: reverse-sandbox/sandbox_filter.py ================================================ #!/usr/bin/env python import struct import re import logging import logging.config import reverse_sandbox import reverse_string from filters import Filters logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) ios_major_version = 4 keep_builtin_filters = False global_vars = [] base_addr = 0 def get_filter_arg_string_by_offset(f, offset): """Extract string (literal) from given offset.""" f.seek(base_addr + offset * 8) if ios_major_version >= 13: len = struct.unpack("= 10: f.seek(offset * 8) s = f.read(4+len) logger.info("binary string is " + s.encode("hex")) ss = reverse_string.SandboxString() myss = ss.parse_byte_string(s[4:], global_vars) actual_string = "" for sss in myss: actual_string = actual_string + sss + " " actual_string = actual_string[:-1] logger.info("actual string is " + actual_string) return myss type = struct.unpack("= 13: len = struct.unpack("= 10: f.seek(base_addr + offset * 8) s = f.read(4+len) logger.info("binary string is " + s.encode("hex")) ss = reverse_string.SandboxString() myss = ss.parse_byte_string(s[4:], global_vars) 
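# parse_byte_string() returns a list of candidate strings decoded from the
# length-prefixed byte blob; the join below is only for logging, and the
# list itself is what gets returned. The append tag assigned next records
# which syntactic flavor the caller should attach to the filter name when
# it prints the profile ("literal" here).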
append = "literal" actual_string = "" for sss in myss: actual_string = actual_string + sss + " " actual_string = actual_string[:-1] logger.info("actual string is " + actual_string) return (append, myss) type = struct.unpack("= 13: len = struct.unpack("> 8) number = arg & 0xff return '(_IO "%s" %d)' % (letter, number) def get_filter_arg_vnode_type(f, arg): """Convert integer to file (vnode) type string.""" arg_types = { 0x01: "REGULAR-FILE", 0x02: "DIRECTORY", 0x03: "BLOCK-DEVICE", 0x04: "CHARACTER-DEVICE", 0x05: "SYMLINK", 0x06: "SOCKET", 0x07: "FIFO", 0xffff: "TTY" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_owner(f, arg): """Convert integer to process owner string.""" arg_types = { 0x01: "self", 0x02: "pgrp", 0x03: "others", 0x04: "children", 0x05: "same-sandbox" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_socket_domain(f, arg): """Convert integer to socket domain string.""" arg_types = { 0: "AF_UNSPEC", 1: "AF_UNIX", 2: "AF_INET", 3: "AF_IMPLINK", 4: "AF_PUP", 5: "AF_CHAOS", 6: "AF_NS", 7: "AF_ISO", 8: "AF_ECMA", 9: "AF_DATAKIT", 10: "AF_CCITT", 11: "AF_SNA", 12: "AF_DECnet", 13: "AF_DLI", 14: "AF_LAT", 15: "AF_HYLINK", 16: "AF_APPLETALK", 17: "AF_ROUTE", 18: "AF_LINK", 19: "pseudo_AF_XTP", 20: "AF_COIP", 21: "AF_CNT", 22: "pseudo_AF_RTIP", 23: "AF_IPX", 24: "AF_SIP", 25: "pseudo_AF_PIP", 27: "AF_NDRV", 28: "AF_ISDN", 29: "pseudo_AF_KEY", 30: "AF_INET6", 31: "AF_NATM", 32: "AF_SYSTEM", 33: "AF_NETBIOS", 34: "AF_PPP", 35: "pseudo_AF_HDRCMPLT", 36: "AF_RESERVED_36", 37: "AF_IEEE80211", 38: "AF_UTUN", 40: "AF_MAX" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_socket_type(f, arg): """Convert integer to socket type string.""" arg_types = { 0x01: "SOCK_STREAM", 0x02: "SOCK_DGRAM", 0x03: "SOCK_RAW", 0x04: "SOCK_RDM", 0x05: "SOCK_SEQPACKET" } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_none(f, arg): """Dumb callback function""" return None def get_filter_arg_privilege_id(f, arg): """Convert integer to privilege id string.""" arg_types = { 1000: "PRIV_ADJTIME", 1001: "PRIV_PROC_UUID_POLICY", 1002: "PRIV_GLOBAL_PROC_INFO", 1003: "PRIV_SYSTEM_OVERRIDE", 1004: "PRIV_HW_DEBUG_DATA", 1005: "PRIV_SELECTIVE_FORCED_IDLE", 1006: "PRIV_PROC_TRACE_INSPECT", 1008: "PRIV_KERNEL_WORK_INTERNAL", 6000: "PRIV_VM_PRESSURE", 6001: "PRIV_VM_JETSAM", 6002: "PRIV_VM_FOOTPRINT_LIMIT", 10000: "PRIV_NET_PRIVILEGED_TRAFFIC_CLASS", 10001: "PRIV_NET_PRIVILEGED_SOCKET_DELEGATE", 10002: "PRIV_NET_INTERFACE_CONTROL", 10003: "PRIV_NET_PRIVILEGED_NETWORK_STATISTICS", 10004: "PRIV_NET_PRIVILEGED_NECP_POLICIES", 10005: "PRIV_NET_RESTRICTED_AWDL", 10006: "PRIV_NET_PRIVILEGED_NECP_MATCH", 11000: "PRIV_NETINET_RESERVEDPORT", 14000: "PRIV_VFS_OPEN_BY_ID", } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_process_attribute(f, arg): """Convert integer to process attribute string.""" arg_types = { 0: 'is-plugin', 1: 'is-installer', 2: 'is-restricted', 3: 'is-initproc', } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_csr(f, arg): """Convert integer to csr string.""" arg_types = { 1: 'CSR_ALLOW_UNTRUSTED_KEXTS', 2: 'CSR_ALLOW_UNRESTRICTED_FS', 4: 'CSR_ALLOW_TASK_FOR_PID', 8: 'CSR_ALLOW_KERNEL_DEBUGGER', 16: 'CSR_ALLOW_APPLE_INTERNAL', 32: 'CSR_ALLOW_UNRESTRICTED_DTRACE', 64: 'CSR_ALLOW_UNRESTRICTED_NVRAM', 
128: 'CSR_ALLOW_DEVICE_CONFIGURATION', } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_host_port(f, arg): """Convert integer to host special port string.""" arg_types = { 8: 'HOST_DYNAMIC_PAGER_PORT', 9: 'HOST_AUDIT_CONTROL_PORT', 10: 'HOST_USER_NOTIFICATION_PORT', 11: 'HOST_AUTOMOUNTD_PORT', 12: 'HOST_LOCKD_PORT', 13: 'unknown: 13', 14: 'HOST_SEATBELT_PORT', 15: 'HOST_KEXTD_PORT', 16: 'HOST_CHUD_PORT', 17: 'HOST_UNFREED_PORT', 18: 'HOST_AMFID_PORT', 19: 'HOST_GSSD_PORT', 20: 'HOST_TELEMETRY_PORT', 21: 'HOST_ATM_NOTIFICATION_PORT', 22: 'HOST_COALITION_PORT', 23: 'HOST_SYSDIAGNOSE_PORT', 24: 'HOST_XPC_EXCEPTION_PORT', 25: 'HOST_CONTAINERD_PORT', } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg """An array (dictionary) of filter-converting items A filter is identified by a filter id and a filter argument. They are both stored in binary format (numbers) inside the binary sandbox profile file. Each item in the dictionary is identified by the filter id (used in hexadecimal). The value of each item is the string form of the filter id and the callback function used to convert the binary form of the filter argument to a string form. While there is a one-to-one mapping between the binary form and the string form of the filter id, that is not the case for the filter argument. To convert the binary form of the filter argument to its string form we use one of the callback functions above; almost all callback function names start with get_filter_arg_. """ def convert_filter_callback(f, ios_major_version_arg, keep_builtin_filters_arg, global_vars_arg, re_list, filter_id, filter_arg, base_addr_arg): """Convert filter from binary form to string. Binary form consists of filter id and filter argument: * filter id is the index inside the filters array above * filter argument is an actual parameter (such as a port number), a file offset or a regular expression index The string form consists of the name of the filter (as extracted from the filters array above) and a string representation of the filter argument. The string form of the filter argument is obtained from the binary form through the use of the callback function (as extracted from the filters array above).
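(For instance, a socket filter's numeric argument decodes through
get_filter_arg_socket_domain into a name such as "AF_INET", while a path
filter's argument is an offset resolved through the string table.)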
Function arguments are: f: the binary sandbox profile file regex_list: list of regular expressions filter_id: the binary form of the filter id filter_arg: the binary form of the filter argument """ global regex_list global ios_major_version global keep_builtin_filters global global_vars global base_addr keep_builtin_filters = keep_builtin_filters_arg ios_major_version = ios_major_version_arg global_vars = global_vars_arg regex_list = re_list base_addr = base_addr_arg if not Filters.exists(ios_major_version, filter_id): logger.warn("filter_id {} not in keys".format(filter_id)) return (None, None) filter = Filters.get(ios_major_version, filter_id) if not filter["arg_process_fn"]: logger.warn("no function for filter {}".format(filter_id)) return (None, None) if filter["arg_process_fn"] == "get_filter_arg_string_by_offset_with_type": (append, result) = globals()[filter["arg_process_fn"]](f, filter_arg) if filter_id == 0x01 and append == "path": append = "subpath" if result == None and filter["name"] != "debug-mode": logger.warn("result of calling string offset for filter {} is none".format(filter_id)) return (None, None) return (filter["name"] + append, result) result = globals()[filter["arg_process_fn"]](f, filter_arg) if result == None and not ((filter["name"] in ["debug-mode", "syscall-mask", "machtrap-mask", "kernel-mig-routine-mask"]) or (filter["name"] in ["extension", "mach-extension"] and ios_major_version <= 5)): logger.warn("result of calling arg_process_fn for filter {} is none".format(filter_id)) return (None, None) return (filter["name"], result) ================================================ FILE: reverse-sandbox/sandbox_regex.py ================================================ #!/usr/bin/env python3 import struct import logging import logging.config logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) from regex_parser_v1 import RegexParser as RegexParserV1 from regex_parser_v2 import RegexParser as RegexParserV2 from regex_parser_v3 import RegexParser as RegexParserV3 class Node(): """Representation of a node inside a regex non-deterministic automaton The most important attribute is the node type, which may be any of the four macros TYPE_... below. 
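For example (illustrative values), a node matching the character "a" could
be built as Node(name="3", type=Node.TYPE_CHARACTER, value="a"), while
jump nodes carry no character value and only encode branching and loops.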
""" TYPE_JUMP_FORWARD = 1 TYPE_JUMP_BACKWARD = 2 TYPE_CHARACTER = 3 TYPE_END = 4 FLAG_WHITE = 1 FLAG_GREY = 2 FLAG_BLACK = 3 name = "" type = None value = None flag = "white" def __init__(self, name=None, type=None, value=''): self.name = name self.type = type self.value = value self.flag = self.FLAG_WHITE def set_name(self, name): self.name = name def set_type_jump_forward(self): self.type = self.TYPE_JUMP_FORWARD def set_type_jump_backward(self): self.type = self.TYPE_JUMP_BACKWARD def set_type_character(self): self.type = self.TYPE_CHARACTER def set_type_end(self): self.type = self.TYPE_END def is_type_end(self): return self.type == self.TYPE_END def is_type_jump(self): return self.type == self.TYPE_JUMP_BACKWARD or self.type == self.TYPE_JUMP_FORWARD def is_type_jump_backward(self): return self.type == self.TYPE_JUMP_BACKWARD def is_type_jump_forward(self): return self.type == self.TYPE_JUMP_FORWARD def is_type_character(self): return self.type == self.TYPE_CHARACTER def set_value(self, value): self.value = value def set_flag_white(self): self.flag = self.FLAG_WHITE def set_flag_grey(self): self.flag = self.FLAG_GREY def set_flag_black(self): self.flag = self.FLAG_BLACK def __str__(self): if self.type == self.TYPE_JUMP_BACKWARD: return "(%s: jump backward)" % (self.name) elif self.type == self.TYPE_JUMP_FORWARD: return "(%s: jump forward)" % (self.name) elif self.type == self.TYPE_END: return "(%s: end)" % (self.name) else: return "(%s: %s)" % (self.name, self.value) class Graph(): """Representation of a regex NDA (Non-Deterministic Automaton) Use this class to convert a regex list of items into its canonical regular expression string. """ graph_dict = {} canon_graph_dict = {} node_list = [] start_node = None end_states = [] start_state = 0 regex = [] unified_regex = "" def __init__(self): self.graph_dict = {} def add_node(self, node, next_list=None): self.graph_dict[node] = next_list def has_node(self, node): return node in graph_dict.keys() def update_node(self, node, next_list): self.graph_dict[node] = next_list def add_new_next_to_node(self, node, next): self.graph_dict[node].append(next) def __str__(self): # Get maximum node number. max = -1 for node in self.graph_dict.keys(): if max < int(node.name): max = int(node.name) # Create graph list for ordered listing of nodes. graph_list = [None] * (max+1) for node in self.graph_dict.keys(): actual_string = str(node) + ":" for next_node in self.graph_dict[node]: actual_string += " " + str(next_node) graph_list[int(node.name)] = actual_string # Store node graph in ret_string. ret_string = "\n-- Node graph --\n" for s in graph_list: if s: ret_string += s + "\n" # Store canonical graph in ret_string. 
ret_string += "\n-- Canonical graph --\n" for state in self.canon_graph_dict.keys(): if state == self.start_state: ret_string += "> " elif state in self.end_states: ret_string += "# " else: ret_string += " " ret_string += "%d: %s\n" % (state, self.canon_graph_dict[state]) ret_string += "\n" return ret_string def get_node_for_idx(self, idx): if idx >= len(self.node_list) or idx < 0: return None return self.node_list[idx] def get_re_index_for_pos(self, regex_list, pos): for idx, item in enumerate(regex_list): if item["pos"] == pos: return idx for idx, item in enumerate(regex_list): if item["pos"]-1 == pos: return idx return -1 def get_next_idx_for_regex_item(self, regex_list, regex_item): result = self.get_re_index_for_pos(regex_list, regex_item["nextpos"]) assert(result >= 0) return result def fill_from_regex_list(self, regex_list): # First create list of nodes. No pointers/links at this point. # Create a node for each item. self.node_list = [] for idx, item in enumerate(regex_list): node = Node(name="%s" % (idx)) if item["type"] == "jump_backward": node.set_type_jump_backward() elif item["type"] == "jump_forward": node.set_type_jump_forward() elif item["type"] == "end": node.set_type_end() else: node.set_type_character() node.set_value(item["value"]) if 'start_node' in item and item['start_node'] == True: assert(self.start_node == None) self.start_node = node self.node_list.append(node) self.graph_dict = {} for idx, node in enumerate(self.node_list): # If node is end node ignore. if node.is_type_end(): self.graph_dict[node] = [] elif node.is_type_character(): next = self.get_node_for_idx( self.get_next_idx_for_regex_item(regex_list, regex_list[idx])) if next: self.graph_dict[node] = [ next ] else: self.graph_dict[node] = [] # Node is jump node. elif node.is_type_jump_backward(): next_idx = self.get_re_index_for_pos(regex_list, regex_list[idx]["value"]) next = self.get_node_for_idx(next_idx) if next: self.graph_dict[node] = [next] else: self.graph_dict[node] = [] elif node.is_type_jump_forward(): next_idx1 = self.get_next_idx_for_regex_item( regex_list, regex_list[idx]) next_idx2 = self.get_re_index_for_pos(regex_list, regex_list[idx]["value"]) next1 = self.get_node_for_idx(next_idx1) next2 = self.get_node_for_idx(next_idx2) self.graph_dict[node] = [] if next1: self.graph_dict[node].append(next1) if next2: self.graph_dict[node].append(next2) def get_character_nodes(self, node): node_list = [] for next in self.graph_dict[node]: if next.is_type_character() or next.is_type_end(): node_list.append(next) else: node_list = list(set(node_list).union(self.get_character_nodes(next))) return node_list def find_node_type_jump(self, current_node, node, backup_dict): if not current_node.is_type_jump(): return False if current_node == node: return True if not self.graph_dict[current_node]: return False for next_node in backup_dict[current_node]: if self.find_node_type_jump(next_node, node, backup_dict): return True return False def reduce(self): for node in self.graph_dict.keys(): if node.is_type_character(): self.graph_dict[node] = self.get_character_nodes(node) old_dict = dict(self.graph_dict) backup_dict = dict(self.graph_dict) for node in old_dict.keys(): if node.is_type_jump(): if self.find_node_type_jump(self.start_node, node, backup_dict): continue del self.graph_dict[node] def get_edges(self, node): edges = [] is_end_state = False for next in self.graph_dict[node]: if next.is_type_end(): is_end_state = True else: edges.append((next.value, int(next.name))) return is_end_state, edges def 
convert_to_canonical(self): self.end_states = [] for node in self.graph_dict.keys(): if node.is_type_end(): continue state_idx = int(node.name) is_end_state, self.canon_graph_dict[state_idx] = self.get_edges(node) if is_end_state == True: self.end_states.append(state_idx) for node in self.graph_dict.keys(): if node.name == "0": self.start_state = -1 self.canon_graph_dict[-1] = [ (node.value, 0) ] logger.debug(self.canon_graph_dict) logger.debug("end_states:") logger.debug(self.end_states) logger.debug("start_state:") logger.debug(self.start_state) def need_use_plus(self, initial_string, string_to_add): if not string_to_add.endswith("*"): return False if string_to_add.startswith("(") and string_to_add[-2:-1] == ")": actual_part = string_to_add[1:-2] else: actual_part = string_to_add[:-1] if initial_string.endswith(actual_part): return True if initial_string.endswith(string_to_add): return True return False def unify_two_strings(self, s1, s2): # Find largest common starting substring. lcss = "" for i in range(1, len(s1)+1): if s2.find(s1[:i], 0, i) != -1: lcss = s1[:i] if lcss: s1 = s1[len(lcss):] s2 = s2[len(lcss):] # Find largest common ending substring. lces = "" for i in range(1, len(s1)+1): if s2.find(s1[-i:], len(s2)-i, len(s2)) != -1: lces = s1[-i:] if lces: s1 = s1[:len(s1)-len(lces)] s2 = s2[:len(s2)-len(lces)] if not s1 and not s2: return lcss + lces if s1 and s2: return lcss + "(" + s1 + "|" + s2 + ")" + lces # Make s1 the empty string. if not s2: aux = s1 s1 = s2 s2 = aux if s2[-1] == '+': s2 = s2[:-1] + '*' else: if len(s2) > 1: s2 = "(" + s2 + ")?" else: s2 = s2 + '?' return lcss + s2 + lces def unify_strings(self, string_list): unified = "" if not string_list: return None if len(string_list) == 1: return string_list[0] # We now know we have multiple strings. Merge two at a time. current = string_list[0] for s in string_list[1:]: current = self.unify_two_strings(current, s) return current def remove_state(self, state_to_remove): itself_string = "" for (next_string, next_state) in self.canon_graph_dict[state_to_remove]: if next_state == state_to_remove: if len(next_string) > 1: itself_string = "(%s)*" % next_string else: itself_string = "%s*" % next_string # Create list of to_strings indexed by to_states. to_strings = {} for to_state in self.canon_graph_dict.keys(): to_strings[to_state] = [] if to_state == state_to_remove: continue for (iter_to_string, iter_to_state) in self.canon_graph_dict[state_to_remove]: if iter_to_state == to_state: to_strings[to_state].append(iter_to_string) # Unify multiple strings leading to the same to_state. unified_to_string = {} for to_state in to_strings.keys(): unified_to_string[to_state] = self.unify_strings(to_strings[to_state]) # Go through all graph edges. for from_state in self.canon_graph_dict.keys(): # Pass current state to remove. if from_state == state_to_remove: continue items_to_remove_list = [] for (next_string, next_state) in self.canon_graph_dict[from_state]: # Only if edge points to state_to_remove. if next_state != state_to_remove: continue # Plan edge to remove. Create new edge bypassing state_to_remove. items_to_remove_list.append((next_string, next_state)) for to_state in self.canon_graph_dict.keys(): if len(to_strings[to_state]) == 0: continue to_string = unified_to_string[to_state] #for (to_string, to_state) in self.canon_graph_dict[state_to_remove]: # # If state points to itself, do not add edge. # if to_state == state_to_remove: # continue # Add new edge, consider if state points to itself. 
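# This is the classic state-elimination step for converting an automaton
# into a regular expression: removing state q rewires every path
# p --A--> q --B--> r into a single edge p --A S* B--> r, where S
# (itself_string) is q's self-loop label. need_use_plus() is a cosmetic
# shortcut: when A already ends in S, "A S*" is emitted as "A+" instead.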
if self.need_use_plus(next_string, itself_string): self.canon_graph_dict[from_state].append((next_string + "+" + to_string, to_state)) continue self.canon_graph_dict[from_state].append((next_string + itself_string + to_string, to_state)) for (next_string, next_state) in items_to_remove_list: self.canon_graph_dict[from_state].remove((next_string, next_state)) del self.canon_graph_dict[state_to_remove] def simplify(self): tmp_dict = dict(self.canon_graph_dict) for state in tmp_dict.keys(): if state != self.start_state and state not in self.end_states: self.remove_state(state) def combine_start_end_nodes(self): working_strings = self.canon_graph_dict[self.start_state] final_strings = [] string_added = True while string_added == True: string_added = False initial_strings = working_strings working_strings = [] for (start_string, start_next_state) in initial_strings: if not start_next_state in self.end_states: continue if self.canon_graph_dict[start_next_state]: for (next_string, next_state) in self.canon_graph_dict[start_next_state]: if next_state == start_next_state: next_string = "(%s)*" % next_string if self.need_use_plus(start_string, next_string): final_strings.append((start_string + "+", None)) else: final_strings.append((start_string + next_string, None)) else: final_strings.append((start_string + next_string, None)) working_strings.append((start_string + next_string, next_state)) else: final_strings.append((start_string, None)) string_added = True self.regex = [x[0] for x in final_strings] self.unified_regex = self.unify_strings(self.regex) def create_regex_list(re): """Convert binary regex to list of items. Each item stores character position inside the binary regex (useful for jumps), character type and the value (either character or jump offset). """ regex_list = [] version = struct.unpack('>I', ''.join([chr(x) for x in re[:4]]))[0] logger.debug("re.version: 0x%x", version) i = 4 if version == 1: RegexParserV1.parse(re, i, regex_list) elif version == 2: RegexParserV2.parse(re, i, regex_list) elif version == 3: RegexParserV3.parse(re, i, regex_list) else: logger.critical("No parser available for regex version {:x}".format(version)) return regex_list def parse_regex(re): """Parse binary form for regular expression into canonical string. The input binary format is the one stored in the sandbox profile file. The out format is a canonical regular expression string using standard ASCII characters and metacharacters such as ^, $, +, *, etc. """ regex_list = create_regex_list(re) g = Graph() g.fill_from_regex_list(regex_list) g.reduce() g.convert_to_canonical() g.simplify() g.combine_start_end_nodes() logger.debug(g) return g.regex import sys import struct def main(): """Parse regular expressions in binary file.""" if len(sys.argv) != 2: print >> sys.stderr, "Usage: %s " % (sys.argv[0]) sys.exit(1) with open(sys.argv[1]) as f: re_count = struct.unpack("
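# --- Illustrative sketch (not part of the original file) ---
# A minimal, self-contained rendition of the state-elimination idea used
# by Graph.simplify()/remove_state() above, run on a toy automaton. The
# helper name below is hypothetical and exists only for this example.

def eliminate_state_sketch():
    # Toy canonical graph: state -1 is the start, state 1 is accepting.
    # Edges: -1 --"a"--> 0, 0 --"b"--> 0 (self-loop), 0 --"c"--> 1.
    graph = {-1: [("a", 0)], 0: [("b", 0), ("c", 1)], 1: []}
    # Removing state 0 rewires -1 --"a"--> 0 --"c"--> 1 through the
    # self-loop "b", giving the single edge -1 --"ab*c"--> 1.
    self_loop = "".join(s + "*" for (s, t) in graph[0] if t == 0)
    bypass = [(a + self_loop + c, t2)
              for (a, t1) in graph[-1] if t1 == 0
              for (c, t2) in graph[0] if t2 != 0]
    graph = {-1: bypass, 1: []}
    assert graph[-1] == [("ab*c", 1)]
    return graph[-1][0][0]

# print(eliminate_state_sketch())  # would output: ab*c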