Repository: malus-security/sandblaster Branch: master Commit: d417bf90c9b2 Files: 31 Total size: 242.8 KB Directory structure: gitextract_1fgfkynp/ ├── .github/ │ └── workflows/ │ ├── config/ │ │ └── config.json │ ├── linter.yml │ └── rules/ │ ├── common/ │ │ ├── inlineTokenChildren.js │ │ └── wordPattern.js │ ├── md101.js │ ├── md102.js │ ├── md103.js │ ├── md104.js │ └── rules.js ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── helpers/ │ └── extract_sandbox_data.py └── reverse-sandbox/ ├── filters/ │ ├── filters_ios11.json │ ├── filters_ios12.json │ ├── filters_ios13.json │ ├── filters_ios14.json │ ├── filters_ios4.json │ ├── filters_ios5.json │ └── filters_ios6.json ├── filters.py ├── logger.config ├── operation_node.py ├── regex_parser_v1.py ├── regex_parser_v2.py ├── regex_parser_v3.py ├── reverse_sandbox.py ├── reverse_string.py ├── sandbox_filter.py └── sandbox_regex.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/config/config.json ================================================ { "default": true, "MD048": { "style": "backtick" }, "MD046": { "style": "fenced" }, "MD029": { "style": "one" }, "line-length": false, "no-hard-tabs": false } ================================================ FILE: .github/workflows/linter.yml ================================================ name: Linter on: [push, pull_request] jobs: superlinter: name: Super Linter runs-on: ubuntu-latest steps: - name: Checkout Code uses: actions/checkout@v3 with: # Full git history is needed to get a proper list of changed files within `super-linter` fetch-depth: 0 - name: Lint Code Base uses: github/super-linter@v4 env: # Don't check already existent files VALIDATE_ALL_CODEBASE: false VALIDATE_GITHUB_ACTIONS: false LINTER_RULES_PATH: /.github/workflows/ MARKDOWN_CONFIG_FILE: config/config.json MARKDOWN_CUSTOM_RULE_GLOBS: rules/rules.js DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/rules/common/inlineTokenChildren.js ================================================ class InlineTokenChildren { constructor(token) { if (token.type === "inline") { this.root = token; this.column = -1; this.lineNumber = token.map[0]; } else { throw new TypeError("wrong argument token type"); } } *[Symbol.iterator]() { for (let token of this.root.children) { let { line, lineNumber } = token; if (this.lineNumber !== lineNumber) { this.column = -1; this.lineNumber = lineNumber; } this.column = line.indexOf(token.content, this.column + 1); yield { token, column: this.column + 1, lineNumber }; } } } module.exports = { InlineTokenChildren }; ================================================ FILE: .github/workflows/rules/common/wordPattern.js ================================================ class WordPattern { constructor(pattern, parameters) { const escapedDots = pattern.replace(/\\?\./g, "\\."); this.pattern = parameters && parameters.hasOwnProperty('noWordBoundary') ? escapedDots : "\\b" + escapedDots + "\\b"; const modifiers = parameters && parameters.hasOwnProperty('caseSensitive') && parameters.caseSensitive ? "" : "i"; this.regex = new RegExp(this.pattern, modifiers); this.suggestion = parameters && parameters.hasOwnProperty('suggestion') ? 
parameters.suggestion : pattern; this.stringRegex = new RegExp("^" + escapedDots + "$", modifiers); // To match "Category" column words in changelogs, see case-sensitive.js this.skipForUseCases = !!(parameters && parameters.hasOwnProperty('skipForUseCases')); } test(line) { return new Match(line.match(this.regex)); } } class Match { constructor(match) { this.match = match; } range() { if (this.match) { let column = this.match.index + 1; let length = this.match[0].length; if (this.match[2]) { column += this.match[1].length; length -= this.match[1].length; } return [column, length]; } return null; } toString() { return this.match ? this.match.toString() : "null"; } } module.exports = { WordPattern }; ================================================ FILE: .github/workflows/rules/md101.js ================================================ const { InlineTokenChildren } = require("./common/inlineTokenChildren"); const { WordPattern } = require("./common/wordPattern"); const keywords = [ new WordPattern("iExtractor-manager"), new WordPattern("device-info"), new WordPattern("device-name"), new WordPattern("list_apps"), new WordPattern("decrypt_kcache"), new WordPattern("decrypt_fs"), new WordPattern("curl"), new WordPattern("wget"), new WordPattern("crontab"), new WordPattern("cron"), new WordPattern("netcat"), new WordPattern("ping"), new WordPattern("traceroute"), new WordPattern("sudo"), new WordPattern("(? { var inHeading = false; var inLink = false; for (let token of params.tokens) { switch (token.type) { case "heading_open": inHeading = true; break; case "heading_close": inHeading = false; break; case "inline": let children = new InlineTokenChildren(token); for (let { token: child, column, lineNumber } of children) { let isText = child.type === "text"; switch (child.type) { case "link_open": inLink = true; break; case "link_close": inLink = false; break; } for (let k of keywords) { let anyCaseMatch = child.content.match(k.regex); if (anyCaseMatch != null) { let match = anyCaseMatch[0]; let correct = k.suggestion; if ((!inHeading && !inLink && isText) || // Bad not fenced (match !== correct)) { // Right fencing, wrong case onError({ lineNumber, detail: `Expected \`${correct}\`. 
Actual ${match}.`, range: [column + anyCaseMatch.index, match.length] }) } } } } } } } }; ================================================ FILE: .github/workflows/rules/md102.js ================================================ const http_keywords = [ "GET", "POST", "PUT", "PATCH", "DELETE", "Content-Type", "Content-Encoding", "User-Agent", "200 OK", "401 Unauthorized", "403 Forbidden", "API_DATA_READ", "API_DATA_WRITE", "API_META_READ", "API_META_WRITE", "USER", "EDITOR", "ENTITY_GROUP_ADMIN", "ADMIN" ]; const keywordsRegex = new RegExp(http_keywords.map(word => "\\b" + word + "\\b").join("|")); const { InlineTokenChildren } = require("./common/inlineTokenChildren"); module.exports = { names: ["MD102", "backtick-http"], description: "HTTP keywords must be fenced.", tags: ["backtick", "HTTP", "HTTPS"], "function": (params, onError) => { var inHeading = false; for (let token of params.tokens) { switch (token.type) { case "heading_open": inHeading = true; break; case "heading_close": inHeading = false; break; case "inline": if (!inHeading) { let children = new InlineTokenChildren(token); for (let { token: child, column, lineNumber } of children) { if (child.type === "text") { let exactCaseMatch = child.content.match(keywordsRegex); if (exactCaseMatch != null) { let match = exactCaseMatch[0]; onError({ lineNumber, detail: `Expected \`${match}\`. Actual ${match}.`, range: [column + exactCaseMatch.index, match.length] }) } } } } } } } }; ================================================ FILE: .github/workflows/rules/md103.js ================================================ "use strict"; module.exports = { "names": [ "MD103", "inline triple backticks" ], "description": "inline triple backticks", "tags": [ "backticks" ], "function": function rule(params, onError) { for (const inline of params.tokens.filter(function filterToken(token) { return token.type === "inline"; })) { const index = inline.content.toLowerCase().indexOf("```"); if (index !== -1) { onError({ "lineNumber": inline.lineNumber, "context": inline.content.substr(index - 1, 4), "detail": "Expected `. Actual ```" }); } } } }; ================================================ FILE: .github/workflows/rules/md104.js ================================================ "use strict"; module.exports = { names: ["MD104", "one line per sentence"], description: "one line (and only one line) per sentence", tags: ["sentences"], function: function rule(params, onError) { for (const inline of params.tokens.filter(function filterToken(token) { return token.type === "inline"; })) { var actual_lines = inline.content.split("\n"); actual_lines.forEach((line, index, arr) => { let outside = true; let count = 0; Array.from(line).forEach((char) => { if ((char == "." || char == "?" || char == "!" || char == ";" || char == ":") && outside) { count++; } if (char == "`") outside = !outside; if (char == "[") outside = false; if (char == "(") outside = false; if (char == "]") outside = true; if (char == ")") outside = true; }); if (count > 1) { onError({ lineNumber: inline.lineNumber + index, detail: "Expected one sentence per line. 
Multiple end of sentence punctuation signs found on one line!", }); } }); } }, }; ================================================ FILE: .github/workflows/rules/rules.js ================================================ "use strict"; const rules = [ require("./md101.js"), require("./md102.js"), require("./md103.js"), require("./md104.js"), ]; module.exports = rules; ================================================ FILE: .gitignore ================================================ *~ *.o *.zip *.rar *.tar *gz *bz2 *.obj *.a *.so *.lib *.dll *.swp *.swo tags TAGS *.exe *.class *.jar *.pyc *.log *.bin core .DS_STORE ================================================ FILE: .gitmodules ================================================ ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2016, North Carolina State University and University POLITEHNICA of Bucharest. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # SandBlaster: Reversing the Apple Sandbox SandBlaster is a tool for reversing (decompiling) binary Apple sandbox profiles. Apple sandbox profiles are written in SBPL (*Sandbox Profile Language*), a Scheme-like language, and are then compiled into an undocumented binary format and shipped. Primarily used on iOS, sandbox profiles are present on macOS as well. SandBlaster is, to our knowledge, the first tool that reverses binary sandbox profiles to their original SBPL format. SandBlaster works on iOS from version 7 onwards including iOS 11. The technical report [SandBlaster: Reversing the Apple Sandbox](https://arxiv.org/abs/1608.04303) presents extensive (though a bit outdated) information on SandBlaster internals. SandBlaster relied on previous work by [Dionysus Blazakis](https://github.com/dionthegod/XNUSandbox) and Stefan Esser's [code](https://github.com/sektioneins/sandbox_toolkit) and [slides](https://www.slideshare.net/i0n1c/ruxcon-2014-stefan-esser-ios8-containers-sandboxes-and-entitlements). 
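As a taste of the output, here is a small illustrative SBPL fragment (the rules and paths are made up, but they use the forms that SandBlaster emits, such as `allow`/`deny`, `require-any` and `require-entitlement`):

```
(deny default)
(allow file-read*
       (require-any
               (literal "/private/var/mobile")
               (regex #"^/private/var/tmp/")))
(allow mach-lookup
       (require-entitlement "com.apple.private.example"))
```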
The reverser (in the `reverse-sandbox/` folder) and the helper tool (in the `helpers/` folder) run on any platform running Python.
SandBlaster may be installed and run standalone, though we recommend installing and running it from within [iExtractor](https://github.com/malus-security/iExtractor).
Check the [iExtractor documentation](https://github.com/malus-security/iExtractor/blob/master/README.md) for information.
iExtractor is open source software released under the 3-clause BSD license.

## Installation

SandBlaster requires Python 2 for the reverser (in `reverse-sandbox/`) and Python 3 with the `lief` library for the helper script (in `helpers/`).

After cloning the SandBlaster repository, you have to install `lief` for Python 3:

```
pip3 install lief
```

If the installation of `lief` fails, you need to compile it yourself.
More information about how to compile it can be found on the [wiki page](https://lief.quarkslab.com/doc/stable/compilation.html).

## Usage

In order to use SandBlaster you need access to the binary sandbox profiles and the sandbox operations, a set of strings that define sandbox-specific actions.
Sandbox operations and sandbox profiles are extracted using the `helpers/extract_sandbox_data.py` script.
Sandbox profiles are extracted from the kernel sandbox extension (as a bundle for iOS 4 and 9-11), from the kernel cache (as a bundle for iOS 12), or from the `sandboxd` file in the iOS filesystem (for iOS 5-8).
Sandbox operations are extracted either from the kernel extension (for iOS 4-11) or from the kernel cache (for iOS 12).

So, as input data, SandBlaster requires the kernelcache, the kernel sandbox extension and the `sandboxd` file.
Information and scripts on extracting them from a publicly available IPSW (*iPhone Software*) file are provided by [iExtractor](https://github.com/malus-security/iExtractor).

Below are the steps and commands to reverse the sandbox profiles for iOS 8.4.1, assuming the sandbox kernel extension (`com.apple.security.sandbox.kext`) and the `sandboxd` file are available:

```
# Extract sandbox operations from kernelcache.
cd helpers/
./extract_sandbox_data.py -o iPad2,1_8.4.1_12H321.sb_ops iPad2,1_8.4.1_12H321.com.apple.security.sandbox.kext 8.4.1

# Extract binary sandbox profile files from sandboxd.
mkdir iPad2,1_8.4.1_12H321.sandbox_profiles
./extract_sandbox_data.py -O iPad2,1_8.4.1_12H321.sandbox_profiles/ iPad2,1_8.4.1_12H321.sandboxd 8.4.1

# Reverse all binary sandbox profiles.
cd ../reverse-sandbox/
mkdir iPad2,1_8.4.1_12H321.reversed_profiles
for i in ../helpers/iPad2,1_8.4.1_12H321.sandbox_profiles/*; do
    python reverse_sandbox.py -r 8.4.1 -o ../helpers/iPad2,1_8.4.1_12H321.sb_ops -d iPad2,1_8.4.1_12H321.reversed_profiles/ "$i"
done
```

Below are the steps and commands to reverse the sandbox profiles for iOS 9.3, assuming the sandbox kernel extension (`com.apple.security.sandbox.kext`) is available:

```
# Extract sandbox operations from kernelcache.
cd helpers/
./extract_sandbox_data.py -o iPhone5,1_9.3_13E237.sb_ops iPhone5,1_9.3_13E237.com.apple.security.sandbox.kext 9.3

# Extract sandbox profile bundle from kernel sandbox extension.
./extract_sandbox_data.py -O . iPhone5,1_9.3_13E237.com.apple.security.sandbox.kext 9.3

cd ../reverse-sandbox/

# Reverse all binary sandbox profiles in sandbox bundle.
mkdir iPhone5,1_9.3_13E237.reversed_profiles

# Print all sandbox profiles in bundle.
python reverse_sandbox.py -r 9.3 -o ../helpers/iPhone5,1_9.3_13E237.sb_ops -d iPhone5,1_9.3_13E237.reversed_profiles/ ../helpers/sandbox_bundle -psb

# Do actual reversing.
python reverse_sandbox.py -r 9.3 -o ../helpers/iPhone5,1_9.3_13E237.sb_ops -d iPhone5,1_9.3_13E237.reversed_profiles/ ../helpers/sandbox_bundle
```

The extraction of the binary sandbox profiles differs between iOS <= 8 and iOS >= 9.
Starting with iOS 9, the binary sandbox profiles are stored in a sandbox bundle in the kernel sandbox extension.
The `helpers/extract_sandbox_data.py` script extracts them appropriately depending on the iOS version.

The `-psb` option for `reverse_sandbox.py` prints out the sandbox profiles that are part of a sandbox bundle without doing the actual reversing.

The `reverse_sandbox.py` script needs to be run in its directory (`reverse-sandbox/`) since it needs the other Python modules and the `logger.config` file.

## Internals

The `helpers/` subfolder contains helper scripts that provide a nicer interface for the external tools.
The actual reverser is part of the `reverse-sandbox/` folder.
Files here can be categorized as follows:

* The main script is `reverse_sandbox.py`.
  It parses the command line arguments, does basic parsing of the input binary file (extracts sections) and calls the appropriate functions from the other modules.
* The core of the implementation is `operation_node.py`.
  It provides functions to build the rules graph corresponding to the sandbox profile and to convert the graph to SBPL.
  It is called by `reverse_sandbox.py`.
* Sandbox filters (i.e. match rules inside sandbox profiles) are handled by the implementation in `sandbox_filter.py` and the configuration in the version-specific `filters/filters_ios*.json` files loaded through `filters.py`.
  Filter-specific functions are called by `operation_node.py`.
* Regular expression reversing is handled by `sandbox_regex.py` and the version-specific back end parsers (`regex_parser_v1.py`, `regex_parser_v2.py`, `regex_parser_v3.py`) that convert the binary representation to a basic graph.
  `sandbox_regex.py` converts the graph representation (an automaton) to an actual regular expression (i.e. a string of characters and metacharacters).
  It is called by `reverse_sandbox.py` for parsing regular expressions, with the resulting regular expression list being passed to the functions exposed by `operation_node.py`; `operation_node.py` passes them on to the sandbox filter handling files.
* The new format for storing strings since iOS 10 is handled by `reverse_string.py`.
  The primary `SandboxString` class in `reverse_string.py` is used in `sandbox_filter.py`.
* Logging is configured in the `logger.config` file.
  By default, `INFO` and higher level messages are printed to the console, while `DEBUG` and higher level messages are printed to the `reverse.log` file.

## Supported iOS Versions

SandBlaster works for iOS version 4 onwards, including iOS 12.
Apple has been making updates to the binary format of the sandbox profiles: since iOS 9 sandbox profiles are stored in a bundle; since iOS 10 strings are aggregated together in a specialized binary format.
iOS 11 didn't bring any change to the format.

## Community

Join us on [Discord](https://discord.gg/m3gjuyHYw9) for live discussions.

================================================
FILE: helpers/extract_sandbox_data.py
================================================
#!/usr/bin/env python3

import sys
import argparse
import struct

import lief

CSTRING_SECTION = '__cstring'
CONST_SECTION = '__const'
DATA_SECTION = '__data'

def binary_get_word_size(binary: lief.MachO.Binary):
    """Gets the word size of the given binary

    The Mach-O binary has 'magic' bytes.
    These bytes can be used for checking whether the binary is 32bit or 64bit.
Note: iOS 4 and 5 are different to the other sandbox profiles as they have no magic values. Args: binary: A sandbox profile in its binary form. Returns: 4: for 32bit MachO binaries 8: for 64bit MachO binaries """ assert (binary.header.magic in [lief.MachO.MACHO_TYPES.MAGIC, lief.MachO.MACHO_TYPES.MAGIC_64]) return 4 if binary.header.magic == lief.MachO.MACHO_TYPES.MAGIC else 8 def unpack(bytes_list): """Unpacks bytes The information is stored as little endian so '<' is needed. For 32bit 'I' is needed and for 64bit 'Q'. Args: bytes_list: A packed list of bytes. Returns: The unpacked 'higher-order' equivalent. """ if len(bytes_list) == 4: return struct.unpack(' 0: vaddr_str = str_sect.virtual_address + strs[0] xref_vaddrs = get_xref(binary, vaddr_str) if len(xref_vaddrs) > 0: sects = [binary.section_from_virtual_address(x) for x in xref_vaddrs] sects = [s for s in sects if 'const' in s.name.lower()] assert len(sects) >= 1 and all([sects[0] == s for s in sects]) return sects[0] seg = binary.get_segment('__DATA') if seg: sects = [s for s in seg.sections if s.name == CONST_SECTION] assert len(sects) <= 1 if len(sects) == 1: return sects[0] return binary.get_section(CONST_SECTION) def is_vaddr_in_section(vaddr, section): """Checks if given virtual address is inside given section. Args: vaddr: A virtual address. section: A section of the binary. Returns: True: if the address is inside the section False: Otherwise """ return vaddr >= section.virtual_address \ and vaddr < section.virtual_address + section.size def unpack_pointer(addr_size, binary, vaddr): """Unpacks a pointer and untags it if it is necessary. Args: binary: A sandbox profile in its binary form. vaddr: A virtual address. addr_size: The size of an address (4 or 8). Returns: A pointer. """ ptr = unpack( binary.get_content_from_virtual_address(vaddr, addr_size)) if addr_size == 8: ptr = untag_pointer(ptr) return ptr def extract_data_tables_from_section(binary: lief.MachO.Binary, to_data, section): """ Generic implementation of table search. A table is formed of adjacent pointers to data. Args: binary: A sandbox profile in its binary form. to_data: Function that checks if the data is valid. This function returns None for invalid data and anything else otherwise. section: A section of the binary. Returns: An array of tables (arrays of data). """ addr_size = binary_get_word_size(binary) start_addr = section.virtual_address end_addr = section.virtual_address + section.size tables = [] vaddr = start_addr while vaddr <= end_addr - addr_size: ptr = unpack_pointer(addr_size, binary, vaddr) data = to_data(binary, ptr) if data is None: vaddr += addr_size continue table = [data] vaddr += addr_size while vaddr <= end_addr - addr_size: ptr = unpack_pointer(addr_size, binary, vaddr) data = to_data(binary, ptr) if data is None: break table.append(data) vaddr += addr_size if table not in tables: tables.append(table) vaddr += addr_size return tables def extract_string_tables(binary: lief.MachO.Binary): """Extracts string tables from the given MachO binary. Args: binary: A sandbox profile in its binary form. Returns: The string tables. """ return extract_data_tables_from_section(binary, binary_get_string_from_address, get_tables_section(binary)) def extract_separated_profiles(binary, string_tables): """Extract separated profiles from given MachO binary. It requires all string tables. This function is intended to be used for older version of iOS(<=7) because in newer versions the sandbox profiles are bundled. 
Args: binary: A sandbox profile in its binary form. string_tables: The extracted string tables. Returns: A zip object with profiles. """ def get_profile_names(): """Extracts the profile names. Returns: A list with the names of the sandbox profiles. """ def transform(arr): if len(arr) <= 3: return None ans = [] tmp = [] for val in arr: if val in ['default', '0123456789abcdef']: ans.append(tmp) tmp = [] else: tmp.append(val) ans.append(tmp) return ans def get_sol(posible): ans = [arr for arr in posible if 'com.apple.sandboxd' in arr] assert len(ans) == 1 return ans[0] profile_names_v = [transform(v) for v in string_tables] profile_names_v = [v for v in profile_names_v if v is not None] profile_names_v = [x for v in profile_names_v for x in v] return get_sol(profile_names_v) def get_profile_contents(): """Extracts the profile names. Returns: The contents of the sandbox profiles. """ def get_profile_content(binary, vaddr): addr_size = binary_get_word_size(binary) section = get_section_from_segment(binary, "__DATA", DATA_SECTION) if not is_vaddr_in_section(vaddr, section): return None data = binary.get_content_from_virtual_address(vaddr, 2 * addr_size) if len(data) != 2 * addr_size: return None data_vaddr = unpack(data[:addr_size]) size = unpack(data[addr_size:]) if not is_vaddr_in_section(vaddr, section): return None data = binary.get_content_from_virtual_address(data_vaddr, size) if len(data) != size: return None return bytes(data) contents_v = [v for v in extract_data_tables_from_section(binary, get_profile_content, get_tables_section(binary)) if len(v) > 3] assert len(contents_v) == 1 return contents_v[0] profile_names = get_profile_names() profile_contents = get_profile_contents() assert len(profile_names) == len(profile_contents) return zip(profile_names, profile_contents) def extract_sbops(string_tables): """ Extracts sandbox operations from a given MachO binary. If the sandbox profiles are stored either in sandboxd or sandbox kernel extension, the operations are stored always in the kernel extension. The sandbox operations are stored similar to the separated sandbox profiles but this time we have only one table: the name table. Args: string_tables: The binary's string tables. Returns: The sandbox operations. """ def transform(arr): if len(arr) <= 3: return None idxs = [] for idx, val in enumerate(arr): if val == 'default': idxs.append(idx) return [arr[idx:] for idx in idxs] def get_sol(possible): assert len(possible) >= 1 sol = [] if len(possible) > 1: cnt = min(len(arr) for arr in possible) for vals in zip(*[val[:cnt] for val in possible]): if not all(val == vals[0] for val in vals): break sol.append(vals[0]) else: sol.append(possible[0][0]) for pos in possible[0][1:]: if pos in ['HOME', 'default']: break sol.append(pos) return sol sbops_v = [transform(v) for v in string_tables] sbops_v = [v for v in sbops_v if v is not None and v != []] sbops_v = [x for v in sbops_v for x in v] return get_sol(sbops_v) def get_ios_major_version(version: str): """Extracts the major iOS version from a given version. Args: version: A string with the 'full' version. Returns: An integer with the major iOS version. """ return int(version.split('.')[0]) def findall(searching, pattern): """Finds all the substring in the given string. Args: searching: A string. pattern: A pattern that needs to be searched in the searching string. Returns: The indexes of all substrings equal to pattern inside searching string. 
""" i = searching.find(pattern) while i != -1: yield i i = searching.find(pattern, i + 1) def check_regex(data: bytes, base_index: int, ios_version: int): """ Checks if the regular expression (from sandbox profile) at offset base_index from data is valid for newer versions of iOS(>=8). Args: data: An array of bytes. base_index: The starting index. ios_version: An integer representing the iOS version. Returns: True: if the regular expression is valid for iOS version >= 8. False: otherwise. """ if base_index + 0x10 > len(data): return False if ios_version >= 13: size = struct.unpack('I', data[base_index + 0x2: base_index + 0x6])[0] else: size = struct.unpack('I', data[base_index + 0x4: base_index + 0x8])[0] if size > 0x1000 or size < 0x8 or base_index + size + 4 > len(data): return False if version != 3: return False if ios_version >= 13: sub_size = struct.unpack('= 13). Args: base_index: The starting index. count: Bundle size. data: An array of bytes. Returns: The new base index and an offset. """ re_offset = base_index + 12 op_nodes_count = struct.unpack('=8). Args: data: An array of bytes. base_index: The starting index. ios_version: An integer representing the iOS version. Returns: True: if the sandbox profile bundle is valid. False: otherwise. """ if len(data) - base_index < 50: return False re_offset, aux = struct.unpack('<2H', data[base_index + 2:base_index + 6]) if ios_version >= 13: count = struct.unpack('= 12: count = (aux - re_offset) * 4 # bundle should be big if count < 0x10: return False else: count = aux if count > 0x1000 or re_offset < 0x10: return False if ios_version >= 13: base_index, re_offset = unpack_for_newer_ios(base_index, count, data) else: re_offset = base_index + re_offset * 8 if len(data) - re_offset < count * 2: return False for off_index in range(re_offset, re_offset + 2 * count, 2): index = struct.unpack('\n" (name, argument) = self.value.values() if argument == None: result_str += (level+1)*"\t" + "\n" else: arg = str(argument).replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += (level+1)*"\t" + "\n" result_str += level*"\t" + "\n" else: (name, argument) = self.value.values() if argument == None: result_str += level*"\t" + "\n" else: arg = str(argument).replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += level*"\t" + "\n" elif self.is_type_require_entitlement(): if self.is_not: result_str += level*"\t" + "\n" level += 1 result_str += level*"\t" + "', '>') result_str += " value=\"" + _tmp + "\" />\n" else: _tmp = str(n.value)[21:-1].replace('&', '&').replace('"', '"').replace('\'', ''').replace('<', '<').replace('>', '>') result_str += " value=\"" + _tmp + "\">\n" result_str += i.recursive_xml_str(level+1, self.is_not) result_str += level*"\t" + "\n" if self.is_not: level -= 1 result_str += level*"\t" + "\n" else: result_str += level*"\t" + "\n" for i, v in enumerate(self.value): result_str += v.recursive_xml_str(level+1, recursive_is_not) result_str += level*"\t" + "\n" return result_str def __str__(self): return self.recursive_str(1, False) def str_debug(self): return self.recursive_str_debug(1, False) def str_simple(self): if self.is_type_single(): return self.value.str_debug() elif self.is_type_require_any(): return "require-any" elif self.is_type_require_all(): return "require-all" elif self.is_type_require_entitlement(): return self.value.str_debug()[1:-1] elif self.is_type_start(): return "start" else: return "unknown-type" def 
str_print_debug(self): if self.is_type_single(): return (self.value.str_debug(), None) elif self.is_type_require_any(): return ("(require-any", ")") elif self.is_type_require_all(): return ("(require-all", ")") elif self.is_type_require_entitlement(): return (self.value.str_debug()[:-1], ")") elif self.is_type_start(): return (None, None) else: return ("unknown-type", None) def str_print(self): if self.is_type_single(): return (str(self.value), None) elif self.is_type_require_any(): return ("(require-any", ")") elif self.is_type_require_all(): return ("(require-all", ")") elif self.is_type_require_entitlement(): return (str(self.value)[:-1], ")") elif self.is_type_start(): return (None, None) else: return ("unknown-type", None) def str_print_not(self): result_str = "" if self.is_type_single(): if self.is_not: value = str(self.value) if "(require-any" in value: result_str = self.value.str_not() else: result_str += "(require-not " + str(self.value) + ")" return result_str def xml_str(self): return self.recursive_xml_str(3, False) class ReducedEdge(): start = None end = None def __init__(self, start=None, end=None): self.start = start self.end = end def str_debug(self): return self.start.str_debug() + " -> " + self.end.str_debug() def str_simple(self): #print "start: %s" % (self.start.str_simple()) #print "end: %s" % (self.end.str_simple()) return "%s -----> %s" % (self.start.str_simple(), self.end.str_simple()) def __str__(self): return str(self.start) + " -> " + str(self.end) class ReducedGraph(): vertices = [] edges = [] final_vertices = [] reduce_changes_occurred = False def __init__(self): self.vertices = [] self.edges = [] self.final_vertices = [] self.reduce_changes_occurred = False def add_vertice(self, v): self.vertices.append(v) def add_edge(self, e): self.edges.append(e) def add_edge_by_vertices(self, v_start, v_end): e = ReducedEdge(v_start, v_end) self.edges.append(e) def set_final_vertices(self): self.final_vertices = [] for v in self.vertices: is_final = True for e in self.edges: if v == e.start: is_final = False break if is_final: self.final_vertices.append(v) def contains_vertice(self, v): return v in self.vertices def contains_edge(self, e): return e in self.edges def contains_edge_by_vertices(self, v_start, v_end): for e in self.edges: if e.start == v_start and e.end == v_end: return True return False def get_vertice_by_value(self, value): for v in self.vertices: if v.is_type_single(): if v.value == value: return v def get_edge_by_vertices(self, v_start, v_end): for e in self.edges: if e.start == v_start and e.end == v_end: return e return None def remove_vertice(self, v): edges_copy = list(self.edges) for e in edges_copy: if e.start == v or e.end == v: self.edges.remove(e) if v in self.vertices: self.vertices.remove(v) def remove_vertice_update_decision(self, v): edges_copy = list(self.edges) for e in edges_copy: if e.start == v: self.edges.remove(e) if e.end == v: e.start.decision = v.decision self.edges.remove(e) if v in self.vertices: self.vertices.remove(v) def remove_edge(self, e): if e in self.edges: self.edges.remove(e) def remove_edge_by_vertices(self, v_start, v_end): e = self.get_edge_by_vertices(v_start, v_end) if e: self.edges.remove(e) def replace_vertice_in_edge_start(self, old, new): global replace_occurred for e in self.edges: if e.start == old: e.start = new replace_occurred = True else: if isinstance(e.start.value, list): e.start.replace_in_list(old, new) if replace_occurred: e.start.decision = new.decision def replace_vertice_in_edge_end(self, old, new): 
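        # Counterpart of replace_vertice_in_edge_start(): re-point at `new` every
        # edge whose end is `old`, patch list-valued edge endpoints in place, and
        # track whether a direct swap happened via the module-level flag.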
global replace_occurred for e in self.edges: if e.end == old: e.end = new replace_occurred = True else: if isinstance(e.end.value, list): e.end.replace_in_list(old, new) if replace_occurred: e.end.decision = new.decision def replace_vertice_in_single_vertices(self, old, new): for v in self.vertices: if len(self.get_next_vertices(v)) == 0 and len(self.get_prev_vertices(v)) == 0: if isinstance(v.value, list): v.replace_in_list(old, new) def replace_vertice_list(self, old, new): for v in self.vertices: if isinstance(v.value, list): v.replace_sublist_in_list(old, new) if set(self.get_next_vertices(v)) == set(old): for n in old: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(v, new) if set(self.get_prev_vertices(v)) == set(old): for n in old: self.remove_edge_by_vertices(n, v) self.add_edge_by_vertices(new, v) def get_next_vertices(self, v): next_vertices = [] for e in self.edges: if e.start == v: next_vertices.append(e.end) return next_vertices def get_prev_vertices(self, v): prev_vertices = [] for e in self.edges: if e.end == v: prev_vertices.append(e.start) return prev_vertices def get_start_vertices(self): start_vertices = [] for v in self.vertices: if not self.get_prev_vertices(v): start_vertices.append(v) return start_vertices def get_end_vertices(self): end_vertices = [] for v in self.vertices: if not self.get_next_vertices(v): end_vertices.append(v) return end_vertices def reduce_next_vertices(self, v): next_vertices = self.get_next_vertices(v) if len(next_vertices) <= 1: return self.reduce_changes_occurred = True new_vertice = ReducedVertice("require-any", next_vertices, next_vertices[0].decision) add_to_final = False for n in next_vertices: self.remove_edge_by_vertices(v, n) self.replace_vertice_list(next_vertices, new_vertice) for n in next_vertices: if n in self.final_vertices: self.final_vertices.remove(n) add_to_final = True # If no more next vertices, remove vertice. if not self.get_next_vertices(n): if n in self.vertices: self.vertices.remove(n) self.add_edge_by_vertices(v, new_vertice) self.add_vertice(new_vertice) if add_to_final: self.final_vertices.append(new_vertice) def reduce_prev_vertices(self, v): prev_vertices = self.get_prev_vertices(v) if len(prev_vertices) <= 1: return self.reduce_changes_occurred = True new_vertice = ReducedVertice("require-any", prev_vertices, v.decision) for p in prev_vertices: self.remove_edge_by_vertices(p, v) self.replace_vertice_list(prev_vertices, new_vertice) for p in prev_vertices: # If no more prev vertices, remove vertice. 
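            # (a predecessor folded into the require-any node that has no other
            # incoming edges is unreachable and can be dropped)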
            if not self.get_prev_vertices(p):
                if p in self.vertices:
                    self.vertices.remove(p)
        self.add_vertice(new_vertice)
        self.add_edge_by_vertices(new_vertice, v)

    def reduce_vertice_single_prev(self, v):
        global replace_occurred
        prev = self.get_prev_vertices(v)
        if len(prev) != 1:
            logger.debug("not a single prev for node")
            return
        p = prev[0]
        nexts = self.get_next_vertices(p)
        if len(nexts) > 1 or nexts[0] != v:
            logger.debug("multiple nexts for prev")
            return
        require_all_vertices = []
        if p.is_type_require_all():
            require_all_vertices.extend(p.value)
        else:
            require_all_vertices.append(p)
        if v.is_type_require_all():
            require_all_vertices.extend(v.value)
        else:
            require_all_vertices.append(v)
        new_vertice = ReducedVertice("require-all", require_all_vertices, v.decision)
        self.remove_edge_by_vertices(p, v)
        replace_occurred = False
        self.replace_vertice_in_edge_start(v, new_vertice)
        self.replace_vertice_in_edge_end(p, new_vertice)
        self.replace_vertice_in_single_vertices(p, new_vertice)
        self.replace_vertice_in_single_vertices(v, new_vertice)
        self.remove_vertice(p)
        self.remove_vertice(v)
        if not replace_occurred:
            self.add_vertice(new_vertice)
        if v in self.final_vertices:
            self.final_vertices.remove(v)
            self.final_vertices.append(new_vertice)

    def reduce_vertice_single_next(self, v):
        global replace_occurred
        next = self.get_next_vertices(v)
        if len(next) != 1:
            return
        n = next[0]
        prevs = self.get_prev_vertices(n)
        if len(prevs) > 1 or prevs[0] != v:
            return
        require_all_vertices = []
        if v.is_type_require_all():
            require_all_vertices.extend(v.value)
        else:
            require_all_vertices.append(v)
        if n.is_type_require_all():
            require_all_vertices.extend(n.value)
        else:
            require_all_vertices.append(n)
        new_vertice = ReducedVertice("require-all", require_all_vertices, n.decision)
        self.remove_edge_by_vertices(v, n)
        replace_occurred = False
        self.replace_vertice_in_edge_start(n, new_vertice)
        self.replace_vertice_in_edge_end(v, new_vertice)
        self.replace_vertice_in_single_vertices(v, new_vertice)
        self.replace_vertice_in_single_vertices(n, new_vertice)
        self.remove_vertice(v)
        self.remove_vertice(n)
        if not replace_occurred:
            self.add_vertice(new_vertice)
        if n in self.final_vertices:
            self.final_vertices.remove(n)
            self.final_vertices.append(new_vertice)

    def reduce_graph(self):
        self.set_final_vertices()
        logger.debug("before everything:\n" + self.str_simple())
        # Do until no more changes.
        while True:
            self.reduce_changes_occurred = False
            copy_vertices = list(self.vertices)
            for v in copy_vertices:
                self.reduce_next_vertices(v)
            if self.reduce_changes_occurred == False:
                break
        logger.debug("after next:\n" + self.str_simple())
        # Do until no more changes.
        while True:
            self.reduce_changes_occurred = False
            copy_vertices = list(self.vertices)
            for v in copy_vertices:
                self.reduce_prev_vertices(v)
            if self.reduce_changes_occurred == False:
                break
        logger.debug("after next/prev:\n" + self.str_simple())
        # Reduce graph starting from final vertices. Keep going until
        # final vertices don't change during an iteration.
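        # Fixed-point iteration: merging a vertex with its single predecessor can
        # expose further require-all merges, so repeat until the final-vertex set
        # stabilizes.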
while True: copy_final_vertices = list(self.final_vertices) for v in copy_final_vertices: logger.debug("reducing single prev vertex: " + v.str_debug()) self.reduce_vertice_single_prev(v) logger.debug("### new graph is:") logger.debug(self.str_simple()) if set(copy_final_vertices) == set(self.final_vertices): break for e in self.edges: v = e.end logger.debug("reducing single prev vertex: " + v.str_debug()) self.reduce_vertice_single_prev(v) logger.debug("after everything:\n" + self.str_simple()) def reduce_graph_with_metanodes(self): # Add require-any metanode if current node has multiple successors. copy_vertices = list(self.vertices) for v in copy_vertices: nlist = self.get_next_vertices(v) if len(nlist) >= 2: new_node = ReducedVertice("require-any", None, None) self.add_vertice(new_node) self.add_edge_by_vertices(v, new_node) for n in nlist: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(new_node, n) start_list = self.get_start_vertices() new_node = ReducedVertice("start", None, None) self.add_vertice(new_node) for s in start_list: self.add_edge_by_vertices(new_node, s) # Add require-all metanode if current node has a require-any as a predecessor and is followed by another node. copy_vertices = list(self.vertices) for v in copy_vertices: prev_vertices = list(self.get_prev_vertices(v)) next_vertices = list(self.get_next_vertices(v)) for p in prev_vertices: if (p.is_type_require_any() or p.is_type_start()) and next_vertices: # Except for when a require-entitlement ending block. if v.is_type_require_entitlement(): has_next_nexts = False for n in next_vertices: if n.is_type_require_any(): for n2 in self.get_next_vertices(n): if self.get_next_vertices(n2): has_next_nexts = True break else: if self.get_next_vertices(n): has_next_nexts = True break if not has_next_nexts: continue new_node = ReducedVertice("require-all", None, None) self.add_vertice(new_node) self.remove_edge_by_vertices(p, v) self.add_edge_by_vertices(p, new_node) self.add_edge_by_vertices(new_node, v) def str_simple_with_metanodes(self): logger.debug("==== vertices:\n") for v in self.vertices: logger.debug(v.str_simple()) logger.debug("==== edges:\n") for e in self.edges: logger.debug(e.str_simple()) def str_simple(self): message = "==== vertices:\n" for v in self.vertices: message += "decision: " + str(v.decision) + "\t" + v.str_debug() + "\n" message += "==== final vertices:\n" for v in self.final_vertices: message += "decision: " + str(v.decision) + "\t" + v.str_debug() + "\n" message += "==== edges:\n" for e in self.edges: message += "\t" + e.str_debug() + "\n" return message def __str__(self): result_str = "" for v in self.vertices: result_str += "(" + str(v.decision) + " " if len(self.get_next_vertices(v)) == 0 and len(self.get_next_vertices(v)) == 0: if v in self.final_vertices: result_str += str(v) + "\n" result_str += ")\n" for e in self.edges: result_str += str(e) + "\n" result_str += "\n" return result_str def remove_builtin_filters(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("###\$\$\$\*\*\*", str(v)): self.remove_vertice_update_decision(v) def reduce_integrated_vertices(self, integrated_vertices): if len(integrated_vertices) == 0: return (None, None) if len(integrated_vertices) > 1: return (ReducedVertice("require-any", integrated_vertices, integrated_vertices[0].decision), integrated_vertices[0].decision) require_all_vertices = [] v = integrated_vertices[0] decision = None while True: if not re.search("entitlement-value #t", str(v)): require_all_vertices.append(v) 
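            # Advance along the single-successor chain, consuming each visited
            # vertex from the graph.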
next_vertices = self.get_next_vertices(v) if decision == None and v.decision != None: decision = v.decision self.remove_vertice(v) if v in self.final_vertices: self.final_vertices.remove(v) if next_vertices: v = next_vertices[0] else: break if len(require_all_vertices) == 0: return (None, v.decision) if len(require_all_vertices) == 1: return (ReducedVertice(value=require_all_vertices[0].value, decision=require_all_vertices[0].decision, is_not=require_all_vertices[0].is_not), v.decision) return (ReducedVertice("require-all", require_all_vertices, require_all_vertices[len(require_all_vertices)-1].decision), v.decision) def aggregate_require_entitlement(self, v): next_vertices = [] prev_vertices = self.get_prev_vertices(v) integrated_vertices = [] for n in self.get_next_vertices(v): if not re.search("entitlement-value", str(n)): next_vertices.append(n) break integrated_vertices.append(n) current_list = [ n ] while current_list: current = current_list.pop() for n2 in self.get_next_vertices(current): if not re.search("entitlement-value", str(n2)): self.remove_edge_by_vertices(current, n2) next_vertices.append(n2) else: current_list.append(n2) new_vertice = ReducedVertice(type="require-entitlement", value=(v, None), decision=None, is_not=v.is_not) for p in prev_vertices: self.remove_edge_by_vertices(p, v) self.add_edge_by_vertices(p, new_vertice) for n in next_vertices: self.remove_edge_by_vertices(v, n) self.add_edge_by_vertices(new_vertice, n) for i in integrated_vertices: self.remove_edge_by_vertices(v, i) self.remove_vertice(v) self.add_vertice(new_vertice) if v in self.final_vertices: self.final_vertices.remove(v) self.final_vertices.append(new_vertice) (new_integrate, decision) = self.reduce_integrated_vertices(integrated_vertices) for i in integrated_vertices: self.remove_vertice(i) if i in self.final_vertices: self.final_vertices.remove(i) new_vertice.set_integrated_vertice(new_integrate) new_vertice.set_decision(decision) def aggregate_require_entitlement_nodes(self): copy_vertices = list(self.vertices) idx = 0 while idx < len(copy_vertices): v = copy_vertices[idx] if re.search("require-entitlement", str(v)): self.aggregate_require_entitlement(v) idx += 1 def cleanup_filters(self): self.remove_builtin_filters() self.aggregate_require_entitlement_nodes() def remove_builtin_filters_with_metanodes(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("###\$\$\$\*\*\*", v.str_simple()): self.remove_vertice(v) elif re.search("entitlement-value #t", v.str_simple()): self.remove_vertice(v) elif re.search("entitlement-value-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" elif re.search("global-name-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" elif re.search("local-name-regex #\"\.\"", v.str_simple()): v.value.non_terminal.argument = "#\".+\"" def replace_require_entitlement_with_metanodes(self, v): prev_list = self.get_prev_vertices(v) next_list = self.get_next_vertices(v) new_node = ReducedVertice(type="require-entitlement", value=v.value, decision=None, is_not=v.is_not) self.add_vertice(new_node) self.remove_vertice(v) for p in prev_list: self.add_edge_by_vertices(p, new_node) for n in next_list: self.add_edge_by_vertices(new_node, n) def aggregate_require_entitlement_with_metanodes(self): copy_vertices = list(self.vertices) for v in copy_vertices: if re.search("require-entitlement", str(v)): self.replace_require_entitlement_with_metanodes(v) def cleanup_filters_with_metanodes(self): 
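        # Two passes: drop the synthetic builtin filter vertices and normalize the
        # catch-all #"." regex filters, then wrap require-entitlement vertices in
        # metanodes.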
self.remove_builtin_filters_with_metanodes() self.aggregate_require_entitlement_with_metanodes() def print_vertices_with_operation(self, operation, out_f): allow_vertices = [v for v in self.vertices if v.decision == "allow"] deny_vertices = [v for v in self.vertices if v.decision == "deny"] if allow_vertices: out_f.write("(allow %s " % (operation)) if len(allow_vertices) > 1: for v in allow_vertices: out_f.write("\n" + 8*" " + str(v)) else: out_f.write(str(allow_vertices[0])) out_f.write(")\n") if deny_vertices: out_f.write("(deny %s " % (operation)) if len(deny_vertices) > 1: for v in deny_vertices: out_f.write("\n" + 8*" " + str(v)) else: out_f.write(str(deny_vertices[0])) out_f.write(")\n") def print_vertices_with_operation_metanodes(self, operation, default_is_allow, out_f): # Return if only start node in list. if len(self.vertices) == 1 and self.vertices[0].is_type_start(): return # Use reverse of default rule. if default_is_allow: out_f.write("(deny %s" % (operation)) else: out_f.write("(allow %s" % (operation)) vlist = [] start_list = self.get_start_vertices() start_list.reverse() vlist.insert(0, (None, 0)) for s in start_list: vlist.insert(0, (s, 1)) while True: if not vlist: break (cnode, indent) = vlist.pop(0) if not cnode: out_f.write(")") continue (first, last) = cnode.str_print() if first: if cnode.is_not: if cnode.str_print_not() != "": out_f.write("\n" + indent * "\t" + cnode.str_print_not()) else: out_f.write("\n" + indent * "\t" + "(require-not " + first) if cnode.is_type_require_any() or cnode.is_type_require_all() or cnode.is_type_require_entitlement(): vlist.insert(0, (None, indent)) else: out_f.write(")") else: out_f.write("\n" + indent * "\t" + first) if last: vlist.insert(0, (None, indent)) next_vertices_list = self.get_next_vertices(cnode) if next_vertices_list: if cnode.is_type_require_any() or cnode.is_type_require_all() or cnode.is_type_require_entitlement(): indent += 1 next_vertices_list.reverse() if cnode.is_type_require_entitlement(): pos = 0 for n in next_vertices_list: if (n.is_type_single() and not re.search("entitlement-value", n.str_simple())) or \ n.is_type_require_entitlement(): vlist.insert(pos + 1, (n, indent-1)) else: vlist.insert(0, (n, indent)) pos += 1 else: for n in next_vertices_list: vlist.insert(0, (n, indent)) out_f.write("\n") def dump_xml(self, operation, out_f): allow_vertices = [v for v in self.vertices if v.decision == "allow"] deny_vertices = [v for v in self.vertices if v.decision == "deny"] if allow_vertices: out_f.write("\t\n" % (operation)) out_f.write("\t\t\n") for v in allow_vertices: out_f.write(v.xml_str()) out_f.write("\t\t\n") out_f.write("\t\n") if deny_vertices: out_f.write("\t\n" % (operation)) out_f.write("\t\t\n") for v in deny_vertices: out_f.write(v.xml_str()) out_f.write("\t\t\n") out_f.write("\t\n") def reduce_operation_node_graph(g): # Create reduced graph. rg = ReducedGraph() for node_iter in g.keys(): rv = ReducedVertice(value=node_iter, decision=g[node_iter]["decision"], is_not=g[node_iter]["not"]) rg.add_vertice(rv) for node_iter in g.keys(): rv = rg.get_vertice_by_value(node_iter) for node_next in g[node_iter]["list"]: rn = rg.get_vertice_by_value(node_next) rg.add_edge_by_vertices(rv, rn) # Handle special case for require-not (require-enitlement (...)). 
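    # That is, require-not (require-entitlement ...): entitlement-value vertices
    # sharing the negated node's predecessors are re-attached under it so they
    # nest inside the same require-not block.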
l = len(g.keys()) for idx, node_iter in enumerate(g.keys()): rv = rg.get_vertice_by_value(node_iter) if not re.search("require-entitlement", str(rv)): continue if not rv.is_not: continue c_idx = idx while True: c_idx += 1 if c_idx >= l: break rn = rg.get_vertice_by_value(list(g.keys())[c_idx]) if not re.search("entitlement-value", str(rn)): break prevs_rv = rg.get_prev_vertices(rv) prevs_rn = rg.get_prev_vertices(rn) if sorted(prevs_rv) != sorted(prevs_rn): continue for pn in prevs_rn: rg.remove_edge_by_vertices(rn, pn) rg.add_edge_by_vertices(rv, rn) rg.cleanup_filters_with_metanodes() for node_iter in g.keys(): rv = rg.get_vertice_by_value(node_iter) rg.reduce_graph_with_metanodes() return rg def main(): if len(sys.argv) != 4: print >> sys.stderr, "Usage: %s binary_sandbox_file operations_file ios_version" % (sys.argv[0]) sys.exit(-1) ios_major_version = int(sys.argv[3].split('.')[0]) # Read sandbox operations. sb_ops = [l.strip() for l in open(sys.argv[2])] num_sb_ops = len(sb_ops) logger.info("num_sb_ops:", num_sb_ops) f = open(sys.argv[1], "rb") operation_nodes = build_operation_nodes(f, num_sb_ops, ios_major_version) global num_regex f.seek(4) num_regex = struct.unpack("I', ''.join([chr(x) for x in re[i:i+4]]))[0] node_transition = struct.unpack('>I', ''.join([chr(x) for x in re[i+4:i+8]]))[0] node_arg = struct.unpack('>I', ''.join([chr(x) for x in re[i+8:i+12]]))[0] i += 12 logger.debug('node idx:{:#010x} type: {:#02x} arg: {:#010x}' \ ' transition: {:#010x}'.format(node_idx, node_type,node_arg, node_transition)) assert(node_type in node_type_dispatch_table) regex_list.append( node_type_dispatch_table[node_type]( node_type, node_arg, node_transition, node_idx)) return i def class_parse(re, i, classes, class_idx): def transform(x): c = chr(x) if c in '[]-': return '\\' + c else: return c class_size = struct.unpack('>I', ''.join([chr(x) for x in re[i:i+4]]))[0] i += 0x4 content = struct.unpack('>{}I'.format(class_size), ''.join([chr(x) for x in re[i:i+4*class_size]])) i += 0x4 * class_size assert(class_size % 2 == 0) cls = '' for idx in range(0, class_size, 2): start = content[idx] end = content[idx+1] if start != end: cls += '{}-{}'.format(transform(start), transform(end)) else: cls += transform(start) logger.debug('class idx = {:#x} size = {:#x} content=[{}]'.format( class_idx, class_size, cls)) classes.append(cls) return i class RegexParser(object): @staticmethod def parse(re, i, regex_list): node_count = struct.unpack('>I', ''.join([chr(x) for x in re[i:i+0x4]]))[0] logger.debug('node count = {:#x}'.format(node_count)) start_node = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0] logger.debug('start node = {:#x}'.format(start_node)) end_node = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0] logger.debug('end node = {:#x}'.format(end_node)) cclass_count = struct.unpack('>I', ''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0] logger.debug('character class count = {:#x}'.format(cclass_count)) submatch_count = struct.unpack('>I', ''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0] i += 0x14 logger.debug('submatch count = {:#x}'.format(submatch_count)) for node_idx in range(node_count): i = node_parse(re, i, regex_list, node_idx) classes = [] for class_idx in range(cclass_count): i = class_parse(re, i, classes, class_idx) for node in regex_list: if node['type'] == 'class': node['value'] = '[{}]'.format(classes[node['value']]) elif node['type'] == 'class_exclude': node['value'] = '[{}]'.format(classes[node['value']]) regex_list[start_node]['start_node'] = 
True ================================================ FILE: reverse-sandbox/regex_parser_v2.py ================================================ import logging import struct logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) def parse_character(node_type, node_arg, node_transition, node_idx): value = chr(node_arg & 0xff) if value == ".": value = "[.]" return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": value} def parse_end(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "end", "value": 0} def parse_jump_forward(node_type, node_arg, node_transition, node_idx): jump_to = node_arg return { "pos": node_idx, "nextpos": node_transition, "type": "jump_forward", "value": jump_to} def parse_jump_backward(node_type, node_arg, node_transition, node_idx): jump_to = node_transition return { "pos": node_idx, "nextpos": node_transition, "type": "jump_backward", "value": jump_to} def parse_beginning_of_line(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "^"} def parse_end_of_line(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "$"} def parse_dot(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "."} def parse_character_class(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "class", "value": node_arg} def parse_character_neg_class(node_type, node_arg, node_transition, node_idx): return { "pos": node_idx, "nextpos": node_transition, "type": "class_exclude", "value": node_arg} def parse_parantheses_open(node_type, node_arg, node_transition, node_idx): return parse_jump_backward(node_type, node_arg, node_transition, node_idx) ''' return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": "("} ''' def parse_parantheses_close(node_type, node_arg, node_transition, node_idx): return parse_jump_backward(node_type, node_arg, node_transition, node_idx) ''' return { "pos": node_idx, "nextpos": node_transition, "type": "character", "value": ")"} ''' node_type_dispatch_table = { 0x10: parse_character, 0x22: parse_end, 0x25: parse_jump_forward, 0x26: parse_jump_forward, 0x27: parse_jump_forward, 0x28: parse_jump_forward, 0x30: parse_dot, 0x31: parse_jump_backward, 0x32: parse_beginning_of_line, 0x33: parse_end_of_line, 0x34: parse_character_class, 0x35: parse_character_neg_class, } def node_parse(re, i, regex_list, node_idx): node_type = struct.unpack('> 4) i = i+1 logger.debug("i: %d, num: %d", i, num) values = [] value = "[" for j in range(0, num): values.append(re[i+2*j]) values.append(re[i+2*j+1]) first = values[0] last = values[2*num-1] # In case of excludes. 
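    # An exclude class is encoded with its first bound greater than its last:
    # rotate the bounds and nudge each by one so the emitted [^...] class covers
    # the gaps between the original ranges.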
if (first > last): node_type = "class_exclude" value += "^" for j in range(len(values)-1, 0, -1): values[j] = values[j-1] values[0] = last for j in range(0, len(values)): if j % 2 == 0: values[j] = values[j]+1 else: values[j] = values[j]-1 else: node_type = "class" for j in range(0, len(values), 2): if values[j] < values[j+1]: value += "%s-%s" % (chr(values[j]), chr(values[j+1])) else: value += "%s" % (chr(values[j])) value += "]" regex_list.append({ "pos": i-6-1, "nextpos": i + 2 * num - 6, "type": node_type, "value": value }) message = "values: [", ", ".join([hex(j) for j in values]), "]" logger.debug(message) return i + 2 * num - 1 def parse_end(re, i, regex_list): regex_list.append({ "pos": i-6, "nextpos": i+2-6, "type": "end", "value": 0 }) return i + 1 def parse(re, i, regex_list): # Actual character. if re[i] == 0x02: i = parse_character(re, i, regex_list) # Beginning of line. elif re[i] == 0x19: parse_beginning_of_line(i, regex_list) # End of line. elif re[i] == 0x29: parse_end_of_line(i, regex_list) # Any character. elif re[i] == 0x09: parse_any_character(i, regex_list) # Jump forward. elif re[i] == 0x2f: i = parse_jump_forward(re, i, regex_list) # Jump backward. elif re[i] & 0xf == 0xa: i = parse_jump_backward(re, i, regex_list) # Character class. elif re[i] & 0xf == 0xb: i = parse_character_class(re, i, regex_list) elif re[i] & 0xf == 0x5: i = parse_end(re, i, regex_list) else: logger.warning("##########unknown", hex(re[i])) return i + 1 class RegexParser(object): @staticmethod def parse(re, i, regex_list): length = struct.unpack('= 13: f.seek(get_base_addr(f, ios_version) + offset * 8) len = struct.unpack("\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write('\n') outfile_xml.write(']>\n') outfile_xml.write("\n") # Extract node for 'default' operation (index 0). default_node = operation_node.find_operation_node_by_offset(operation_nodes, op_table[0]) outfile.write("(%s default)\n" % (default_node.terminal)) outfile_xml.write("\t\n" % (default_node.terminal)) # For each operation expand operation node. for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] # Go past operations not in list, in case list is not empty. if ops_to_reverse: if operation not in ops_to_reverse: continue logger.info("parsing operation %s (index %d)", operation, idx) node = operation_node.find_operation_node_by_offset(operation_nodes, offset) if not node: logger.info("operation %s (index %d) has no operation node", operation, idx) continue g = operation_node.build_operation_node_graph(node, default_node) if g: rg = operation_node.reduce_operation_node_graph(g) rg.str_simple_with_metanodes() rg.print_vertices_with_operation_metanodes(operation, default_node.terminal.is_allow(), outfile) #rg.dump_xml(operation, outfile_xml) else: logger.info("no graph for operation %s (index %d)", operation, idx) if node.terminal and default_node.terminal: if node.terminal.type != default_node.terminal.type: outfile.write("(%s %s)\n" % (node.terminal, operation)) outfile_xml.write("\t\n" % (operation, node.terminal)) outfile.close() outfile_xml.write("\n") outfile_xml.close() def get_ios_major_version(release): """ Returns major version of release """ return int(release.split('.')[0]) def is_ios_more_than_10_release(release): """ Returns True if release is using newer (iOS >= 10) binary sandbox profile format. 
""" major_version = get_ios_major_version(release) if major_version < 10: return False return True def display_sandbox_profiles(f, re_table_offset, num_sb_ops, ios_version): logger.info("Printing sandbox profiles from bundle") if ios_version >= 13: f.seek(6) elif ios_version >= 12: f.seek(12) elif ios_version >= 10: f.seek(10) else: f.seek(6) num_profiles = struct.unpack("= 13: f.seek(2) num_operation_nodes = struct.unpack("= 12: f.seek(14 + (num_sb_ops + 2) * 2 * num_profiles) elif ios_version >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * num_profiles) else: f.seek(8 + (num_sb_ops + 2) * 2 * num_profiles) while True: word = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i) elif ios_version >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i) else: f.seek(8 + (num_sb_ops + 2) * 2 * i) name_offset = struct.unpack(" 0: f.seek(vars_offset + i*2) else: f.seek(vars_offset*8 + i*2) current_offset = struct.unpack(" 0: len = struct.unpack("= 13: # extract operation node table count f.seek(2) op_nodes_count = struct.unpack('= 6: header = struct.unpack("= 13: re_table_offset = 12 else: re_table_offset = struct.unpack("= 12: f.seek(8) re_table_count = struct.unpack(" 0: if get_ios_major_version(args.release) >= 13: f.seek(re_table_offset) else: f.seek(re_table_offset * 8) re_offsets_table = struct.unpack("<%dH" % re_table_count, f.read(2 * re_table_count)) for offset in re_offsets_table: if get_ios_major_version(args.release) >= 13: f.seek(get_base_addr(f, get_ios_major_version(args.release)) + offset * 8) re_length = struct.unpack("= 13: # get the regex table entries f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(4) vars_offset = struct.unpack("= 10: f.seek(6) vars_offset = struct.unpack("= 13: f.seek(2) num_operation_nodes = struct.unpack("= 12: f.seek(14 + (num_sb_ops + 2) * 2 * num_profiles) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * num_profiles) else: f.seek(8 + (num_sb_ops + 2) * 2 * num_profiles) while True: word = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i) else: f.seek(8 + (num_sb_ops + 2) * 2 * i) name_offset = struct.unpack("= 13: f.seek(8) regex_table_count = struct.unpack('= 12: f.seek(14 + (num_sb_ops + 2) * 2 * i + 4) elif get_ios_major_version(args.release) >= 10: f.seek(12 + (num_sb_ops + 2) * 2 * i + 4) else: f.seek(8 + (num_sb_ops + 2) * 2 * i + 4) op_table = struct.unpack("<%dH" % num_sb_ops, f.read(2 * num_sb_ops)) for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] logger.info("operation %s (index %u) starts at node offset %u (0x%x)", operation, idx, offset, offset) out_fname = os.path.join(out_dir, name + ".sb") process_profile(f, out_fname, sb_ops, ops_to_reverse, op_table, operation_nodes) else: if get_ios_major_version(args.release) >= 12: f.seek(4) vars_offset = struct.unpack("= 10: f.seek(6) vars_offset = struct.unpack("= 6: f.seek(6) else: f.seek(4) op_table = struct.unpack("<%dH" % num_sb_ops, f.read(2 * num_sb_ops)) for idx in range(1, len(op_table)): offset = op_table[idx] operation = sb_ops[idx] logger.info("operation %s (index %u) starts at node offset %u (0x%x)", operation, idx, offset, offset) # Place file pointer to start of operation nodes area. 
while True: word = struct.unpack("= 0x80: self.update_state_split_byte_read() elif b == 0x00 or b == 0x07: self.update_state_unknown() elif b == 0x05: self.update_state_reset_string() elif b == 0x08: self.update_state_concat_save_byte_read() # XXX: Read two bytes. I don't know what they do. self.get_next_byte() self.get_next_byte() elif b >= 0x10 and b < 0x3f: self.update_state_constant_read() elif b == 0x0b: self.update_state_range_byte_read() elif b == 0x02: self.update_state_plus_read() elif b == 0x06: self.update_state_reset_string() else: self.update_state_token_byte_read() def get_next_byte(self): if self.is_end(): return 0x00 b = struct.unpack("= len(self.binary_string): return True return False def reset_base(self): if len(self.base_stack) >= 1: self.base = self.base_stack.pop() def reset_base_full(self): self.base_stack = [] self.base = "" class SandboxString: rss_stack = [] def parse_byte_string(self, s, global_vars): rss = ReverseStringState(s) base = "" reset_base = False tokens = [] token = "" while True: if rss.state == rss.STATE_UNKNOWN: logger.debug("state is STATE_UNKNOWN") b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_TOKEN_READ: logger.debug("state is STATE_TOKEN_READ") b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_TOKEN_BYTE_READ: logger.debug("state is STATE_TOKEN_BYTE_READ") # String starts with length. prev_state = rss.state_stack[len(rss.state_stack)-1] if prev_state != rss.STATE_TOKEN_READ: token_len = rss.get_length_minus_1() rss.read_token(token_len) rss.update_state_token_read() else: logger.warn("read token byte from token state") break elif rss.state == rss.STATE_CONSTANT_READ: logger.debug("state is STATE_CONSTANT_READ") b = rss.get_last_byte() if b >= 0x10 and b < 0x3f: rss.token = "${" + global_vars[b-0x10] + "}" b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_CONCAT_BYTE_READ: logger.debug("state is STATE_CONCAT_BYTE_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_TOKEN_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONSTANT_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_RANGE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_SINGLE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_PLUS_READ: rss.update_base() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_CONCAT_SAVE_BYTE_READ: logger.debug("state is STATE_CONCAT_SAVE_BYTE_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_TOKEN_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONSTANT_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_RANGE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_SINGLE_BYTE_READ or \ rss.state_stack[len(rss.state_stack)-1] == rss.STATE_PLUS_READ: rss.update_base_stack() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_END_BYTE_READ: logger.debug("state is STATE_END_BYTE_READ") rss.end_current_token() rss.reset_base() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_RANGE_BYTE_READ: logger.debug("state is STATE_RANGE_BYTE_READ") rss.update_base_stack() b = rss.get_next_byte() b_array = [] all_ascii = True token = "" for i in range(0, b+1): b1 = rss.get_next_byte() b2 = rss.get_next_byte() if b1 < 0x20 or b1 > 0x7f or b2 < 0x20 or b2 > 0x7f: all_ascii = False b_array.append((b1,b2)) if all_ascii == False: (b1, b2) = b_array[0] (b3, b4) = b_array[1] if b2 == 0xff and b3 == 0x00: if b1-1 == b4+1: # 
single char exclude token = "[^{:c}]".format(b1-1) else: # range exclude token = "[^{:c}-{:c}]".format(b4+1, b1-1) else: token = "[TODO]" else: token = "[" for (b1, b2) in b_array: token += "{:c}-{:c}".format(b1, b2) token += "]" rss.token = token b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_SPLIT_BYTE_READ: logger.debug("state is STATE_SPLIT_BYTE_READ") substr_len = rss.get_last_byte() - 0x7f substr = rss.get_substring(substr_len) subtokens = self.parse_byte_string(substr, global_vars) rss.end_with_subtokens(subtokens) b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_SINGLE_BYTE_READ: logger.debug("state is STATE_SINGLE_BYTE_READ") rss.read_token(1) b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_RESET_STRING: logger.debug("state is STATE_RESET_STRING") rss.end_current_token() rss.reset_base_full() b = rss.get_next_byte() rss.update_state(b) elif rss.state == rss.STATE_PLUS_READ: logger.debug("state is STATE_PLUS_READ") if rss.state_stack[len(rss.state_stack)-1] == rss.STATE_CONCAT_BYTE_READ: rss.token = "+" rss.update_base() else: logger.warn("previous state is not concat") rss.read_token(1) b = rss.get_next_byte() rss.update_state(b) else: logger.warn("unknown state ({:d})".format(rss.state)) break if rss.is_end(): break # String must end in a STATE_END_BYTE_READ byte. if rss.state == rss.STATE_END_BYTE_READ: logger.debug("state is STATE_END_BYTE_READ") rss.end_current_token() elif rss.state == rss.STATE_UNKNOWN or rss.state == rss.STATE_CONCAT_BYTE_READ: pass elif rss.state_stack[len(rss.state_stack)-1] == rss.STATE_END_BYTE_READ: pass else: logger.warn("last state is not STATE_END_BYTE_READ ({:d})".format(rss.state)) logger.warn("previous state ({:d})".format(rss.state_stack[len(rss.state_stack)-1])) logger.info("initial string: " + " ".join("0x{:02x}".format(ord(c)) for c in s)) logger.info("output_strings (num: {:d}): {:s}".format(len(rss.output_strings), ",".join('"{:s}"'.format(s) for s in rss.output_strings))) return rss.output_strings def __init__(self): self.rss_stack = [] def main(): s = sys.stdin.read() ss = SandboxString() my_global_vars = ["FRONT_USER_HOME", "HOME", "PROCESS_TEMP_DIR"] l = ss.parse_byte_string(s[4:], my_global_vars) print(list(set(l))) if __name__ == "__main__": sys.exit(main()) ================================================ FILE: reverse-sandbox/sandbox_filter.py ================================================ #!/usr/bin/env python import struct import re import logging import logging.config import reverse_sandbox import reverse_string from filters import Filters logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) ios_major_version = 4 keep_builtin_filters = False global_vars = [] base_addr = 0 def get_filter_arg_string_by_offset(f, offset): """Extract string (literal) from given offset.""" f.seek(base_addr + offset * 8) if ios_major_version >= 13: len = struct.unpack("= 10: f.seek(offset * 8) s = f.read(4+len) logger.info("binary string is " + s.encode("hex")) ss = reverse_string.SandboxString() myss = ss.parse_byte_string(s[4:], global_vars) actual_string = "" for sss in myss: actual_string = actual_string + sss + " " actual_string = actual_string[:-1] logger.info("actual string is " + actual_string) return myss type = struct.unpack("= 13: len = struct.unpack("= 10: f.seek(base_addr + offset * 8) s = f.read(4+len) logger.info("binary string is " + s.encode("hex")) ss = reverse_string.SandboxString() myss = ss.parse_byte_string(s[4:], global_vars) 
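# parse_byte_string() returns a list of candidate strings decoded from the
# length-prefixed byte blob; the join below is only for logging, and the
# list itself is what gets returned. The append tag assigned next records
# which syntactic flavor the caller should attach to the filter name when
# it prints the profile ("literal" here).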
append = "literal" actual_string = "" for sss in myss: actual_string = actual_string + sss + " " actual_string = actual_string[:-1] logger.info("actual string is " + actual_string) return (append, myss) type = struct.unpack("= 13: len = struct.unpack("> 8) number = arg & 0xff return '(_IO "%s" %d)' % (letter, number) def get_filter_arg_vnode_type(f, arg): """Convert integer to file (vnode) type string.""" arg_types = { 0x01: "REGULAR-FILE", 0x02: "DIRECTORY", 0x03: "BLOCK-DEVICE", 0x04: "CHARACTER-DEVICE", 0x05: "SYMLINK", 0x06: "SOCKET", 0x07: "FIFO", 0xffff: "TTY" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_owner(f, arg): """Convert integer to process owner string.""" arg_types = { 0x01: "self", 0x02: "pgrp", 0x03: "others", 0x04: "children", 0x05: "same-sandbox" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_socket_domain(f, arg): """Convert integer to socket domain string.""" arg_types = { 0: "AF_UNSPEC", 1: "AF_UNIX", 2: "AF_INET", 3: "AF_IMPLINK", 4: "AF_PUP", 5: "AF_CHAOS", 6: "AF_NS", 7: "AF_ISO", 8: "AF_ECMA", 9: "AF_DATAKIT", 10: "AF_CCITT", 11: "AF_SNA", 12: "AF_DECnet", 13: "AF_DLI", 14: "AF_LAT", 15: "AF_HYLINK", 16: "AF_APPLETALK", 17: "AF_ROUTE", 18: "AF_LINK", 19: "pseudo_AF_XTP", 20: "AF_COIP", 21: "AF_CNT", 22: "pseudo_AF_RTIP", 23: "AF_IPX", 24: "AF_SIP", 25: "pseudo_AF_PIP", 27: "AF_NDRV", 28: "AF_ISDN", 29: "pseudo_AF_KEY", 30: "AF_INET6", 31: "AF_NATM", 32: "AF_SYSTEM", 33: "AF_NETBIOS", 34: "AF_PPP", 35: "pseudo_AF_HDRCMPLT", 36: "AF_RESERVED_36", 37: "AF_IEEE80211", 38: "AF_UTUN", 40: "AF_MAX" } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_socket_type(f, arg): """Convert integer to socket type string.""" arg_types = { 0x01: "SOCK_STREAM", 0x02: "SOCK_DGRAM", 0x03: "SOCK_RAW", 0x04: "SOCK_RDM", 0x05: "SOCK_SEQPACKET" } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_none(f, arg): """Dumb callback function""" return None def get_filter_arg_privilege_id(f, arg): """Convert integer to privilege id string.""" arg_types = { 1000: "PRIV_ADJTIME", 1001: "PRIV_PROC_UUID_POLICY", 1002: "PRIV_GLOBAL_PROC_INFO", 1003: "PRIV_SYSTEM_OVERRIDE", 1004: "PRIV_HW_DEBUG_DATA", 1005: "PRIV_SELECTIVE_FORCED_IDLE", 1006: "PRIV_PROC_TRACE_INSPECT", 1008: "PRIV_KERNEL_WORK_INTERNAL", 6000: "PRIV_VM_PRESSURE", 6001: "PRIV_VM_JETSAM", 6002: "PRIV_VM_FOOTPRINT_LIMIT", 10000: "PRIV_NET_PRIVILEGED_TRAFFIC_CLASS", 10001: "PRIV_NET_PRIVILEGED_SOCKET_DELEGATE", 10002: "PRIV_NET_INTERFACE_CONTROL", 10003: "PRIV_NET_PRIVILEGED_NETWORK_STATISTICS", 10004: "PRIV_NET_PRIVILEGED_NECP_POLICIES", 10005: "PRIV_NET_RESTRICTED_AWDL", 10006: "PRIV_NET_PRIVILEGED_NECP_MATCH", 11000: "PRIV_NETINET_RESERVEDPORT", 14000: "PRIV_VFS_OPEN_BY_ID", } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_process_attribute(f, arg): """Convert integer to process attribute string.""" arg_types = { 0: 'is-plugin', 1: 'is-installer', 2: 'is-restricted', 3: 'is-initproc', } if arg in arg_types.keys(): return '%s' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_csr(f, arg): """Convert integer to csr string.""" arg_types = { 1: 'CSR_ALLOW_UNTRUSTED_KEXTS', 2: 'CSR_ALLOW_UNRESTRICTED_FS', 4: 'CSR_ALLOW_TASK_FOR_PID', 8: 'CSR_ALLOW_KERNEL_DEBUGGER', 16: 'CSR_ALLOW_APPLE_INTERNAL', 32: 'CSR_ALLOW_UNRESTRICTED_DTRACE', 64: 'CSR_ALLOW_UNRESTRICTED_NVRAM', 
128: 'CSR_ALLOW_DEVICE_CONFIGURATION', } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg def get_filter_arg_host_port(f, arg): """Convert integer to host special port string.""" arg_types = { 8: 'HOST_DYNAMIC_PAGER_PORT', 9: 'HOST_AUDIT_CONTROL_PORT', 10: 'HOST_USER_NOTIFICATION_PORT', 11: 'HOST_AUTOMOUNTD_PORT', 12: 'HOST_LOCKD_PORT', 13: 'unknown: 13', 14: 'HOST_SEATBELT_PORT', 15: 'HOST_KEXTD_PORT', 16: 'HOST_CHUD_PORT', 17: 'HOST_UNFREED_PORT', 18: 'HOST_AMFID_PORT', 19: 'HOST_GSSD_PORT', 20: 'HOST_TELEMETRY_PORT', 21: 'HOST_ATM_NOTIFICATION_PORT', 22: 'HOST_COALITION_PORT', 23: 'HOST_SYSDIAGNOSE_PORT', 24: 'HOST_XPC_EXCEPTION_PORT', 25: 'HOST_CONTAINERD_PORT', } if arg in arg_types.keys(): return '"%s"' % (arg_types[arg]) else: return '%d' % arg """An array (dictionary) of filter-converting items A filter is identified by a filter id and a filter argument. They are both stored in binary format (numbers) inside the binary sandbox profile file. Each item in the dictionary is identified by the filter id (used in hexadecimal). The value of each item is the string form of the filter id and the callback function used to convert the binary form of the filter argument to a string form. While there is a one-to-one mapping between the binary form and the string form of the filter id, that is not the case for the filter argument. To convert the binary form of the filter argument to its string form we use one of the callback functions above; almost all callback function names start with get_filter_arg_. """ def convert_filter_callback(f, ios_major_version_arg, keep_builtin_filters_arg, global_vars_arg, re_list, filter_id, filter_arg, base_addr_arg): """Convert filter from binary form to string. Binary form consists of filter id and filter argument: * filter id is the index inside the filters array above * filter argument is an actual parameter (such as a port number), a file offset or a regular expression index The string form consists of the name of the filter (as extracted from the filters array above) and a string representation of the filter argument. The string form of the filter argument is obtained from the binary form through the use of the callback function (as extracted from the filters array above).
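(For instance, a socket filter's numeric argument decodes through
get_filter_arg_socket_domain into a name such as "AF_INET", while a path
filter's argument is an offset resolved through the string table.)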
Function arguments are: f: the binary sandbox profile file regex_list: list of regular expressions filter_id: the binary form of the filter id filter_arg: the binary form of the filter argument """ global regex_list global ios_major_version global keep_builtin_filters global global_vars global base_addr keep_builtin_filters = keep_builtin_filters_arg ios_major_version = ios_major_version_arg global_vars = global_vars_arg regex_list = re_list base_addr = base_addr_arg if not Filters.exists(ios_major_version, filter_id): logger.warn("filter_id {} not in keys".format(filter_id)) return (None, None) filter = Filters.get(ios_major_version, filter_id) if not filter["arg_process_fn"]: logger.warn("no function for filter {}".format(filter_id)) return (None, None) if filter["arg_process_fn"] == "get_filter_arg_string_by_offset_with_type": (append, result) = globals()[filter["arg_process_fn"]](f, filter_arg) if filter_id == 0x01 and append == "path": append = "subpath" if result == None and filter["name"] != "debug-mode": logger.warn("result of calling string offset for filter {} is none".format(filter_id)) return (None, None) return (filter["name"] + append, result) result = globals()[filter["arg_process_fn"]](f, filter_arg) if result == None and not ((filter["name"] in ["debug-mode", "syscall-mask", "machtrap-mask", "kernel-mig-routine-mask"]) or (filter["name"] in ["extension", "mach-extension"] and ios_major_version <= 5)): logger.warn("result of calling arg_process_fn for filter {} is none".format(filter_id)) return (None, None) return (filter["name"], result) ================================================ FILE: reverse-sandbox/sandbox_regex.py ================================================ #!/usr/bin/env python3 import struct import logging import logging.config logging.config.fileConfig("logger.config") logger = logging.getLogger(__name__) from regex_parser_v1 import RegexParser as RegexParserV1 from regex_parser_v2 import RegexParser as RegexParserV2 from regex_parser_v3 import RegexParser as RegexParserV3 class Node(): """Representation of a node inside a regex non-deterministic automaton The most important attribute is the node type, which may be any of the four macros TYPE_... below. 
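For example (illustrative values), a node matching the character "a" could
be built as Node(name="3", type=Node.TYPE_CHARACTER, value="a"), while
jump nodes carry no character value and only encode branching and loops.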
""" TYPE_JUMP_FORWARD = 1 TYPE_JUMP_BACKWARD = 2 TYPE_CHARACTER = 3 TYPE_END = 4 FLAG_WHITE = 1 FLAG_GREY = 2 FLAG_BLACK = 3 name = "" type = None value = None flag = "white" def __init__(self, name=None, type=None, value=''): self.name = name self.type = type self.value = value self.flag = self.FLAG_WHITE def set_name(self, name): self.name = name def set_type_jump_forward(self): self.type = self.TYPE_JUMP_FORWARD def set_type_jump_backward(self): self.type = self.TYPE_JUMP_BACKWARD def set_type_character(self): self.type = self.TYPE_CHARACTER def set_type_end(self): self.type = self.TYPE_END def is_type_end(self): return self.type == self.TYPE_END def is_type_jump(self): return self.type == self.TYPE_JUMP_BACKWARD or self.type == self.TYPE_JUMP_FORWARD def is_type_jump_backward(self): return self.type == self.TYPE_JUMP_BACKWARD def is_type_jump_forward(self): return self.type == self.TYPE_JUMP_FORWARD def is_type_character(self): return self.type == self.TYPE_CHARACTER def set_value(self, value): self.value = value def set_flag_white(self): self.flag = self.FLAG_WHITE def set_flag_grey(self): self.flag = self.FLAG_GREY def set_flag_black(self): self.flag = self.FLAG_BLACK def __str__(self): if self.type == self.TYPE_JUMP_BACKWARD: return "(%s: jump backward)" % (self.name) elif self.type == self.TYPE_JUMP_FORWARD: return "(%s: jump forward)" % (self.name) elif self.type == self.TYPE_END: return "(%s: end)" % (self.name) else: return "(%s: %s)" % (self.name, self.value) class Graph(): """Representation of a regex NDA (Non-Deterministic Automaton) Use this class to convert a regex list of items into its canonical regular expression string. """ graph_dict = {} canon_graph_dict = {} node_list = [] start_node = None end_states = [] start_state = 0 regex = [] unified_regex = "" def __init__(self): self.graph_dict = {} def add_node(self, node, next_list=None): self.graph_dict[node] = next_list def has_node(self, node): return node in graph_dict.keys() def update_node(self, node, next_list): self.graph_dict[node] = next_list def add_new_next_to_node(self, node, next): self.graph_dict[node].append(next) def __str__(self): # Get maximum node number. max = -1 for node in self.graph_dict.keys(): if max < int(node.name): max = int(node.name) # Create graph list for ordered listing of nodes. graph_list = [None] * (max+1) for node in self.graph_dict.keys(): actual_string = str(node) + ":" for next_node in self.graph_dict[node]: actual_string += " " + str(next_node) graph_list[int(node.name)] = actual_string # Store node graph in ret_string. ret_string = "\n-- Node graph --\n" for s in graph_list: if s: ret_string += s + "\n" # Store canonical graph in ret_string. 
ret_string += "\n-- Canonical graph --\n" for state in self.canon_graph_dict.keys(): if state == self.start_state: ret_string += "> " elif state in self.end_states: ret_string += "# " else: ret_string += " " ret_string += "%d: %s\n" % (state, self.canon_graph_dict[state]) ret_string += "\n" return ret_string def get_node_for_idx(self, idx): if idx >= len(self.node_list) or idx < 0: return None return self.node_list[idx] def get_re_index_for_pos(self, regex_list, pos): for idx, item in enumerate(regex_list): if item["pos"] == pos: return idx for idx, item in enumerate(regex_list): if item["pos"]-1 == pos: return idx return -1 def get_next_idx_for_regex_item(self, regex_list, regex_item): result = self.get_re_index_for_pos(regex_list, regex_item["nextpos"]) assert(result >= 0) return result def fill_from_regex_list(self, regex_list): # First create list of nodes. No pointers/links at this point. # Create a node for each item. self.node_list = [] for idx, item in enumerate(regex_list): node = Node(name="%s" % (idx)) if item["type"] == "jump_backward": node.set_type_jump_backward() elif item["type"] == "jump_forward": node.set_type_jump_forward() elif item["type"] == "end": node.set_type_end() else: node.set_type_character() node.set_value(item["value"]) if 'start_node' in item and item['start_node'] == True: assert(self.start_node == None) self.start_node = node self.node_list.append(node) self.graph_dict = {} for idx, node in enumerate(self.node_list): # If node is end node ignore. if node.is_type_end(): self.graph_dict[node] = [] elif node.is_type_character(): next = self.get_node_for_idx( self.get_next_idx_for_regex_item(regex_list, regex_list[idx])) if next: self.graph_dict[node] = [ next ] else: self.graph_dict[node] = [] # Node is jump node. elif node.is_type_jump_backward(): next_idx = self.get_re_index_for_pos(regex_list, regex_list[idx]["value"]) next = self.get_node_for_idx(next_idx) if next: self.graph_dict[node] = [next] else: self.graph_dict[node] = [] elif node.is_type_jump_forward(): next_idx1 = self.get_next_idx_for_regex_item( regex_list, regex_list[idx]) next_idx2 = self.get_re_index_for_pos(regex_list, regex_list[idx]["value"]) next1 = self.get_node_for_idx(next_idx1) next2 = self.get_node_for_idx(next_idx2) self.graph_dict[node] = [] if next1: self.graph_dict[node].append(next1) if next2: self.graph_dict[node].append(next2) def get_character_nodes(self, node): node_list = [] for next in self.graph_dict[node]: if next.is_type_character() or next.is_type_end(): node_list.append(next) else: node_list = list(set(node_list).union(self.get_character_nodes(next))) return node_list def find_node_type_jump(self, current_node, node, backup_dict): if not current_node.is_type_jump(): return False if current_node == node: return True if not self.graph_dict[current_node]: return False for next_node in backup_dict[current_node]: if self.find_node_type_jump(next_node, node, backup_dict): return True return False def reduce(self): for node in self.graph_dict.keys(): if node.is_type_character(): self.graph_dict[node] = self.get_character_nodes(node) old_dict = dict(self.graph_dict) backup_dict = dict(self.graph_dict) for node in old_dict.keys(): if node.is_type_jump(): if self.find_node_type_jump(self.start_node, node, backup_dict): continue del self.graph_dict[node] def get_edges(self, node): edges = [] is_end_state = False for next in self.graph_dict[node]: if next.is_type_end(): is_end_state = True else: edges.append((next.value, int(next.name))) return is_end_state, edges def 
convert_to_canonical(self): self.end_states = [] for node in self.graph_dict.keys(): if node.is_type_end(): continue state_idx = int(node.name) is_end_state, self.canon_graph_dict[state_idx] = self.get_edges(node) if is_end_state == True: self.end_states.append(state_idx) for node in self.graph_dict.keys(): if node.name == "0": self.start_state = -1 self.canon_graph_dict[-1] = [ (node.value, 0) ] logger.debug(self.canon_graph_dict) logger.debug("end_states:") logger.debug(self.end_states) logger.debug("start_state:") logger.debug(self.start_state) def need_use_plus(self, initial_string, string_to_add): if not string_to_add.endswith("*"): return False if string_to_add.startswith("(") and string_to_add[-2:-1] == ")": actual_part = string_to_add[1:-2] else: actual_part = string_to_add[:-1] if initial_string.endswith(actual_part): return True if initial_string.endswith(string_to_add): return True return False def unify_two_strings(self, s1, s2): # Find largest common starting substring. lcss = "" for i in range(1, len(s1)+1): if s2.find(s1[:i], 0, i) != -1: lcss = s1[:i] if lcss: s1 = s1[len(lcss):] s2 = s2[len(lcss):] # Find largest common ending substring. lces = "" for i in range(1, len(s1)+1): if s2.find(s1[-i:], len(s2)-i, len(s2)) != -1: lces = s1[-i:] if lces: s1 = s1[:len(s1)-len(lces)] s2 = s2[:len(s2)-len(lces)] if not s1 and not s2: return lcss + lces if s1 and s2: return lcss + "(" + s1 + "|" + s2 + ")" + lces # Make s1 the empty string. if not s2: aux = s1 s1 = s2 s2 = aux if s2[-1] == '+': s2 = s2[:-1] + '*' else: if len(s2) > 1: s2 = "(" + s2 + ")?" else: s2 = s2 + '?' return lcss + s2 + lces def unify_strings(self, string_list): unified = "" if not string_list: return None if len(string_list) == 1: return string_list[0] # We now know we have multiple strings. Merge two at a time. current = string_list[0] for s in string_list[1:]: current = self.unify_two_strings(current, s) return current def remove_state(self, state_to_remove): itself_string = "" for (next_string, next_state) in self.canon_graph_dict[state_to_remove]: if next_state == state_to_remove: if len(next_string) > 1: itself_string = "(%s)*" % next_string else: itself_string = "%s*" % next_string # Create list of to_strings indexed by to_states. to_strings = {} for to_state in self.canon_graph_dict.keys(): to_strings[to_state] = [] if to_state == state_to_remove: continue for (iter_to_string, iter_to_state) in self.canon_graph_dict[state_to_remove]: if iter_to_state == to_state: to_strings[to_state].append(iter_to_string) # Unify multiple strings leading to the same to_state. unified_to_string = {} for to_state in to_strings.keys(): unified_to_string[to_state] = self.unify_strings(to_strings[to_state]) # Go through all graph edges. for from_state in self.canon_graph_dict.keys(): # Pass current state to remove. if from_state == state_to_remove: continue items_to_remove_list = [] for (next_string, next_state) in self.canon_graph_dict[from_state]: # Only if edge points to state_to_remove. if next_state != state_to_remove: continue # Plan edge to remove. Create new edge bypassing state_to_remove. items_to_remove_list.append((next_string, next_state)) for to_state in self.canon_graph_dict.keys(): if len(to_strings[to_state]) == 0: continue to_string = unified_to_string[to_state] #for (to_string, to_state) in self.canon_graph_dict[state_to_remove]: # # If state points to itself, do not add edge. # if to_state == state_to_remove: # continue # Add new edge, consider if state points to itself. 
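# This is the classic state-elimination step for converting an automaton
# into a regular expression: removing state q rewires every path
# p --A--> q --B--> r into a single edge p --A S* B--> r, where S
# (itself_string) is q's self-loop label. need_use_plus() is a cosmetic
# shortcut: when A already ends in S, "A S*" is emitted as "A+" instead.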
if self.need_use_plus(next_string, itself_string): self.canon_graph_dict[from_state].append((next_string + "+" + to_string, to_state)) continue self.canon_graph_dict[from_state].append((next_string + itself_string + to_string, to_state)) for (next_string, next_state) in items_to_remove_list: self.canon_graph_dict[from_state].remove((next_string, next_state)) del self.canon_graph_dict[state_to_remove] def simplify(self): tmp_dict = dict(self.canon_graph_dict) for state in tmp_dict.keys(): if state != self.start_state and state not in self.end_states: self.remove_state(state) def combine_start_end_nodes(self): working_strings = self.canon_graph_dict[self.start_state] final_strings = [] string_added = True while string_added == True: string_added = False initial_strings = working_strings working_strings = [] for (start_string, start_next_state) in initial_strings: if not start_next_state in self.end_states: continue if self.canon_graph_dict[start_next_state]: for (next_string, next_state) in self.canon_graph_dict[start_next_state]: if next_state == start_next_state: next_string = "(%s)*" % next_string if self.need_use_plus(start_string, next_string): final_strings.append((start_string + "+", None)) else: final_strings.append((start_string + next_string, None)) else: final_strings.append((start_string + next_string, None)) working_strings.append((start_string + next_string, next_state)) else: final_strings.append((start_string, None)) string_added = True self.regex = [x[0] for x in final_strings] self.unified_regex = self.unify_strings(self.regex) def create_regex_list(re): """Convert binary regex to list of items. Each item stores character position inside the binary regex (useful for jumps), character type and the value (either character or jump offset). """ regex_list = [] version = struct.unpack('>I', ''.join([chr(x) for x in re[:4]]))[0] logger.debug("re.version: 0x%x", version) i = 4 if version == 1: RegexParserV1.parse(re, i, regex_list) elif version == 2: RegexParserV2.parse(re, i, regex_list) elif version == 3: RegexParserV3.parse(re, i, regex_list) else: logger.critical("No parser available for regex version {:x}".format(version)) return regex_list def parse_regex(re): """Parse binary form for regular expression into canonical string. The input binary format is the one stored in the sandbox profile file. The out format is a canonical regular expression string using standard ASCII characters and metacharacters such as ^, $, +, *, etc. """ regex_list = create_regex_list(re) g = Graph() g.fill_from_regex_list(regex_list) g.reduce() g.convert_to_canonical() g.simplify() g.combine_start_end_nodes() logger.debug(g) return g.regex import sys import struct def main(): """Parse regular expressions in binary file.""" if len(sys.argv) != 2: print >> sys.stderr, "Usage: %s " % (sys.argv[0]) sys.exit(1) with open(sys.argv[1]) as f: re_count = struct.unpack("
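# --- Illustrative sketch (not part of the original file) ---
# A minimal, self-contained rendition of the state-elimination idea used
# by Graph.simplify()/remove_state() above, run on a toy automaton. The
# helper name below is hypothetical and exists only for this example.

def eliminate_state_sketch():
    # Toy canonical graph: state -1 is the start, state 1 is accepting.
    # Edges: -1 --"a"--> 0, 0 --"b"--> 0 (self-loop), 0 --"c"--> 1.
    graph = {-1: [("a", 0)], 0: [("b", 0), ("c", 1)], 1: []}
    # Removing state 0 rewires -1 --"a"--> 0 --"c"--> 1 through the
    # self-loop "b", giving the single edge -1 --"ab*c"--> 1.
    self_loop = "".join(s + "*" for (s, t) in graph[0] if t == 0)
    bypass = [(a + self_loop + c, t2)
              for (a, t1) in graph[-1] if t1 == 0
              for (c, t2) in graph[0] if t2 != 0]
    graph = {-1: bypass, 1: []}
    assert graph[-1] == [("ab*c", 1)]
    return graph[-1][0][0]

# print(eliminate_state_sketch())  # would output: ab*c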