[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\r\n__pycache__/\r\n*.py[cod]\r\n*$py.class\r\n\r\n# C extensions\r\n*.so\r\n\r\n# Distribution / packaging\r\n.Python\r\nbuild/\r\ndevelop-eggs/\r\ndist/\r\ndownloads/\r\neggs/\r\n.eggs/\r\nlib/\r\nlib64/\r\nparts/\r\nsdist/\r\nvar/\r\nwheels/\r\n*.egg-info/\r\n.installed.cfg\r\n*.egg\r\nMANIFEST\r\n\r\n# PyInstaller\r\n#  Usually these files are written by a python script from a template\r\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\r\n*.manifest\r\n*.spec\r\n\r\n# Installer logs\r\npip-log.txt\r\npip-delete-this-directory.txt\r\n\r\n# Unit test / coverage reports\r\nhtmlcov/\r\n.tox/\r\n.coverage\r\n.coverage.*\r\n.cache\r\nnosetests.xml\r\ncoverage.xml\r\n*.cover\r\n.hypothesis/\r\n.pytest_cache/\r\n\r\n# Translations\r\n*.mo\r\n*.pot\r\n\r\n# Django stuff:\r\n*.log\r\nlocal_settings.py\r\ndb.sqlite3\r\n\r\n# Flask stuff:\r\ninstance/\r\n.webassets-cache\r\n\r\n# Scrapy stuff:\r\n.scrapy\r\n\r\n# Sphinx documentation\r\ndocs/_build/\r\n\r\n# PyBuilder\r\ntarget/\r\n\r\n# Jupyter Notebook\r\n.ipynb_checkpoints\r\n\r\n# pyenv\r\n.python-version\r\n\r\n# celery beat schedule file\r\ncelerybeat-schedule\r\n\r\n# SageMath parsed files\r\n*.sage.py\r\n\r\n# Environments\r\n.env\r\n.venv\r\nenv/\r\nvenv/\r\nENV/\r\nenv.bak/\r\nvenv.bak/\r\n\r\n# Spyder project settings\r\n.spyderproject\r\n.spyproject\r\n\r\n# Rope project settings\r\n.ropeproject\r\n\r\n# mkdocs documentation\r\n/site\r\n\r\n# mypy\r\n.mypy_cache/\r\n"
  },
  {
    "path": "ADVobfuscator/README.org",
    "content": "* IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample\n\nThe script requires [[https://github.com/fireeye/flare-emu][flare-emu]].\nThe tested sample is [[https://www.virustotal.com/gui/file/c1f1bc58456cff7413d7234e348d47a8acfdc9d019ae7a4aba1afc1b3ed55ffa/details][491115422a6b94dc952982e6914adc39]] (TrickBot's UEFI firmware reconnaissance module called \"TrickBoot\").\n\nNote: We may not be able to reuse it for a different sample that was compiled with a different compiler or with different flags but I think the same approach (decoder function pattern matching + emulation) can be applied.\n\nA result example:\n\n#+BEGIN_SRC \n[*] 0x1000a124: xor2-encoded function detected (size = 0x2f)\n[*] 0x1000b92c: emulating from 0x1000b71b to 0x1000b92c\n[+] 0x1000b92c: uefi_expl_port_writeDeviceIoControl() ERROR %d\n#+END_SRC\n\n[[./img/adv_result.png]]\n\n** Reference\n\n- https://github.com/andrivet/ADVobfuscator\n- https://eclypsium.com/2020/12/03/trickbot-now-offers-trickboot-persist-brick-profit/\n- [[http://antonioparata.blogspot.com/2020/06/deobfuscating-c-advobfuscator-with.html]]\n\n\n\n"
  },
  {
    "path": "ADVobfuscator/idapy3_ADVobfuscator_deob.py",
    "content": "# idapy3_ADVobfuscator_deob.py - IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample\n# Takahiro Haruyama (@cci_forensics)\n\nfrom idc import *\nfrom idautils import *\nimport idaapi\n\ntry:\n    import flare_emu\nexcept ImportError as e:\n    print((\"Could not import flare_emu: {}\\nExiting.\".format(e.message)))\n    raise\n\nimport re, unicorn\n'''\ndec \n\n\n'''\ng_pat_sub = re.compile(rb'^\\x33\\xD2\\x8A\\x04\\x0A\\x0F\\xBE\\xC0\\x83\\xE8(.)\\x88\\x04\\x0A\\x42\\x83\\xFA(.)\\x72\\xEE\\x8B\\xC1\\xC3$', re.DOTALL)\ng_pat_xor1 = re.compile(rb'^\\x53\\x55\\x56\\x57\\x8b\\xf9\\x6a(.)\\x5d\\x8d\\x47\\x04\\x8a\\x10\\x0f\\xbe\\x37\\x0f\\xbe\\xca\\x33\\xce\\x88\\x08\\x40\\x83\\xed\\x01\\x75\\xee\\xc6\\x47.\\x00\\x8d\\x47\\x04\\x5f\\x5e\\x5d\\x5b\\xc3$', re.DOTALL)\ng_pat_xor2 = re.compile(rb'^\\x53\\x56\\x57\\x8b\\xf1\\x33\\xdb\\x8a\\x54\\x1e\\x04\\x8b\\x06\\x02\\xc3\\x0f\\xbe\\xca\\x33\\xc1\\x88\\x44\\x1e\\x04\\x43\\x83\\xfb(.)\\x72\\xe9\\x5f\\xc6\\x46.\\x00\\x8d\\x46\\x04\\x5e\\x5b\\xc3$', re.DOTALL)\ng_pat_dec = re.compile(rb'^\\x33\\xd2\\x8a\\x04\\x0a\\x0f\\xbe\\xc0\\x48\\x88\\x04\\x0a\\x42\\x83\\xfa(.)\\x72\\xf0\\x8b\\xc1\\xc3$', re.DOTALL)\ng_pats = {\n    'sub': g_pat_sub,\n    'xor1': g_pat_xor1,\n    'xor2': g_pat_xor2,\n    'dec': g_pat_dec,\n}\n\ndef info(msg):\n    print((\"[*] {}\".format(msg)))\n\ndef success(msg):\n    print((\"[+] {}\".format(msg)))\n\ndef error(msg):\n    print((\"[!] 
{}\".format(msg)))\n\ndef set_decomplier_cmt(ea, cmt):\n    try:\n        cfunc = idaapi.decompile(ea)\n        tl = idaapi.treeloc_t()\n        tl.ea = ea\n        tl.itp = idaapi.ITP_SEMI\n        if cfunc:\n          cfunc.set_user_cmt(tl, cmt)\n          cfunc.save_user_cmts()\n        else:\n          error(\"Decompile failed: {:#x}\".format(ea))\n    except:\n        error(\"Decompile failed: {:#x}\".format(ea))\n\ndef add_bookmark(ea, comment):\n    last_free_idx = -1\n    for i in range(0, 1024):\n        slot_ea = get_bookmark(i)\n        if slot_ea == BADADDR or slot_ea == ea:\n            # empty slot found or overwrite existing one\n            last_free_idx = i\n            break\n    # Check Empty Slot\n    if last_free_idx < 0:\n        return False\n    # Register Slot\n    put_bookmark(ea, 0, 0, 0, last_free_idx, comment)\n    return True\n\ndef get_emu_range(ea):\n    func = idaapi.get_func(ea)\n    if func is None:\n        return None, None\n\n    for bb in idaapi.FlowChart(func):\n        if bb.start_ea <= ea <= bb.end_ea:            \n            #return bb.start_ea, next_head(ea) # \n            return bb.start_ea, ea\n    return None, None\n\n# enable a step into emulation for the decoder (disabled)\ndef call_hook(address, argv, funcName, userData):\n    if funcName == userData[\"dec_fn_name\"]:\n        #print('dec_fn detected')\n        userData['skipCalls'] = False\n    else:\n        userData['skipCalls'] = True\n\n# validate the emulation result, based on the encoded buf ptr (disabled)\ndef inst_hook(uc, address, size, userData):\n    #info('instr_hook {:#x}'.format(address))\n    if address == userData['ref']:\n        eh = userData[\"EmuHelper\"]\n        try:\n            pc = uc.reg_read(eh.regs[\"pc\"])\n            enc_ea = uc.reg_read(eh.regs[\"ecx\"])\n            info('pc = {:#x}, address = {:#x}), enc_ea = {:#x}'.format(pc, address, enc_ea))\n            userData[\"enc_ea\"] = enc_ea\n        except unicorn.UcError as e:\n     
       error(\"emulation error: {}\".format(str(e)))\n    elif address == userData['end'] and userData.get('enc_ea'):\n        eh = userData[\"EmuHelper\"]\n        try:\n            pc = uc.reg_read(eh.regs[\"pc\"])\n            if userData[\"dec_fn_name\"].find('sub') != -1:\n                dec = uc.mem_read(userData[\"enc_ea\"], userData['size'])\n            else: # xor\n                dec = uc.mem_read(userData[\"enc_ea\"] + 4, userData['size'])\n            success('{:#x}: {}'.format(userData['ref'], dec))\n            \n        except unicorn.UcError as e:\n            error(\"emulation error: {}\".format(str(e)))\n\ndef emulate(pname, eh, dec_fn, size, key):\n    cnt = 0\n    \n    refs = CodeRefsTo(dec_fn, False)\n    for ref in refs:\n        if GetMnem(ref) == 'call':\n            start, end = get_emu_range(ref)\n            \n            if start and end:\n                info('{:#x}: emulating from {:#x} to {:#x}'.format(ref, start, end))\n                userData = {\n                    'dec_fn_name': get_name(dec_fn),\n                    'start': start,\n                    'end': end,\n                    'ref': ref,\n                    'size': size,\n                }\n                try:\n                    #eh.emulateRange(start, endAddr=end, callHook=call_hook, instructionHook=inst_hook, hookData=userData)\n                    #eh.emulateRange(start, endAddr=end, callHook=call_hook, hookData=userData)\n                    eh.emulateRange(start, endAddr=end)\n                    \n                    pc = eh.uc.reg_read(eh.regs[\"pc\"])\n                    ea = eh.uc.reg_read(eh.regs[\"ecx\"])\n                    if pname == 'sub':\n                        enc = eh.uc.mem_read(ea, size)\n                        #info('key = {:#x}, enc = {}'.format(key, enc))\n                        dec = bytes([(x - key) & 0xff for x in enc]).decode()\n                    elif pname == 'dec':\n                        enc = eh.uc.mem_read(ea, size)\n     
                   dec = bytes([(x - 1) & 0xff for x in enc]).decode()\n                    else:\n                        key = eh.uc.mem_read(ea, 4)[0]\n                        enc = eh.uc.mem_read(ea + 4, size)\n                        #info('key = {:#x}, enc = {}'.format(key, enc))\n                        if pname == 'xor1':\n                            dec = bytes([x ^ key for x in enc]).decode()\n                        else: # xor2\n                            dec = bytes([x ^ (key + i) for i, x in enumerate(enc)]).decode()\n\n                    # to obtain the step into emulation (disabled)\n                    #dec_ea = eh.uc.reg_read(eh.regs[\"eax\"])\n                    #info('{:#x}: dec_ea = {:#x}'.format(pc, dec_ea))\n                    #dec = eh.uc.mem_read(dec_ea, size)\n                    \n                    success('{:#x}: {}'.format(ref, dec))\n                    MakeComm(ref, dec)\n                    set_decomplier_cmt(ref, dec)\n                    add_bookmark(ref, 'decoded: {}'.format(dec))\n                    cnt += 1\n                    \n                except unicorn.UcError as e:\n                    pc = eh.uc.reg_read(eh.regs[\"pc\"])\n                    error(\"{:#x}: {} when reading {:#x}\".format(pc, str(e), ea))\n                    \n                finally:\n                    eh.resetEmulatorHeapAndStack()\n\n    return cnt\n\ndef main():\n    info('start')\n    eh = flare_emu.EmuHelper()\n\n    # search the decoding functions\n    cnts = {}\n    for fva in Functions():\n        #if fva != 0x1000A19F:\n        #    continue\n        if idc.get_func_flags(fva) & (idc.FUNC_LIB | idc.FUNC_THUNK):\n            continue\n\n        size = 0\n        fn_bytes = idc.get_bytes(fva, get_func_attr(fva, FUNCATTR_END) - fva)\n\n        for pname, pat in g_pats.items():\n            m = pat.search(fn_bytes)\n            if m:\n                try:\n                    if pname == 'sub':\n                        key = 
int.from_bytes(m.group(1), 'little')\n                        size = int.from_bytes(m.group(2), 'little')\n                    else:\n                        key = None\n                        size = int.from_bytes(m.group(1), 'little')\n                except ValueError:\n                    pass\n                else:\n                    print('\\n')\n                    info('{:#x}: {}-encoded function detected (size = {:#x})'.format(fva, pname, size))\n                    idaapi.do_name_anyway(fva, 'fn_ADVobfuscator_decode_{}_len{}'.format(pname, size))\n                    \n                    cnt = emulate(pname, eh, fva, size, key)\n                    if cnts.get(pname):\n                        cnts[pname] += cnt\n                    else:\n                        cnts[pname] = cnt\n                    break\n\n    info('number of decoded strings: {}'.format(cnts))\n    info('done')    \n\nif __name__ == '__main__':\n    main()\n    \n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.org",
    "content": "#+OPTIONS: ^:{}\n* ida_haru\n\nScripts/plugins for IDA Pro\n\nNote: Old scripts don't work for IDA 8.x, but I leave them just for reference. \n\n** eset_crackme\n\nIDA Pro loader/processor modules for ESET CrackMe driver VM\n\n** stackstring_static\n\nIDAPython script that statically recovers strings constructed on the stack\n\n** fn_fuzzy\n\nIDAPython script for fast multiple binary diffing triage\n\n** bindiff\n\npython script for multiple binary diffing by BinDiff\n\n** ADVobfuscator\n\nIDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample\n\n** HexRaysDeob\n\nmodified version for defeating APT10 ANEL's code obfuscations (located in a [[https://github.com/carbonblack/HexRaysDeob][corporate github repository]])\n\n** callstrings\n\nscripts for defeating \"polymorphic stack strings\" obfuscation used by a Hodur sample\n"
  },
  {
    "path": "bindiff/README.org",
    "content": "#+OPTIONS: ^:{}\n\n#+TITLE: BinDiff wrapper script for multiple binary diffing\n\n* Purpose\n\nmultiple binary diffing up to 100 samples ([[https://github.com/TakahiroHaruyama/ida_haru/tree/master/fn_fuzzy][fn_fuzzy]] is better for more samples)\n\n* Requirements\n\n- IDA 7.6 and BinDiff 6\n- python packages: pefile macholib pyelftools python-idb prettytable\n\n* How to Use\n\nBefore using it, you have to edit the paths for executables/scripts in bindiff.py.\n#+BEGIN_SRC \n# paths (should be edited)\ng_out_dir = r'Z:\\haru\\analysis\\tics\\bindiff_db' \ng_ida_dir = r'C:\\work\\tool\\IDAx64'\ng_exp_path = r'Z:\\cloud\\gd\\python\\IDAPython\\ida_haru\\bindiff\\bindiff_export.idc'\ng_differ_path = r\"C:\\Program Files\\BinDiff\\bin\\bindiff.exe\"\n#g_differ_path = r'C:\\Program Files (x86)\\zynamics\\BinDiff 4.2\\bin\\differ64.exe'\ng_save_fname_path = r'Z:\\cloud\\gd\\python\\IDAPython\\ida_haru\\bindiff\\save_func_names.py'\n#+END_SRC\n\nYou can check the command line options by -h or --help.\n#+BEGIN_EXAMPLE\nZ:\\cloud\\gd\\work\\python\\IDAPython\\bindiff>python bindiff.py -h\nusage: bindiff.py [-h] [--out_dir OUT_DIR] [--ws_th WS_TH] [--fs_th FS_TH] [--ins_th INS_TH] [--bb_th BB_TH] [--size_th SIZE_TH] [--func_regex FUNC_REGEX] [--debug]\n                  [--clear] [--noidb] [--use_pyidb]\n                  primary {1,m} ...\n\npositional arguments:\n  primary               primary binary to compare\n  {1,m}                 mode: 1, m\n    1                   BinDiff 1 to 1\n    m                   BinDiff 1 to many\n\noptional arguments:\n  -h, --help            show this help message and exit\n  --out_dir OUT_DIR, -o OUT_DIR\n                        output directory including .BinExport/.BinDiff (default: Z:\\haru\\analysis\\tics\\bindiff_db)\n  --ws_th WS_TH, -w WS_TH\n                        whole binary similarity threshold (default: 0.2)\n  --fs_th FS_TH, -f FS_TH\n                        function similarity threshold (default: 0.8)\n  
--ins_th INS_TH, -i INS_TH\n                        instruction threshold (default: 30)\n  --bb_th BB_TH, -b BB_TH\n                        basic block threshold (default: 1)\n  --size_th SIZE_TH, -s SIZE_TH\n                        file size threshold (MB) (default: 10)\n  --func_regex FUNC_REGEX, -e FUNC_REGEX\n                        function name regex to reduce noise (default: sub_|fn_|chg_)\n  --debug, -d           print debug output (default: False)\n  --clear, -c           clear .BinExport, .BinDiff and function name cache (default: False)\n  --noidb, -n           skip a secondary binary without idb (default: False)\n  --use_pyidb           use python-idb (default: False)\n#+END_EXAMPLE\n\nThere are 2 modes. One is \"1 to 1\" mode, the other is \"1 to many\" mode.\n\n** \"1 to 1\" mode example\n\nIn \"1 to 1\" mode, we should specify executable file paths for primary and secondary targets.\n\n#+BEGIN_EXAMPLE\nZ:\\cloud\\gd\\work\\python\\IDAPython\\bindiff>python bindiff.py Z:\\haru\\analysis\\tics\\hoge\\[redacted]_worker_fixed\n1 Z:\\haru\\analysis\\tics\\hoge\\samples\\checked\\[redacted]c2f05\n---------------------------------------------\n[*] BinDiff result\n[*] elapsed time = 0.390000104904 sec, number of diffing = 1\n[*] primary binary: (([redacted]_worker_fixed))\n\n============== 1 high similar binaries (>0.2) ================\n+----------------+--------------------------------------+\n|   similarity   |           secondary binary           |\n+----------------+--------------------------------------+\n| 0.211967127395 | [redacted]c2f05                      |\n+----------------+--------------------------------------+\n---------------------------------------------\n#+END_EXAMPLE\n\n\"high similar binaries\" means some binaries are found with whole binary similarities. 
You can adjust the similarity by -w option.\n\n** \"1 to many\" mode example\n\nIn \"1 to many\" mode, we should specify an executable file path for a primary target and a folder path for secondary targets. We can specify to compare secondary binaries recursively (-r option).\n\n#+BEGIN_EXAMPLE\nZ:\\cloud\\gd\\work\\python\\IDAPython\\bindiff>python bindiff.py Z:\\haru\\analysis\\tics\\hoge\\samples\\attacker\\[redacted]_worker_fixed\nm Z:\\haru\\analysis\\tics\\hoge\\samples\\tmp\n---------------------------------------------\n[*] BinDiff result\n[*] elapsed time = 6.71900010109 sec, number of diffing = 3\n[*] primary binary: (([redacted]_worker_fixed))\n\n============== 10 high similar functions (>0.8), except high similar binaries ================\n+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+\n|   similarity   | primary addr |          primary name          | secondary addr |          secondary name          |secondary binary |\n+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+\n|      1.0       | 0x180067720  |       Virt_sub_180067720       |  0x180004c30   |          sub_180004c30           | [redacted]e6504 |\n|      1.0       | 0x1800674b0  |         sub_1800674b0          |  0x180004930   |          sub_180004930           | [redacted]e6504 |\n|      1.0       | 0x1800673a0  | chg_peparse_Virt_sub_1800673A0 |  0x180004820   |          sub_180004820           | [redacted]e6504 |\n|      1.0       | 0x1800672b0  |       Virt_sub_1800672B0       |  0x180004730   |          sub_180004730           | [redacted]e6504 |\n|      1.0       | 0x18005fd84  |         sub_18005fd84          |  0x13f69af94   |          sub_13f69af94           | [redacted]fb841 |\n|      1.0       | 0x18005fd84  |         sub_18005fd84          |  0x180012648   |         __crtMessageBoxW         | 
[redacted]e6504 |\n|      1.0       | 0x180050f30  |         sub_180050f30          |  0x1800019f0   | ?erase@?$basic_string@DU?$char_t | [redacted]e6504 |\n| 0.98987073046  | 0x1800677e0  | chg_peparse_Virt_sub_1800677E0 |  0x180004cf0   |          sub_180004cf0           | [redacted]e6504 |\n| 0.963708558784 | 0x180067560  |         sub_180067560          |  0x1800049e0   |          sub_1800049e0           | [redacted]e6504 |\n| 0.946399194338 | 0x180018780  |    chg_rotate_sub_180018780    |  0x140004360   |          sub_140004360           | [redacted]92023 |\n+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+\n---------------------------------------------\n#+END_EXAMPLE\n\"high similar functions\" means some functions are found with function similarities though they have lower whole binary similarities than the threshold. You can adjust the similarity by -f option.\n\nThe function similarity result is very noisy so library/thunk functions are filtered out by the script. Additionally, we can specify the number of instructions/basic blocks, file size, and so on to reduce the noise.\n\nAnd by default, the script newly creates idbs for the target binaries if not found. If you want to only compare existing idbs, please specify -n.\n\n* Notes\n\n- If you can't get the function similarities correctly, adjust the function similarity threshold (--fs_th), instruction threshold (--ins_th), basic block threshold (--bb_th) and function name filter rule (--func_regex) options. The script excludes the matches of small codes because function similarity results of multiple binaries are noisy.\n\n- BinDiff 5.0 and later contains a [[https://issuetracker.google.com/issues/129600738][bug]] that we can't load existing .BinDiff files and import symbols/comments due to missing .BinExport files. I hope it will be fixed someday. \n\n- python-idb doesn't work for IDA 7.6 IDBs. 
So by default it's not used (enable --use_pyidb option if needed).\n\n\n"
  },
  {
    "path": "bindiff/bindiff.py",
    "content": "# bindiff.py - BinDiff wrapper script for multiple binary diffing\n# Takahiro Haruyama (@cci_forensics)\n\nimport argparse, subprocess, os, sqlite3, time, pickle, re, multiprocessing, sys, struct, logging\nfrom prettytable import PrettyTable\nimport pefile\nfrom macholib.MachO import MachO\nfrom macholib.mach_o import *\nfrom elftools.elf.elffile import ELFFile\nimport idb\n\nlogging.basicConfig(level=logging.ERROR) # to suppress python-idb warning\n\n# paths (should be edited)\n# Windows \n#g_out_dir = r'C:\\analysisw\\tmp\\bindiff'\n#g_ida_dir = r'C:\\analysisw\\tool\\IDA'\n#g_differ_path = r\"C:\\Program Files\\BinDiff\\bin\\bindiff.exe\"\n# MacOS\ng_out_dir = r'/Users/haru/analysis/tmp/bindiff'\n#g_ida_dir = r'/Applications/IDA/ida.app/Contents/MacOS'\ng_ida32_path = r'/Applications/IDA/ida.app/Contents/MacOS/ida'\ng_ida64_path = r'/Applications/IDA/ida64.app/Contents/MacOS/ida64'\ng_differ_path = r\"/Applications/BinDiff/BinDiff.app/Contents/MacOS/bin/bindiff\"\ng_exp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bindiff_export.idc')\ng_save_fname_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'save_func_names_7x.py')\n\n# parameters\ng_ws_th = 0.15 # whole binary similarity threshold\ng_fs_th = 0.70 # function similarity threshold\ng_ins_th = 10 # instruction threshold\ng_bb_th = 0 # basic block threshold\ng_size_th = 10 # file size threshold (MB)\n#g_func_regex = r'sub_|fn_|chg_' # function name filter rule\ng_func_regex = r'.*' # function name filter rule\n\nclass LocalError(Exception): pass\nclass ProcExportError(LocalError): pass\nclass ProcDiffError(LocalError): pass\nclass LoadFuncNamesError(LocalError): pass\nclass FileNotFoundError(LocalError): pass\nclass ChildProcessError(LocalError): pass\n\nclass BinDiff(object):\n    \n    def __init__ (self, primary, out_dir, ws_th, fs_th, ins_th, bb_th, size_th, func_regex, debug=False, clear=False, newidb=False, use_pyidb=False):\n    #def __init__ (self, 
primary, out_dir, ws_th, fs_th, ins_th, bb_th, size_th, debug=False, clear=False, noidb=False, use_pyidb=False):        \n        self._debug = debug\n        self._clear = clear\n        self._newidb = newidb\n        self._lock = multiprocessing.Lock()        \n        self._primary = primary\n        self._ws_th = ws_th\n        self._fs_th = fs_th\n        self._ins_th = ins_th\n        self._bb_th = bb_th\n        self._size_th = size_th\n        self._out_dir = out_dir\n        self.use_pyidb = use_pyidb\n        \n        self._format, self._arch = self._get_machine_type(primary)\n        if self._format is None:\n            raise ProcExportError('primary binary should be PE/Mach-O/ELF'.format(primary))\n        self._dprint('primary binary format: {}'.format(self._format))\n        self._dprint('primary binary architecture: {}'.format(self._arch))\n        \n        self._ida_path = self._get_ida_path(self._arch)\n        res = self._files_not_found()\n        if res is not None:\n            raise FileNotFoundError('file is not found: {}'.format(res))\n        self._dprint('IDA binary path for primary: {}'.format(self._ida_path))\n        \n        if self._make_BinExport(self._primary, self._ida_path) != 0:\n            raise ProcExportError('primary BinExport failed: {}'.format(primary))\n\n        if self.use_pyidb:\n            idb_path = self._get_idb_path(primary, self._arch)\n            self._func_names = self._load_func_names_pyidb(idb_path)\n        else:\n            self._func_p = re.compile(func_regex)\n            self._func_regex = func_regex\n            self._func_names = self._load_func_names_default(func_regex, primary,\n                                                             self._ida_path)\n        \n        self._high_ws = {}\n        self._high_fs = {}\n        self._diff_cnt = 0\n\n    def _dprint(self, msg):\n        if self._debug:\n            self._lock.acquire()            \n            print('[+] [{}]: 
{}'.format(os.getpid(), msg))\n            self._lock.release()\n\n    def _get_machine_type(self, path):\n        try:\n            pe = pefile.PE(path)\n            format_ = 'PE'\n            if pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine].find('I386') != -1:\n                arch = '32-bit'\n            else:\n                arch = '64-bit'\n        except (pefile.PEFormatError,KeyError) as detail:\n            try:\n                self._dprint(detail)\n                m = MachO(path)\n                format_ = 'Mach-O'\n                for header in m.headers:\n                    if CPU_TYPE_NAMES.get(header.header.cputype,header.header.cputype) == 'x86_64':\n                    #if header.MH_MAGIC == MH_MAGIC_64:\n                        arch = '64-bit'\n                    else:\n                        arch = '32-bit'\n            except:\n                try:\n                    elffile = ELFFile(open(path, 'rb'))\n                    format_ = 'ELF'\n                    e_ident = elffile.header['e_ident']\n                    if e_ident['EI_CLASS'] == 'ELFCLASS64':\n                        arch = '64-bit'\n                    else:\n                        arch = '32-bit'\n                except:                    \n                    return None, None\n                    #format_ = 'shellcode'\n                    #arch = '32-bit' # 32-bit fixed\n        return format_, arch\n\n    def _files_not_found(self):\n        #for path in (self._ida_path, g_exp_path, g_save_fname_path, g_differ_path):\n        for path in (self._ida_path, g_exp_path, g_differ_path):\n            if not os.path.isfile(path):\n                return path\n        return None\n\n    def _get_db_path_noext(self, target):\n        return os.path.join(self._out_dir, os.path.splitext(os.path.basename(target))[0])\n        #return os.path.join(self._out_dir, os.path.basename(target))\n\n    def _get_idb_path(self, target, arch):\n        db_ext = '.idb' if arch == '32-bit' else 
'.i64'\n        target_split = os.path.splitext(target)[0]\n        \n        if os.path.exists(target_split + db_ext):\n            return target_split + db_ext\n        else:\n            return target + db_ext # for recent IDA versions\n\n    def _get_ida_path(self, arch):\n        #idaq = 'idaq.exe' if arch == '32-bit' else 'idaq64.exe'\n        #idaq = g_ida32_name if arch == '32-bit' else g_ida64_name\n        #return os.path.join(g_ida_dir, idaq)\n        return g_ida32_path if arch == '32-bit' else g_ida64_path\n\n    def _load_func_names_pyidb(self, idb_path): # exlcude library/thunk functions\n        pickle_path = os.path.splitext(os.path.join(self._out_dir, os.path.basename(idb_path)))[0] + '_func_names.pickle'\n        if self._clear or not os.path.exists(pickle_path):        \n            func_names = {}        \n            with idb.from_file(idb_path) as db:\n                api = idb.IDAPython(db)\n                for ea in api.idautils.Functions(api.idc.MinEA(), api.idc.MaxEA()):\n                    flags = api.idc.GetFunctionFlags(ea)\n                    if flags & api.ida_funcs.FUNC_LIB or flags & api.ida_funcs.FUNC_THUNK:\n                        continue\n                    func_name = api.idc.GetFunctionName(ea)\n                    func_names[ea] = func_name\n            with open(pickle_path, 'wb') as f:\n                pickle.dump(func_names, f)\n\n        with open(pickle_path, 'rb') as f:\n            self._dprint('function names loaded: {}'.format(idb_path))\n            return pickle.load(f)\n                        \n    # default function without python-idb\n    def _load_func_names_default(self, func_regex, path, ida_path):\n        pickle_path = os.path.splitext(os.path.join(self._out_dir, os.path.basename(path)))[0] + '_func_names.pickle'\n        if self._clear or not os.path.exists(pickle_path):\n            cmd = [ida_path, '-A', '-S{}'.format(g_save_fname_path), '-Osave_func_names:{}:{}'.format(func_regex, pickle_path), 
path]\n            #cmd = [ida_path, '-S{}'.format(g_save_fname_path), '-Osave_func_names:{}:{}'.format(func_regex, pickle_path), path]\n\n            self._dprint('saving function names for {}'.format(path))\n            self._dprint(' '.join(cmd))\n            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n            stdout, stderr = proc.communicate()            \n            if proc.returncode != 0:\n                raise LoadFuncNamesError('function names saving failed: {}'.format(path))\n            \n        with open(pickle_path, 'rb') as f:\n            self._dprint('function names loaded: {}'.format(path))\n            return pickle.load(f)\n        \n        raise LoadFuncNamesError('function names loading failed: {}'.format(path))\n\n    def _make_BinExport(self, target, ida_path):\n        binexp_path = self._get_db_path_noext(target) + '.BinExport'\n        #binexp_path = os.path.splitext(target)[0] + '.BinExport'\n        if not self._clear and os.path.exists(binexp_path):\n            self._dprint('already existed BinExport: {}'.format(binexp_path))\n            return 0\n\n        #cmd = [ida_path, '-A', '-S{}'.format(g_exp_path), '-OExporterModule:{}'.format(binexp_path), target]  # the .BinExport filename should be specified in 4.3\n        #if self._debug:\n            #cmd = [ida_path, '-S{}'.format(g_exp_path), '-OBinExportModule:{}'.format(binexp_path), target]\n        #else:\n        cmd = [ida_path, '-A', '-S{}'.format(g_exp_path), '-OBinExportModule:{}'.format(binexp_path), target]\n        #print cmd\n        \n        self._dprint('getting BinExport for {}'.format(target))\n        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n        stdout, stderr = proc.communicate()\n        return proc.returncode\n\n    def _get_BinDiff_path(self, secondary):\n        primary_noext = self._get_db_path_noext(self._primary)\n        secondary_noext = os.path.splitext(secondary)[0]\n      
  return primary_noext + '_vs_' + os.path.basename(secondary_noext) + '.BinDiff'\n\n    def _make_BinDiff(self, secondary):\n        pri_binexp = self._get_db_path_noext(self._primary) + '.BinExport'\n        sec_binexp = self._get_db_path_noext(secondary) + '.BinExport'\n        #pri_binexp = os.path.splitext(self._primary)[0] + '.BinExport'\n        #sec_binexp = os.path.splitext(secondary)[0] + '.BinExport'\n        bindiff_path = self._get_BinDiff_path(secondary)\n        if not self._clear and os.path.exists(bindiff_path):\n            self._dprint('already existed BinDiff: {}'.format(bindiff_path))\n            return 0, None            \n        \n        cmd = [g_differ_path, '--primary={}'.format(pri_binexp), '--secondary={}'.format(sec_binexp), '--output_dir={}'.format(self._out_dir)]\n        #print cmd\n        \n        self._dprint('diffing the binaries..')\n        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n        stdout, stderr = proc.communicate()\n        self._dprint('differ output:')\n        self._dprint(stdout)\n        self._dprint(stderr)\n        return proc.returncode, cmd\n\n    def is_skipped(self, secondary):\n        # file check (in case of the same dir)\n        #if os.path.splitext(self._primary)[0] == os.path.splitext(secondary)[0]:\n        if self._primary == secondary:\n            return True\n        \n        # target at executables\n        if os.path.splitext(secondary)[1] in ('.BinExport', '.BinDiff', '.idb', '.i64'):\n            return True\n        \n        # size check\n        if (os.path.getsize(secondary) >> 20) > self._size_th:\n            self._dprint('The size is bigger (skipped): {}'.format(secondary))\n            return True\n        \n        # format/arch check\n        format_, arch = self._get_machine_type(secondary)\n        if format_ is None:\n            return True\n        #elif format_ != self._format or arch != self._arch:\n        elif format_ != self._format: 
# only check the format \n            self._dprint('different executable format (skipped): {}'.format(secondary))\n            return True\n\n        # skip if idb not found\n        idb_path = self._get_idb_path(secondary, arch)\n        if not self._newidb and not os.path.exists(idb_path):\n            self._dprint('no existing idb (skipped): {}'.format(secondary))\n            return True\n        \n        return False\n\n    def check_similarity(self, secondary, q=None):\n        format_, arch = self._get_machine_type(secondary)\n        ida_path = self._get_ida_path(arch)\n        self._dprint('IDA binary path for secondary: {}'.format(ida_path))        \n        if self._make_BinExport(secondary, ida_path) != 0:\n            if q is not None:\n                q.put((None, None))            \n            raise ProcExportError('secondary BinExport failed: {}'.format(secondary))\n\n        retcode, cmd = self._make_BinDiff(secondary)\n        if retcode != 0:\n            if q is not None:\n                q.put((None, None))            \n            raise ProcDiffError('BinDiff failed: {}'.format(cmd))\n\n        conn = sqlite3.connect(self._get_BinDiff_path(secondary))\n        c = conn.cursor()\n        try:\n            c.execute(\"SELECT similarity,confidence FROM metadata\")\n        except sqlite3.OperationalError as detail:\n            print('[!] .BinDiff database ({}) is something wrong: {}'.format(self._get_BinDiff_path(secondary), detail))\n            return\n            \n        ws, wc = c.fetchone()\n        self._dprint('whole binary similarity={} confidence={}'.format(ws, wc))\n        c.execute(\"SELECT address1,address2,similarity,confidence FROM function WHERE similarity > ? and instructions > ? 
and basicblocks > ?\", (self._fs_th, self._ins_th, self._bb_th))\n        frows = c.fetchall()\n        self._dprint('{} similar functions detected'.format(len(frows)))\n        conn.close()\n\n        c_high_ws = {}\n        c_high_fs = {}\n        if ws > self._ws_th:\n            c_high_ws[secondary] = {'similarity':ws, 'confidence':wc}\n        elif frows:\n            if self.use_pyidb:\n                idb_path = self._get_idb_path(secondary, arch)\n                func_names = self._load_func_names_pyidb(idb_path)\n            else:\n                func_names = self._load_func_names_default(self._func_regex, secondary,\n                                                           ida_path)\n            for row in frows:\n                addr1, addr2, fs, fc = row\n                self._dprint('addr1={:#x}, addr2={:#x}, similarity={}, confidence={}'.format(addr1, addr2, fs, fc))\n                if addr1 in self._func_names and addr2 in func_names:\n                    c_high_fs[(addr1, self._func_names[addr1], addr2, func_names[addr2], secondary)] = {'similarity':fs, 'confidence':fc}\n            if not c_high_fs and not self._debug:\n                os.remove(self._get_BinDiff_path(secondary))\n        else:\n            if not self._debug:\n                os.remove(self._get_BinDiff_path(secondary))\n\n        #self._dprint(c_high_ws)\n        #self._dprint(c_high_fs)\n        if q is None:\n            self._high_ws = c_high_ws\n            self._high_fs = c_high_fs\n        else:\n            q.put((c_high_ws, c_high_fs))\n\n    def check_similarities(self, secondary_dir, recursively):\n        if recursively:\n            seconds = [os.path.join(root, file_) for root, dirs, files in os.walk(secondary_dir) for file_ in files]\n        else:\n            seconds = [os.path.join(secondary_dir, entry) for entry in os.listdir(secondary_dir) if os.path.isfile(os.path.join(secondary_dir, entry))]\n\n        procs = []            \n        for secondary in 
seconds:\n            if self.is_skipped(secondary):\n                continue\n            q = multiprocessing.Queue()\n            p = multiprocessing.Process(target=self.check_similarity, args=(secondary, q))\n            p.start()\n            procs.append((p,q))\n        self._diff_cnt = len(procs)\n        for p,q in procs:\n            c_high_ws, c_high_fs = q.get()\n            self._high_ws.update(c_high_ws)\n            self._high_fs.update(c_high_fs)\n            p.join()\n\n    def increment_count(self):\n        self._diff_cnt += 1\n    \n    def get_result(self):\n        return self._high_ws, self._high_fs, self._diff_cnt\n\n    \ndef main():    \n    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('primary', help=\"primary binary to compare\")\n    parser.add_argument('--out_dir', '-o', default=g_out_dir, help=\"output directory including .BinExport/.BinDiff\")\n    parser.add_argument('--ws_th', '-w', type=float, default=g_ws_th, help=\"whole binary similarity threshold\")\n    parser.add_argument('--fs_th', '-f', type=float, default=g_fs_th, help=\"function similarity threshold\")\n    parser.add_argument('--ins_th', '-i', type=int, default=g_ins_th, help=\"instruction threshold\")\n    parser.add_argument('--bb_th', '-b', type=int, default=g_bb_th, help=\"basic block threshold\")    \n    parser.add_argument('--size_th', '-s', type=int, default=g_size_th, help=\"file size threshold (MB)\")\n    parser.add_argument('--func_regex', '-e', default=g_func_regex, help=\"function name regex to include in the result\")\n    parser.add_argument('--debug', '-d', action='store_true', help=\"print debug output\")\n    parser.add_argument('--clear', '-c', action='store_true', help=\"clear .BinExport, .BinDiff and function name cache\")\n    parser.add_argument('--newidb', '-n', action='store_true', help=\"create an idb for the secondary binary\")\n    parser.add_argument('--use_pyidb', 
action='store_true', help=\"use python-idb\")\n    \n    subparsers = parser.add_subparsers(dest='mode', help='mode: 1, m')\n    parser_1 = subparsers.add_parser('1', help='BinDiff 1 to 1')\n    parser_1.add_argument('secondary', help=\"secondary binary to compare\")    \n    parser_m = subparsers.add_parser('m', help='BinDiff 1 to many')\n    parser_m.add_argument('secondary_dir', help=\"secondary directory including binaries to compare\")\n    parser_m.add_argument('--recursively', '-r', action='store_true', help=\"getting binaries recursively\")\n\n    args = parser.parse_args()\n\n    high_ws = high_fs = None\n    if os.path.isfile(args.primary):\n        start = time.time()\n        try:\n            bd = BinDiff(args.primary, args.out_dir, args.ws_th, args.fs_th, args.ins_th, args.bb_th, args.size_th, args.func_regex, args.debug, args.clear, args.newidb, args.use_pyidb)\n            #bd = BinDiff(args.primary, args.out_dir, args.ws_th, args.fs_th, args.ins_th, args.bb_th, args.size_th, args.debug, args.clear, args.noidb, args.use_pyidb)\n            if args.mode == '1' and os.path.isfile(args.secondary):\n                if not bd.is_skipped(args.secondary):\n                    bd.check_similarity(args.secondary)\n                    bd.increment_count()\n            elif args.mode == 'm' and os.path.isdir(args.secondary_dir):\n                bd.check_similarities(args.secondary_dir, args.recursively)\n            high_ws, high_fs, cnt = bd.get_result()                \n        except LocalError as e:\n            print('[!] 
{} ({})'.format(str(e), type(e)))\n            return \n        elapsed = time.time() - start\n\n        print('---------------------------------------------')\n        print('[*] BinDiff result')\n        print('[*] elapsed time = {} sec, number of diffing = {}'.format(elapsed, cnt))\n        print('[*] primary binary: (({}))'.format(os.path.basename(args.primary)))\n        if high_ws:\n            print('\\n============== {} high similar binaries (>{}) ================'.format(len(high_ws), args.ws_th))\n            table = PrettyTable(['similarity', 'secondary binary'])\n            for path,res in sorted(list(high_ws.items()), key=lambda x:x[1]['similarity'], reverse=True):\n                table.add_row([res['similarity'], '(({}))'.format(os.path.basename(path))])\n            print(table)\n        if high_fs:\n            print('\\n============== {} high similar functions (>{}), except high similar binaries ================'.format(len(high_fs), args.fs_th))\n            table = PrettyTable(['similarity', 'primary addr', 'primary name', 'secondary addr', 'secondary name', 'secondary binary'])\n            for key,res in sorted(list(high_fs.items()), key=lambda x:(x[1]['similarity'], x[0][0]), reverse=True):\n                addr1, func_name1, addr2, func_name2, path = key\n                table.add_row([res['similarity'], '{:#x}'.format(addr1), func_name1[:0x20], '{:#x}'.format(addr2), func_name2[:0x20], '{}'.format(os.path.basename(path))])\n            print(table)\n        if (not high_ws) and (not high_fs):\n            print('\\nno similar binaries/functions found')\n        print('---------------------------------------------')\n        \nif ( __name__ == \"__main__\" ):\n    main()\n"
  },
  {
    "path": "bindiff/bindiff_export.idc",
    "content": "#include <idc.idc>\nstatic main()\n{\n    ChangeConfig(\"ABANDON_DATABASE=YES\");\n    Batch(0);\n    Wait();\n    //RunPlugin(\"binexport11\", 2 );\n    //Exit( 1 - RunPlugin(\"zynamics_binexport_9\", 2 ));\n    //Exit( 1 - RunPlugin(\"zynamics_binexport_8\", 2 ));\n    //Exit( 1 - RunPlugin(\"binexport10\", 2 ));\n    //Exit( 1 - RunPlugin(\"binexport11\", 2 ));\n    //RunPlugin(\"binexport12_ida\", 2 );\n    Exit( 1 - RunPlugin(\"binexport12_ida\", 2 ));\n}\n"
  },
  {
    "path": "bindiff/save_func_names.py",
    "content": "import os, pickle, re\n\ng_track_parent_th = 2 # parent function tracking level threshold\ng_parent_func_exclude_list = ['__NMSG_WRITE', '__fassign_l']\ng_pfe_list = [LocByName(p) for p in g_parent_func_exclude_list]\n\ndef get_pfuncs(ea, track_th):\n    pfuncs = [GetFunctionAttr(ref, FUNCATTR_START) for ref in CodeRefsTo(ea, False)]\n    track_th -= 1\n    if track_th > 0:\n        ppfuncs = [ppfunc for pfunc in pfuncs for ppfunc in get_pfuncs(pfunc, track_th)]\n        pfuncs.extend(ppfuncs)\n    return pfuncs\n\ndef main():    \n    #Wait()\n\n    # not change the database to maintain the window setting\n    process_config_line(\"ABANDON_DATABASE=YES\")\n\n    # -Odecomp:option1:option2:option3\n    options = idaapi.get_plugin_options(\"save_func_names\").split(':')\n    func_regex = options[0]\n    pickle_path = ':'.join(options[1:])\n    p = re.compile(func_regex)\n\n    func_names = {}\n    with open(pickle_path, 'wb') as f:\n        for ea in Functions(MinEA(), MaxEA()):\n            func_name = GetFunctionName(ea)\n            if p.search(func_name):\n                flags = GetFunctionFlags(ea)\n                if flags & FUNC_LIB or flags & FUNC_THUNK:\n                    continue\n                pfuncs = get_pfuncs(ea, g_track_parent_th)\n                if not (set(pfuncs) & set(g_pfe_list)):\n                    func_names[ea] = func_name\n        pickle.dump(func_names, f)\n\n    Exit(0)\n\n    #with open(os.path.splitext(GetIdbPath())[0] + '_func_names.pickle', 'rb') as f:\n    #    func_names = pickle.load(f)\n    #    print func_names\n\nif ( __name__ == \"__main__\" ):\n    main()\n\n\n"
  },
  {
    "path": "bindiff/save_func_names_7x.py",
    "content": "import os, pickle, re\r\nfrom idautils import *\r\n\r\ng_track_parent_th = 2 # parent function tracking level threshold\r\ng_parent_func_exclude_list = ['__NMSG_WRITE', '__fassign_l']\r\ng_pfe_list = [get_name_ea_simple(p) for p in g_parent_func_exclude_list]\r\n\r\ndef get_pfuncs(ea, track_th):\r\n    pfuncs = [get_func_attr(ref, FUNCATTR_START) for ref in CodeRefsTo(ea, False)]\r\n    track_th -= 1\r\n    if track_th > 0:\r\n        ppfuncs = [ppfunc for pfunc in pfuncs for ppfunc in get_pfuncs(pfunc, track_th)]\r\n        pfuncs.extend(ppfuncs)\r\n    return pfuncs\r\n\r\ndef main():    \r\n    #Wait()\r\n\r\n    # not change the database to maintain the window setting\r\n    process_config_line(\"ABANDON_DATABASE=YES\")\r\n\r\n    # -Odecomp:option1:option2:option3\r\n    options = idaapi.get_plugin_options(\"save_func_names\").split(':')\r\n    func_regex = options[0]\r\n    pickle_path = ':'.join(options[1:])\r\n    p = re.compile(func_regex)\r\n\r\n    func_names = {}\r\n    with open(pickle_path, 'wb') as f:\r\n        for ea in Functions(idaapi.cvar.inf.minEA, idaapi.cvar.inf.maxEA):\r\n            func_name = idc.get_func_name(ea)\r\n            if p.search(func_name):\r\n                flags = idc.get_func_attr(ea, FUNCATTR_FLAGS)\r\n                if flags & FUNC_LIB or flags & FUNC_THUNK:\r\n                    continue\r\n                pfuncs = get_pfuncs(ea, g_track_parent_th)\r\n                if not (set(pfuncs) & set(g_pfe_list)):\r\n                    func_names[ea] = func_name\r\n        pickle.dump(func_names, f)\r\n\r\n    ida_pro.qexit(0)\r\n\r\n    #with open(os.path.splitext(idc.get_idb_path())[0] + '_func_names.pickle', 'rb') as f:\r\n    #    func_names = pickle.load(f)\r\n    #    print func_names\r\n\r\nif ( __name__ == \"__main__\" ):\r\n    main()\r\n\r\n\r\n"
  },
  {
    "path": "callstrings/README.org",
    "content": "#+OPTIONS: ^:{}\n* callstrings - deobfuscating Hodur's global string encryption\n\n- Recover strings using various methods (static decoding, emulation, IDA debug hook)\n- Apply API function types to the local variable pointers\n\nThe script comparison is below:\n[[./img/comparison.png]]\n\n- As the comparison shows, ida_callstrings_dbg.py and ida_callstrings_flare_emu.py (except emulateSelection) can work for other malware.\n- As the reference slides say, it is recommended to use modified [[https://github.com/TakahiroHaruyama/flare-emu/tree/xorloop][flare-emu]] and [[https://github.com/TakahiroHaruyama/capa/tree/comment_insertion][CAPA]] to make ida_callstrings_flare_emu.py work better.\n\n** Reference\n\n- https://speakerdeck.com/takahiro_haruyama/the-art-of-malware-c2-scanning-how-to-reverse-and-emulate-protocol-obfuscated-by-compiler\n\n\n"
  },
  {
    "path": "callstrings/hexrays_utils.py",
    "content": "'''\nhexrays_utils.py - common classes/functions using Hex-Rays decompiler APIs\nTakahiro Haruyama (@cci_forensics)\n'''\n\n#from abc import ABCMeta, abstractmethod\n\nfrom idc import *\nimport idaapi, ida_ida, ida_ua, ida_typeinf, ida_kernwin\nfrom ida_hexrays import *\nfrom ida_allins import NN_callni, NN_call, NN_callfi\nimport idautils\nimport re\n\n# Global options/variables\ng_DEBUG = True\ng_CACHE = True\ng_ASCII_TYPES = ['CHAR *', 'CONST CHAR *', 'LPSTR', 'LPCSTR']\ng_UNICODE_TYPES = ['WCHAR *', 'CONST WCHAR *', 'LPWSTR', 'LPCWSTR']\ng_STR_TYPES = g_ASCII_TYPES + g_UNICODE_TYPES\ng_stub_GetProcAddress = 'fn_resolve_API_addr'\ng_RENAME_RETRY_CNT = 100\n\ndef info(msg):\n    print(\"\\033[34m\\033[1m[*]\\033[0m {}\".format(msg))\n\ndef success(msg):\n    print(\"\\033[32m\\033[1m[+]\\033[0m {}\".format(msg))\n    \ndef error(msg):\n    print(\"\\033[31m\\033[1m[!]\\033[0m {}\".format(msg))\n\ndef debug(msg):\n    if g_DEBUG:\n        print(\"\\033[33m\\033[1m[D]\\033[0m {}\".format(msg))\n\n\ndef extract_ascii(data):\n    pat = re.compile(rb'^(?:[\\x20-\\x7E]){2,}')\n    return list(set([w.decode('ascii') for w in pat.findall(data)]))\n\ndef extract_unicode(data):\n    pat = re.compile(r'^(?:[\\x20-\\x7E][\\x00]){2,}')\n    return list(set([w.decode('utf-16le') for w in pat.findall(data)]))\n\ndef get_ctree_root(ea, cache=True):\n    \n    cfunc = None\n    try:\n        if cache:\n            cfunc = decompile(ea)\n        else:\n            cfunc = decompile(ea, flags=DECOMP_NO_CACHE)        \n    except:\n        error('Decompilation of a function {:#x} failed'.format(ea))\n\n    return cfunc\n\n# Detect constant value used in string decoding\nclass cnt_val_finder_t(ctree_visitor_t):\n\n    def __init__(self):\n        \n        ctree_visitor_t.__init__(self, CV_FAST)\n\n        self.cst_val = None\n\n    def visit_expr(self, expr):\n\n        if expr.op == cot_asgxor and expr.y.op == cot_xor and expr.y.y.op == cot_num:\n            cst = 
expr.y.y.n._value\n            \n            if expr.y.x.op == cot_add:\n                expr_add = expr.y.x\n            elif expr.y.x.op == cot_cast and expr.y.x.x.op == cot_add:\n                expr_add = expr.y.x.x\n            else:\n                expr_add = None\n\n            if expr_add and expr_add.y.op == cot_num and \\\n                (expr_add.y.n._value == cst) and (0 < cst < 0xff):\n                success(f'{expr.ea:#x}: string decoding constant value {cst:#x} detected')\n                self.cst_val = cst\n                return 1\n            \n            # x ^ (y - 0x1d) ^ 0xe3 == x ^ (y + 0xe3) ^ 0xe3\n            if expr.y.x.op == cot_sub:\n                expr_sub = expr.y.x\n            elif expr.y.x.op == cot_cast and expr.y.x.x.op == cot_sub:\n                expr_sub = expr.y.x.x\n            else:\n                expr_sub = None\n\n            if expr_sub and expr_sub.y.op == cot_num and \\\n                (expr_sub.y.n._value + cst == 0x100) and (0 < cst < 0xff):\n                success(f'{expr.ea:#x}: string decoding constant value {cst:#x} detected')\n                self.cst_val = cst\n                return 1\n            \n        return 0\n    \n    def get_cnt_val(self):\n\n        return self.cst_val\n\n# Detect assignments when inserting comments\nclass asg_parent_finder_t(ctree_visitor_t):\n\n    def __init__(self, call_ea):\n        \n        ctree_visitor_t.__init__(self, CV_PARENTS)\n        self.call_ea = call_ea\n        self.asg_ea = BADADDR\n\n    def visit_expr(self, expr):\n\n        if expr.op == cot_asg and \\\n            ((expr.y.op == cot_call and expr.y.ea == self.call_ea) or \\\n             (expr.y.op == cot_cast and expr.y.x.op == cot_call and expr.y.x.ea == self.call_ea)):\n            self.asg_ea = expr.ea\n            info(f'{self.call_ea:#x}: assignment detected, replaced with the ea {self.asg_ea:#x}')\n            return 1\n        \n        return 0\n\n# Change type/name of the specified lvar 
name\nclass my_lvar_modifier_t(user_lvar_modifier_t):\n\n    def __init__(self, target_name, new_name=None, new_decl=None, new_tif=None):\n        \n        user_lvar_modifier_t.__init__(self)\n        self.target_name = target_name\n        self.new_name = new_name\n        self.new_decl = new_decl\n        self.new_tif = new_tif\n\n    def modify_lvars(self, lvars):\n\n        # Note: Variables without user-specified info are not present in lvvec\n        if len(lvars.lvvec) == 0:\n            error('modify_lvars: len(lvars.lvvec) == 0')\n\n        for idx, one in enumerate(lvars.lvvec):\n            debug('modify_lvars: target_name = \"{}\" current = \"{}\"'.format(self.target_name, one.name))\n\n            # Set the type to the target var\n            if one.name == self.target_name:\n                if self.new_name:\n                    one.name = self.new_name\n                    info('modify_lvars: Name \"{}\" set to {}'.format(one.name, self.target_name))\n\n                tif = None\n                if self.new_decl:                    \n                    tif = ida_typeinf.tinfo_t()\n                    res = ida_typeinf.parse_decl(tif, None, self.new_decl, 0)\n                    #if not res:\n                    #    error('{}: parse_decl from {} FAILED'.format(one.name, self.new_decl))\n                elif self.new_tif:\n                    tif = self.new_tif\n                if tif:\n                    one.type = tif\n                    info('modify_lvars: Type \"{}\" set to {}'.format(str(tif), one.name))\n\n                return True\n\n        return False\n\n#class HexRaysUtils(metaclass=ABCMeta):\nclass HexRaysUtils():\n\n    def __init__(self):\n\n        self.cmts = {}\n        self.call_eas = []\n\n    #@abstractmethod\n    def get_reg_value(self, reg_name):\n        raise NotImplementedError()\n    \n    #@abstractmethod\n    def get_ptr_value(self, ptr):\n        raise NotImplementedError()\n\n    #@abstractmethod\n    def 
get_string(self, ea, is_unicode=False):\n        raise NotImplementedError()\n\n    def get_bytes(self, ea):\n        raise NotImplementedError()\n\n    def get_fn_offset(self, ea):\n\n        func_ea = get_func_attr(ea, FUNCATTR_START)\n        return get_name(func_ea) + f'+{ea-func_ea:#x}'\n    '''\n    def set_decomplier_cmt(self, cfunc, ea, cmt):\n\n        tl = idaapi.treeloc_t()\n        tl.ea = ea\n        tl.itp = idaapi.ITP_SEMI\n        cfunc.set_user_cmt(tl, cmt)\n        cfunc.save_user_cmts()\n    '''\n    def set_decomplier_cmt(self, cfunc, ea, cmt):\n\n        # Prevent orphan comment issues in assignments\n        finder = asg_parent_finder_t(ea)\n        finder.apply_to_exprs(cfunc.body, None)\n        #print(f'{finder.asg_ea=:#x}')\n        cmt_ea = ea if finder.asg_ea == BADADDR else finder.asg_ea\n\n        tl = idaapi.treeloc_t()\n        tl.ea = cmt_ea\n        tl.itp = idaapi.ITP_SEMI\n\n        cfunc.set_user_cmt(tl, cmt)\n        cfunc.save_user_cmts()\n        cfunc.refresh_func_ctext()\n\n    # This function was ported from https://github.com/RolfRolles/Miscellaneous/blob/master/PrintTypeSignature.py\n    # If an indirect API call still has a cast after the var type is set, apply \"Force call type\" on the var in Pseudocode view\n    def GetTypeSignature(self, apiName):\n        \n        # Look up the prototype by name from the main TIL\n        o = ida_typeinf.get_named_type(None, apiName, ida_typeinf.NTF_SYMU)\n        \n        # Found?\n        if o is not None:\n            code, type_str, fields_str, cmt, field_cmts, sclass, value = o\n            \n            # Create a tinfo_t by deserializing the data returned above\n            t = ida_typeinf.tinfo_t()\n            if t.deserialize(None, type_str, fields_str, field_cmts):\n                \n                # And change the prototype into a function pointer\n                ptrType = ida_typeinf.tinfo_t()\n                ptrType.create_ptr(t)\n                return ptrType\n 
       \n        # On any failure, return None\n        return None\n\n    # IDA decompiler has no API forcing lvar name\n    def force_rename_lvar(self, ea, var, new_name):\n\n        func_ea = get_func_attr(ea, FUNCATTR_START)\n        debug('force_rename_lvar: function ea = {:#x}'.format(func_ea))\n        old_name = var.name\n        \n        if rename_lvar(func_ea, var.name, new_name):\n            info('force_rename_lvar {:#x}: lvar name changed \"{}\" ->  \"{}\"'.format(ea, old_name, new_name))\n            var.name = new_name # to refresh immediately\n            return\n                \n        for i in range(g_RENAME_RETRY_CNT):            \n            if rename_lvar(func_ea, var.name, new_name + '_{}'.format(i + 1)):\n                info('force_rename_lvar {:#x}: lvar name changed \"{}\" -> \"{}\"'.format(ea, old_name, new_name + '_{}'.format(i + 1)))\n                var.name = new_name + '_{}'.format(i + 1)\n                break\n        else:\n            error('{:#x}: renaming {} failed (rename_lvar, {} times)'.format(ea, var.name, g_RENAME_RETRY_CNT))\n\n    def get_arg_strings(self, address):\n\n        if address in self.call_eas:\n            info(f'{address:#x} ({self.get_fn_offset(address)}): already-visited call')\n            return\n        else:\n            self.call_eas.append(address)\n\n        cfunc = get_ctree_root(address, cache=g_CACHE)\n\n        if cfunc:\n            item = cfunc.body.find_closest_addr(address)\n\n            if item.op == cot_call:\n                expr = item.cexpr\n                print('-' * 80)\n\n                if expr.x.obj_ea == BADADDR:\n                    # dynamically-resolved API\n                    if expr.x.op == cot_var:\n                        callee_name = expr.x.v.getv().name\n                    elif expr.x.op == cot_cast and expr.x.x.op == cot_var:\n                        callee_name = expr.x.x.v.getv().name\n                        # Force call type (remove the cast)\n               
         tif = ida_typeinf.tinfo_t()\n                        if print_insn_mnem(expr.ea) == 'call' and not ida_nalt.get_op_tinfo(tif, expr.ea, 0): # Skip an already-specified operand\n                            tif = self.GetTypeSignature(callee_name)\n                            if tif:\n                                if ida_nalt.set_op_tinfo(expr.ea, 0, tif):\n                                    success(f'{expr.ea:#x}: Force call type \"{str(tif)}\" to the operand \"{callee_name}\"')\n                                else:\n                                    error(f'{expr.ea:#x}: Force call type failed')\n                    else:\n                        callee_name = 'UNRESOLVED'\n                else:\n                    callee_name = get_name(expr.x.obj_ea)\n                \n                info(f'{address:#x} ({self.get_fn_offset(address)}): call {callee_name} ({expr.x.obj_ea:#x})')\n                debug(f'{str(expr.x.type)}')\n                \n                debug(f'argc = {expr.a.size()}')\n                arg_strs = []\n                for i in range(expr.a.size()):\n                    #breakpoint()\n                    arg = expr.a.at(i)\n\n                    # Sometimes the arg type in stubs is int *\n                    if str(arg.type).upper() in g_STR_TYPES or callee_name.find(g_stub_GetProcAddress) != -1:\n                        debug(f'arg{i} = {str(arg.type)}')\n\n                        ea = 0\n                        if str(expr.x.type).find('__thiscall') != -1:\n                            debug('thiscall')\n                            if i == 0:\n                                ea = self.get_reg_value(\"ECX\")\n                            else:\n                                ea = self.get_ptr_value(self.get_reg_value(\"ESP\") + (i - 1) * 4)\n                        elif str(expr.x.type).find('__fastcall') != -1:\n                            debug('fastcall')\n                            if i == 0:\n                                
ea = self.get_reg_value(\"RCX\")\n                            elif i == 1:\n                                ea = self.get_reg_value(\"RDX\")\n                            elif i == 2:\n                                ea = self.get_reg_value(\"R8\")\n                            elif i == 3:\n                                ea = self.get_reg_value(\"R9\")\n                            else:\n                                ea = self.get_ptr_value(self.get_reg_value(\"RSP\") + (i - 4) * 4)\n                        else: # __stdcall, __cdecl, etc.\n                            debug('other calling conventions')\n                            ea = self.get_ptr_value(self.get_reg_value(\"ESP\") + i * 4)\n                        \n                        debug(f'{ea=:#x}')\n                        if str(arg.type).upper() in g_ASCII_TYPES or callee_name.find(g_stub_GetProcAddress) != -1:\n                            #if i == 2:\n                            #    res = self.get_bytes(ea)\n                            #else:\n                            res = self.get_string(ea)\n                        else: # g_UNICODE_TYPES\n                            res = self.get_string(ea, is_unicode=True)\n                        \n                        if res:\n                            arg_strs.append(f'arg{i} = {res}')\n                            debug(f'arg{i} = {res}')\n\n                            # Set the function prototype if the callee is the GetProcAddress stubs or GetProcAddress API\n                            if (i == 0 and callee_name.find(g_stub_GetProcAddress) != -1) or \\\n                                (i == 1 and callee_name == \"GetProcAddress\"):\n                                #breakpoint()\n                                p_item = cfunc.body.find_parent_of(expr)\n                                p_expr = p_item.cexpr\n\n                                if p_expr.op == cot_cast:\n                                    p_item = cfunc.body.find_parent_of(p_expr)\n  
                                  p_expr = p_item.cexpr\n\n                                if p_expr.op == cot_asg and p_expr.x.op == cot_var:\n                                    var = p_expr.x.v.getv()\n                                    tif = self.GetTypeSignature(res)\n                                    # We need to use rename_lvar calling modify_user_lvar_info indirectly to add the var into lvvec\n                                    self.force_rename_lvar(address, var, res)\n                                    my_lvar_mod = my_lvar_modifier_t(var.name, new_tif=tif)\n                                    modify_user_lvars(get_func_attr(address, FUNCATTR_START), my_lvar_mod)\n\n                # Set the arguments comment at the call instruction address\n                if arg_strs:\n                    cmt = f'{address:#x} ({self.get_fn_offset(address)}): {\",\".join(arg_strs)}'\n                    success(cmt)\n                    self.set_decomplier_cmt(cfunc, address, cmt)\n                    self.cmts[address] = cmt\n                    cfunc.refresh_func_ctext()\n\n    def print_summary(self):\n\n        if self.cmts:\n            success('Summary:')\n            for k,v in self.cmts.items():\n                print(f'{v}')\n\n    def decode(self, enc, cst_val):\n\n        return bytes([enc[i] ^ ((i + cst_val) & 0xff) ^ cst_val for i in range(len(enc))])\n"
  },
  {
    "path": "callstrings/ida_callstrings_dbg.py",
    "content": "'''\nida_callstrings_dbg.py - string deobfuscation using IDA debug hook class\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimport idaapi\nidaapi.require('hexrays_utils', package='*')\nfrom hexrays_utils import *\nfrom ida_dbg import *\n\n# Global options/variables\ng_DEBUG = False\ng_MAX_INSTRUCTIONS = 0 # 0 = disabled\n\ndef info(msg):\n    print(\"\\033[34m\\033[1m[*]\\033[0m {}\".format(msg))\n\ndef success(msg):\n    print(\"\\033[32m\\033[1m[+]\\033[0m {}\".format(msg))\n    \ndef error(msg):\n    print(\"\\033[31m\\033[1m[!]\\033[0m {}\".format(msg))\n\ndef debug(msg):\n    if g_DEBUG:\n        print(\"\\033[33m\\033[1m[D]\\033[0m {}\".format(msg))\n\n\nclass TraceHook(DBG_Hooks, HexRaysUtils):\n\n    def __init__(self, target_ea):\n\n        DBG_Hooks.__init__(self)\n        HexRaysUtils.__init__(self)\n\n        self.traces = 0\n        self.target_ea = target_ea\n        #self.current_tid = get_current_thread()\n\n    def get_reg_value(self, reg_name):\n\n        return get_reg_val(reg_name)\n\n    def get_ptr_value(self, ptr):\n        \n        if idaapi.get_inf_structure().is_64bit():\n            return get_qword(ptr)\n        else:\n            return get_wide_dword(ptr)\n    \n    def get_string(self, ea, is_unicode=False):\n\n        res = get_strlit_contents(ea, strtype=STRTYPE_C_16) if is_unicode else get_strlit_contents(ea)\n\n        return res.decode() if res else None\n\n    def dbg_trace(self, tid, ea):\n\n        debug(\"[tid %X] trace %08X\" % (tid, ea))\n\n        if ea < ida_ida.inf_get_min_ea() or ea > ida_ida.inf_get_max_ea():\n            raise Exception(\n                \"Received a trace callback for an address outside this database!\"\n            )\n        \n        insn = ida_ua.insn_t()\n        insnlen = ida_ua.decode_insn(insn, ea)\n        fn_name = get_name(get_func_attr(ea, FUNCATTR_START))\n        if insnlen > 0 and insn.itype in [NN_callni, NN_call, NN_callfi] and fn_name.find(g_stub_GetProcAddress) == 
-1:\n            refresh_debugger_memory()\n            self.get_arg_strings(ea)\n\n        self.traces += 1\n        if g_MAX_INSTRUCTIONS and self.traces >= g_MAX_INSTRUCTIONS:\n            request_disable_step_trace()\n            request_suspend_process()\n\n            if run_requests():\n                info('Requests suspending the process executed (g_MAX_INSTRUCTIONS)')\n            else:\n                error('Requests suspending the process failed (g_MAX_INSTRUCTIONS)')\n\n        #return 1\n        return 0 # log it\n    \n    def dbg_thread_start(self, pid, tid, ea):\n\n        info(f'[Thread {tid:#x}] {ea:#x}: New thread started')\n        '''\n        add_bpt(ea)\n        select_thread(tid)\n        request_suspend_process()\n\n        #if tid != self.current_tid:\n        if not self.unhook():\n            error(\"Error uninstalling hooks!\")\n        else:\n            info('Hooks uninstalled')\n        #self.current_tid = tid\n        end = prev_head(get_func_attr(ea, FUNCATTR_END))\n        self.target_ea = end\n        info(f'Selecting the new thread to trace until {end:#x}')\n        #dbg_del_thread(self.current_tid)\n        #suspend_thread(self.current_tid)\n        select_thread(tid)\n        set_trace_base_address(ea)\n        dbg_add_thread(tid)\n        self.hook()\n        enable_step_trace(1) # needed per thread?\n        set_step_trace_options(ST_OPTIONS_MASK)\n        request_enable_step_trace(1)\n        request_run_to(end)\n        #request_continue_process()\n\n        if run_requests():\n            info('Requests successful')\n        else:\n            error('Requests failed')\n        '''\n        \n    def dbg_thread_exit(self, pid, tid, ea, exit_code):\n\n        info(f'[Thread {tid:#x}] {ea:#x}: Thread exited with {exit_code:#x}')\n\n    def dbg_run_to(self, pid, tid=0, ea=0):\n\n        if ea == self.target_ea:\n            info(f'[Thread {tid:#x}] Reached to the target {self.get_fn_offset(ea)}')\n        elif pid != 0:\n   
         error(f'[Thread {tid:#x}] The suspended address {self.get_fn_offset(ea)} is different from the target {self.get_fn_offset(self.target_ea)}. Probably another breakpoint set?')\n        else:\n            error(f'[Thread {tid:#x}] The suspended address {self.get_fn_offset(ea)} is different from the target {self.get_fn_offset(self.target_ea)}. Probably suspended by users manually?')\n\n        info(f\"Traced {self.traces} instructions\")\n        refresh_debugger_memory()\n        self.print_summary()\n\n    def dbg_process_exit(self, pid, tid, ea, code):\n\n        error(f\"[Thread {tid:#x}] Process exited with {code:#x} before reaching to the target\")\n        info(f\"Traced {self.traces} instructions\")\n        self.print_summary()\n\n        return 0\n    '''\n    def dbg_suspend_process(self):\n\n        self.dbg_run_to(0, ea=get_ip_val())\n    '''\n\n            \ndef main():\n\n    info('start')\n\n    if not is_debugger_on():\n        error(\"Please run the process first!\")\n        return\n\n    end = prev_head(get_func_attr(get_reg_val(\"EIP\"), FUNCATTR_END))\n    info(f\"Tracing to the end of function {end:#x}\")\n\n    debugHook = TraceHook(end)\n    debugHook.hook()\n    enable_step_trace(1) # Only the same thread works\n    #set_step_trace_options(ST_OVER_DEBUG_SEG | ST_OVER_LIB_FUNC | ST_SKIP_LOOPS | ST_ALREADY_LOGGED | ST_DIFFERENTIAL)\n    #set_step_trace_options(ST_OVER_DEBUG_SEG | ST_OVER_LIB_FUNC)\n    set_step_trace_options(ST_OPTIONS_MASK) # all included\n\n    run_to(end)\n\n    while get_process_state() == DSTATE_RUN:\n    #while get_process_state() != DSTATE_NOTASK: # as long as process is currently debugged\n        wait_for_next_event(WFNE_ANY, 0)\n\n    if not debugHook.unhook():\n        error(\"Error uninstalling hooks!\")\n    else:\n        info('Hooks uninstalled')\n    del debugHook\n\n    info('done')\n\nif __name__ == '__main__':\n    main()\n    \n"
  },
  {
    "path": "callstrings/ida_callstrings_flare_emu.py",
    "content": "'''\nida_callstrings_flare_emu.py - string deobfuscation using flare-emu\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimport idaapi\n#idaapi.require('logging') # <- This suppresses the flare-emu debug messages!\nimport logging, hexdump\n#logging.basicConfig(level=logging.DEBUG, force=True)\n\nidaapi.require('hexrays_utils', package='*')\nfrom hexrays_utils import *\n\nidaapi.require('flare_emu')\nidaapi.require('flare_emu_hooks')\nimport flare_emu, flare_emu_hooks, unicorn\n\n# Global options\ng_DEBUG = False\ng_DEBUG_FLARE_EMU = False\ng_FLAG_ALL_PATHS = False # True: iterateAllPaths, False: emulateRange\ng_MAX_SAME_STATE_VAR = 0x1000 # to detect infinite loop by CFF\ng_MAX_INST_VISIT = 10000 # to detect infinite loop\n#g_MAX_EMU_INSN = 1000000\ng_MAX_STACK_BUF = 0x100\n#g_ENC_OFFSET = 0x0\n\ndef info(msg):\n    print(\"\\033[34m\\033[1m[*]\\033[0m {}\".format(msg))\n\ndef success(msg):\n    print(\"\\033[32m\\033[1m[+]\\033[0m {}\".format(msg))\n    \ndef error(msg):\n    print(\"\\033[31m\\033[1m[!]\\033[0m {}\".format(msg))\n\ndef debug(msg):\n    if g_DEBUG:\n        print(\"\\033[33m\\033[1m[D]\\033[0m {}\".format(msg))\n\ndef debug_bin(n, v):\n    if g_DEBUG:\n        debug(n)\n        hexdump.hexdump(v)\n\n\nclass HexRaysEmu(HexRaysUtils):\n\n    def __init__(self, eh):\n\n        HexRaysUtils.__init__(self)\n        self.eh = eh\n\n    def get_reg_value(self, reg_name):\n\n        return self.eh.getRegVal(reg_name.lower())\n\n    def get_ptr_value(self, ptr):\n        \n        return self.eh.getEmuPtr(ptr)\n    \n    def get_string(self, ea, is_unicode=False):\n\n        return self.eh.getEmuWideString(ea).decode('utf-16') if is_unicode else self.eh.getEmuString(ea).decode()\n    \n    def get_bytes(self, ea):\n\n        return self.eh.getEmuBytes(ea, 0x20)\n\n\ndef call_hook(address, argv, funcName, userData):\n\n    debug(f'call_hook at {address:#x}')\n\n    #is_64bit = True if idaapi.get_inf_structure().lflags & idaapi.LFLG_64BIT == 4 
else False\n    hremu = userData[\"hremu\"]\n\n    try:\n        hremu.get_arg_strings(address)\n    except unicorn.UcError as e:\n        error(f'{address:#x} ({hremu.get_fn_offset(address)}): Unicorn emulation exception in get_arg_strings() ({e})')\n\ndef mem_write_hook(unicornObject, accessType, memAccessAddress, memAccessSize, memValue, userData):\n\n    if accessType == unicorn.UC_MEM_WRITE:\n\n        hremu = userData[\"hremu\"]\n        sp = hremu.eh.getRegVal('esp')\n        ip = hremu.eh.getRegVal('ip')\n\n        if sp < memAccessAddress < sp + g_MAX_STACK_BUF:\n            userData[\"enc_heads\"][ip] = memAccessAddress\n\ndef is_high_entropy(v):\n\n    res = True\n    vbytes = v.to_bytes(4, 'little')\n\n    for b in vbytes:\n        if b & 0xff == 0: # e.g., 0, 1, 0x10000000, etc.\n            res = False\n            break\n    else:\n        vlist = [b for b in vbytes]\n        for b in vbytes:\n            if b == vlist[0] and b == vlist[1] and b == vlist[2] and b == vlist[3]: # e.g., 0x11111111, 0xffffffff, etc.\n                res = False\n                break\n        \n    return res\n\ndef inst_hook_cff(unicornObject, address, instructionSize, userData):\n\n    eh = userData[\"EmuHelper\"]\n    state_var_cnt = userData[\"state_var_cnt\"]\n    state_excluded = userData[\"state_excluded\"]\n    abort = False\n\n    if print_insn_mnem(address) == 'cmp' and get_operand_type(address, 0) == o_reg and get_operand_type(address, 1) == o_imm and \\\n        is_high_entropy(get_operand_value(address, 1)) and print_insn_mnem(next_head(address)) in ['jz', 'jnz']:\n        #debug(f'{address:#x}: compare state var with cmp var')\n\n        reg_name = print_operand(address, 0)\n        state_var = eh.getRegVal(reg_name)        \n        cmp_var = get_operand_value(address, 1)\n\n        if state_var != cmp_var:\n            abort = True\n\n    elif print_insn_mnem(address) in ['cmovz'] and get_operand_type(address, 0) == o_reg:\n\n        reg_name = 
print_operand(address, 0)\n        state_var = eh.getRegVal(reg_name)\n        \n        cmp_var = None\n        if is_high_entropy(state_var):\n\n            op1type = get_operand_type(address, 1)\n            if op1type == o_imm:\n                cmp_var = get_operand_value(address, 1)\n            elif op1type == o_reg:\n                op1_reg_name = print_operand(address, 1)\n                cmp_var = eh.getRegVal(op1_reg_name)\n\n            if cmp_var and state_var != cmp_var:\n                abort = True\n\n    if abort:        \n        if address not in state_excluded:\n            uid = (address, state_var)\n            state_var_cnt[uid] = 1 if uid not in state_var_cnt else state_var_cnt[uid] + 1\n            #debug(f'{address:#x}: The same state variable is compared or conditional moved {state_var_cnt[uid]} times')\n\n            if state_var_cnt[uid] >= g_MAX_SAME_STATE_VAR:\n                error(f'{address:#x}: CFF infinite loop detected. Update the state variable {state_var:#x} with the new one {cmp_var:#x}')\n                debug([f'{ea:#x}: {var=:#x}, {cnt=}' for (ea, var), cnt in state_var_cnt.items()])\n                debug(f'excluded: {[f\"{e:#x}\" for e in state_excluded]}')\n\n                eh.uc.reg_write(eh.regs[reg_name], cmp_var)\n                state_excluded.append(address)\n                # Reset the counts of the external loops\n                state_var_cnt = {}\n\ndef inst_hook(unicornObject, address, instructionSize, userData):\n\n    eh = userData[\"EmuHelper\"]\n    inst_visit_cnt = userData[\"inst_visit_cnt\"]\n\n    inst_visit_cnt[address] = 1 if address not in inst_visit_cnt else inst_visit_cnt[address] + 1\n    if inst_visit_cnt[address] >= g_MAX_INST_VISIT:\n        error(f'{address:#x}: Infinite loop detected. 
Aborted.')\n        eh.stopEmulation(userData)\n\ndef noop(*args):\n\n    pass\n\ndef main():\n\n    info('start')\n    #breakpoint()\n\n    if g_DEBUG_FLARE_EMU:\n        eh = flare_emu.EmuHelper(verbose=10)\n        eh.logger.setLevel(logging.DEBUG)\n    else:\n        eh = flare_emu.EmuHelper()\n\n    hremu = HexRaysEmu(eh)\n\n    selection = idaapi.read_range_selection(None)\n    if selection[0]:\n        info(f'Emulating the selection {selection[1]:#x} to {selection[2]:#x}')\n        enc_heads = {}\n        userData = {\n            'hremu': hremu,\n            'enc_heads': enc_heads\n        }\n        eh.emulateSelection(memAccessHook=mem_write_hook, hookData=userData)\n\n        # Get the head of encoded string\n        stack_buf = eh.getEmuBytes(eh.getRegVal('esp'), g_MAX_STACK_BUF)\n        debug_bin('stack', stack_buf)\n        for i in range(len(stack_buf)):\n            if 65 <= stack_buf[i] <= 122: # A to z\n                offset = i\n                break\n        else:\n            offset = 0\n        #offset = 0x48 # Sometimes you need to adjust the offset manually :-(\n        debug(f'detected offset = {offset:#x}')\n        \n        # Decode the string after detecting the constant value\n        cfunc = get_ctree_root(selection[1], cache=g_CACHE)\n        cvf = cnt_val_finder_t()\n        cvf.apply_to_exprs(cfunc.body, None)\n        cnt_val = cvf.get_cnt_val()\n\n        if cnt_val:\n            if stack_buf[offset + 1] != 0:\n                enc = stack_buf[offset:]\n                debug(f'enc {enc} is ascii')\n            else:\n                enc = eh.getEmuWideString(eh.getRegVal('esp') + offset).decode('utf-16-le')\n                enc = enc.encode()\n                debug(f'enc {enc} is unicode (utf-16-le)')\n            dec = hremu.decode(enc, cnt_val)\n            debug_bin('dec', dec)\n\n            # Extract the ascii strings (no null termination)\n            head = eh.getRegVal('esp') + offset\n            ascs = 
extract_ascii(dec)\n            if ascs:\n                keys = [k for k, v in enc_heads.items() if v == head]\n                if len(keys) == 1:\n                    success(f'{keys[0]:#x}: string decoded \"{ascs[0]}\"')\n                    hremu.set_decomplier_cmt(cfunc, keys[0], ascs[0])\n                else:\n                    success(f'string decoded \"{ascs[0]}\"')\n\n        else:\n            error(f'A constant value for decoding is not found')            \n\n    else:        \n        ans = ida_kernwin.ask_yn(0, 'only decode the selected function?')\n        if ans == ida_kernwin.ASKBTN_YES:\n            fvas = [get_func_attr(get_screen_ea(), FUNCATTR_START)]\n        elif ans == ida_kernwin.ASKBTN_NO:\n            fvas = idautils.Functions()\n        else:\n            info('canceled')\n            return\n\n        for fva in fvas:\n            if get_func_flags(fva) & (FUNC_LIB | FUNC_THUNK):\n                debug(f\"{fva:#x}: skipping library or thunk function\")\n                continue\n\n            fn_name = get_name(get_func_attr(fva, FUNCATTR_START))\n            if fn_name.find(g_stub_GetProcAddress) != -1:\n                debug(f\"{fva:#x}: skipping GetProcAddress stub function\")\n                continue\n\n            print('-' * 100)\n            info(f'{get_name(fva)} ({fva:#x})')\n        \n            '''\n            state_var_cnt = {}\n            state_excluded = []\n            userData = {\n                'hremu': hremu,\n                'state_var_cnt': state_var_cnt,\n                'state_excluded': state_excluded,\n            }\n            eh.emulateRange(fva, callHook=call_hook, instructionHook=inst_hook_cff, hookData=userData, count=g_MAX_EMU_INSN)\n            '''\n            inst_visit_cnt = {}\n            userData = {\n                'hremu': hremu,\n                'inst_visit_cnt': inst_visit_cnt,\n            }\n\n            try:\n                if g_FLAG_ALL_PATHS:\n                    info('The mode 
is iterateAllPaths')\n                    eh.iterateAllPaths(fva, noop, hookData=userData, callHook=call_hook)\n                else:\n                    info('The mode is emulateRange')\n                    eh.emulateRange(fva, callHook=call_hook, instructionHook=inst_hook, hookData=userData)\n            except unicorn.unicorn.UcError as e:\n                error(f'{fva:#x}: unicorn error ({e})')\n\n            refresh_idaview_anyway()\n            eh.resetEmulatorHeapAndStack()\n\n    print('-' * 100)\n    hremu.print_summary()\n\n    info('done')\n\nif __name__ == '__main__':\n    main()\n    \n"
  },
  {
    "path": "callstrings/ida_callstrings_static.py",
    "content": "'''\nida_callstrings_static.py - string deobfuscation for Hodur\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimport idaapi\nidaapi.require('hexrays_utils', package='*')\nfrom hexrays_utils import *\n\ng_DEBUG = False\ng_CACHE = True\ng_memcpy_names = ['qmemcpy', 'wmemcpy', 'strcpy']\n\ndef info(msg):\n    print(\"\\033[34m\\033[1m[*]\\033[0m {}\".format(msg))\n\ndef success(msg):\n    print(\"\\033[32m\\033[1m[+]\\033[0m {}\".format(msg))\n    \ndef error(msg):\n    print(\"\\033[31m\\033[1m[!]\\033[0m {}\".format(msg))\n\ndef debug(msg):\n    if g_DEBUG:\n        print(\"\\033[33m\\033[1m[D]\\033[0m {}\".format(msg))\n\n\nclass static_decoder_t(ctree_visitor_t, HexRaysUtils):\n\n    def __init__(self, cst_val, cfunc):\n        \n        ctree_visitor_t.__init__(self, CV_PARENTS | CV_POST | CV_RESTART)\n        HexRaysUtils.__init__(self)\n\n        self.cst_val = cst_val\n        self.cfunc = cfunc\n\n    def visit_expr(self, expr):\n\n        # Decode the src string by the constant value\n        if expr.op == cot_call and expr.x.op == cot_helper and expr.x.helper in g_memcpy_names:\n            #breakpoint()\n            info(f'{expr.ea:#x}: target helper function \"{expr.x.helper}\" is called')\n            arg_dst = expr.a.at(0)\n            arg_src = expr.a.at(1)\n            #arg_size = expr.a.at(2)\n\n            #if (arg_dst.op == cot_var or (arg_dst.op == cot_ref and arg_dst.x.op == cot_var)) and \\\n            #    (arg_src.op == cot_str or (arg_src.op == cot_cast and arg_src.x.op == cot_str)):\n            if (arg_src.op == cot_str or (arg_src.op == cot_cast and arg_src.x.op == cot_str)):\n                enc = arg_src.string if arg_src.op == cot_str else arg_src.x.string\n                enc = enc.encode('utf-16-le') if expr.x.helper == 'wmemcpy' else enc.encode()\n                info(f'{expr.ea:#x}: src bytes = {enc}')\n                dec = self.decode(enc, self.cst_val).decode()\n                if dec:\n                    
success(f'{expr.ea:#x}: string decoded \"{dec}\"')\n                    self.set_decomplier_cmt(self.cfunc, expr.ea, dec)\n                else:\n                    error(f'{expr.ea:#x}: string decoding failed using a constant value ({self.cst_val:#x})')\n\n        return 0\n    \n\ndef main():\n\n    info('start')\n\n    ans = ida_kernwin.ask_yn(0, 'only decode the selected function?')\n    if ans == ida_kernwin.ASKBTN_YES:\n        fvas = [get_func_attr(get_screen_ea(), FUNCATTR_START)]\n    elif ans == ida_kernwin.ASKBTN_NO:\n        fvas = idautils.Functions()\n    else:\n        info('canceled')\n        return\n    \n    for fva in fvas:\n        if get_func_flags(fva) & (FUNC_LIB | FUNC_THUNK):\n            debug(f\"{fva:#x}: skipping library or thunk function\")\n            continue\n\n        fn_name = get_name(get_func_attr(fva, FUNCATTR_START))\n        if fn_name.find(g_stub_GetProcAddress) != -1:\n            debug(f\"{fva:#x}: skipping GetProcAddress stub function\")\n            continue\n\n        print('-' * 100)\n        info(f'{get_name(fva)} ({fva:#x})')\n\n        cfunc = get_ctree_root(fva, cache=g_CACHE)\n\n        cvf = cnt_val_finder_t()\n        cvf.apply_to_exprs(cfunc.body, None)\n        cnt_val = cvf.get_cnt_val()\n\n        if cnt_val:\n            sd = static_decoder_t(cnt_val, cfunc)\n            sd.apply_to_exprs(cfunc.body, None)\n        else:\n            error(f'{fva:#x}: A constant value for decoding is not found')\n\n        refresh_idaview_anyway()\n\n    print('-' * 100)\n\n    info('done')    \n\nif __name__ == '__main__':\n    main()"
  },
  {
    "path": "eset_crackme/README.org",
    "content": "* IDA Pro loader/processor modules for ESET CrackMe driver VM\n\nYou can download the initial sample for the CrackMe challenge from [[https://join.eset.com/en/challenges/crack-me][here]].\n\nbefore:\n\n[[./img/eset_before.png]]\n\nafter:\n\n[[./img/eset_after.png]]\n\n** Reference\n\n- https://quequero.org/2016/01/eset-crackme-challenge-2015-walkthrough/\n- http://mshetta.blogspot.jp/2016/11/join-eset-crackme-2015-solution.html\n\n\n\n"
  },
  {
    "path": "eset_crackme/loaders/ida_loader_drv_vm.py",
    "content": "import idaapi\nimport ida_segment\nfrom idc import *\nfrom struct import *\n\nDATA_SEG_START = 0x10000 # may be changed\n\ndef accept_file(li, filename):\n    sig = int16(li.read(2))\n    if sig in [0x3713, 0x481c, 0x1337]:\n        return {'format': \"ESET Crackme driver VM program\"}\n    else:\n        return 0\n\ndef int16(b):\n    return unpack('<H', b)[0]\n    \ndef int32(b):\n    return unpack('<I', b)[0]\n\ndef myAddSeg(startea, endea, base, use32, name, clas):\n    s = idaapi.segment_t()\n    s.start_ea = startea\n    s.end_ea   = endea\n    s.sel      = idaapi.setup_selector(base)\n    s.bitness  = use32\n    s.align    = idaapi.saRelPara\n    s.comb     = idaapi.scPub\n    #idaapi.add_segm_ex(s, name, clas, idaapi.ADDSEG_NOSREG|idaapi.ADDSEG_OR_DIE)\n    idaapi.add_segm(base, startea, endea, name, clas)\n\ndef load_file(li, neflags, format):\n    li.seek(0) # needed to read signature\n    sig = int16(li.read(2)) \n    size = int32(li.read(4)) # the program size\n    code_off = int32(li.read(4)) # the code segment offset\n    if sig != 0x3713: # for inline VM\n        code_off = 0x12 \n    data_off = int32(li.read(4)) # the data segment offset\n    flag_kernel_mode = int32(li.read(4))\n    \n    #set_processor_type('eset_vm', SETPROC_USER | SETPROC_LOADER)\n    set_processor_type('eset_vm', SETPROC_LOADER)\n\n    # Create segment & Populate\n    #'''\n    myAddSeg(0, data_off - code_off, 0, 1, 'VM_CODE', \"CODE\")\n    li.file2base(li.tell(), 0, data_off - code_off, 1)\n    myAddSeg(DATA_SEG_START, DATA_SEG_START + size - data_off, 0, 1, 'VM_DATA', \"DATA\") # flat memory space\n    #myAddSeg(DATA_SEG_START, DATA_SEG_START + size - data_off, DATA_SEG_START >> 4, 1, 'VM_DATA', \"DATA\") # segmentation (base should be in paragraphs 16-bits)    \n    li.file2base(li.tell(), DATA_SEG_START, DATA_SEG_START + size - data_off, 1)\n    '''\n    myAddSeg(code_off, data_off, 0, 1, 'VM_CODE', \"CODE\")\n    li.file2base(li.tell(), code_off, data_off, 
1)\n    myAddSeg(data_off, size, 0, 1, 'VM_DATA', \"DATA\")\n    li.file2base(li.tell(), data_off, size, 1)\n    '''\n\n    # initialize\n    set_inf_attr(INF_START_EA, 0)\n    set_inf_attr(INF_START_IP, 0)\n    set_inf_attr(INF_START_CS, 0)\n    #add_entry(0, ep, \"start\", 1)\n    add_entry(0, 0, \"start\", 1)\n\n    # should return 1 or terminate immediately\n    return 1 \n\n"
  },
  {
    "path": "eset_crackme/procs/ida_processor_drv_vm.py",
    "content": "import sys\nimport copy\n\nimport ida_idaapi\nimport ida_idp\nimport ida_ua\nimport ida_bytes\nimport ida_xref\nimport ida_offset\nimport ida_problems\nimport ida_lines\nimport ida_segment\n\nfrom ida_idp import CF_USE1, CF_USE2, CF_CHG1, CF_CHG2, CF_STOP, CF_JUMP, CF_SHFT, CF_CALL\n\n# enum definitions from VM engine idb\n# enum_vm_size\nSIZE_BYTE = 0\nSIZE_WORD = 1\nSIZE_DWORD = 2\n# enum_vm_type\nTYPE_REG_VAL = 0\nTYPE_REG_PTR = 1\nTYPE_IMM_VAL = 2 \nTYPE_DATA_OFF = 3\n# enum_vm_cmp\nCMP_EQUAL = 0\nCMP_NOT_EQUAL = 1\nCMP_LESS_THAN = 2\n# enum_vm_arith\nARITH_XOR = 0\nARITH_ADD = 1\nARITH_SUB = 2\nARITH_SHL = 3\nARITH_SHR = 4\nARITH_ROL = 5\nARITH_ROR = 6\nARITH_MOD = 7\n\n# ----------------------------------------------------------------------\nclass eset_drv_vm_processor_t(ida_idp.processor_t):\n    \"\"\"\n    Processor module classes must derive from ida_idp.processor_t\n    \"\"\"\n\n    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)\n    id = 0x8fff\n\n    # Processor features\n    flag = ida_idp.PRN_HEX | ida_idp.PR_RNAMESOK \n\n    # Number of bits in a byte for code segments (usually 8)\n    # IDA supports values up to 32 bits\n    cnbits = 8\n\n    # Number of bits in a byte for non-code segments (usually 8)\n    # IDA supports values up to 32 bits\n    dnbits = 8\n\n    # short processor names\n    # Each name should be shorter than 9 characters\n    psnames = ['eset_vm']\n\n    # long processor names\n    # No restriction on name lengthes.\n    plnames = ['ESET Crackme driver VM processor']\n\n    # size of a segment register in bytes\n    segreg_size = 0\n\n    # Array of instructions\n    instruc = [\n      {'name': '',      'feature': 0},  # placeholder for \"not an instruction\"\n      {'name': 'hlt',   'feature': CF_STOP,   'cmt': \"halt CPU\"},\n      {'name': 'mov',   'feature': CF_USE1 | CF_USE2 | CF_CHG1,   'cmt': \"move\"},      \n      {'name': 'ncall', 'feature': CF_USE1 | CF_CALL,   'cmt': \"call 
native function\"},\n      {'name': 'lcall', 'feature': CF_USE1 | CF_USE2 | CF_CALL,   'cmt': \"call library function\"},\n      {'name': 'push',  'feature': CF_USE1,   'cmt': \"push to stack\"},\n      {'name': 'pop',   'feature': CF_USE1 | CF_CHG1,   'cmt': \"pop from stack\"},      \n      {'name': 'cmpeq', 'feature': CF_USE1 | CF_USE2,   'cmt': \"compare #0 (equal)\"},\n      {'name': 'cmpne', 'feature': CF_USE1 | CF_USE2,   'cmt': \"compare #1 (not equal)\"},\n      {'name': 'cmpb',  'feature': CF_USE1 | CF_USE2,   'cmt': \"compare #2 (less than)\"},\n      {'name': 'jmp',   'feature': CF_USE1 | CF_JUMP | CF_STOP,   'cmt': \"jump #0 (unconditional)\"},\n      {'name': 'cjmp',  'feature': CF_USE1 | CF_JUMP,   'cmt': \"jump #1 (conditional)\"},\n      {'name': 'call',  'feature': CF_USE1 | CF_CALL,   'cmt': \"call VM function\"},\n      {'name': 'ret',   'feature': 0,   'cmt': \"return\"},\n      {'name': 'xor',   'feature': CF_USE1 | CF_USE2 | CF_CHG1,   'cmt': \"arithmetic operation #0 (xor)\"},\n      {'name': 'add',   'feature': CF_USE1 | CF_USE2 | CF_CHG1,   'cmt': \"arithmetic operation #1 (add)\"},\n      {'name': 'sub',   'feature': CF_USE1 | CF_USE2 | CF_CHG1,   'cmt': \"arithmetic operation #2 (sub)\"},\n      {'name': 'shl',   'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT,   'cmt': \"arithmetic operation #3 (shift left)\"},\n      {'name': 'shr',   'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT,   'cmt': \"arithmetic operation #4 (shift right)\"},\n      {'name': 'rol',   'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT,   'cmt': \"arithmetic operation #5 (rotation left)\"},\n      {'name': 'ror',   'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT,   'cmt': \"arithmetic operation #6 (rotation right)\"},\n      {'name': 'mod',   'feature': CF_USE1 | CF_USE2 | CF_CHG1,   'cmt': \"arithmetic operation #7 (modulo)\"},\n      {'name': 'alloc', 'feature': CF_USE1,   'cmt': \"allocate buffer\"},\n      {'name': 'free',  'feature': CF_USE1,   'cmt': 
\"free buffer\"},\n      {'name': 'loadVM','feature': CF_USE1 | CF_USE2,   'cmt': \"load another VM\"},\n      {'name': 'nop',   'feature': 0,   'cmt': \"nop\"},      \n    ]\n\n    # icode of the first instruction\n    instruc_start = 0\n\n    # icode of the last instruction + 1\n    instruc_end = len(instruc) + 1\n\n    # Size of long double (tbyte) for this processor (meaningful only if ash.a_tbyte != NULL) (optional)\n    # tbyte_size = 0\n\n    #\n    # Number of digits in floating numbers after the decimal point.\n    # If an element of this array equals 0, then the corresponding\n    # floating point data is not used for the processor.\n    # This array is used to align numbers in the output.\n    #      real_width[0] - number of digits for short floats (only PDP-11 has them)\n    #      real_width[1] - number of digits for \"float\"\n    #      real_width[2] - number of digits for \"double\"\n    #      real_width[3] - number of digits for \"long double\"\n    # Example: IBM PC module has { 0,7,15,19 }\n    #\n    # (optional)\n    #real_width = (0, 7, 0, 0)\n\n\n    # only one assembler is supported\n    assembler = {\n        # flag (mostly for the format)\n        'flag' : ida_idp.ASH_HEXF3 | ida_idp.ASD_DECF0 | ida_idp.ASO_OCTF5 | ida_idp.ASB_BINF0 | ida_idp.AS_N2CHR,\n\n        # user defined flags (local only for IDP) (optional)\n        #'uflag' : 0,\n\n        # Assembler name (displayed in menus)\n        'name': \"ESET Crackme driver VM assembler\",\n\n        # array of automatically generated header lines they appear at the start of disassembled text (optional)\n        'header': [\".esetvm\"],\n\n        # array of unsupported instructions (array of insn.itype) (optional)\n        #'badworks': [],\n\n        # org directive\n        'origin': \".org\",\n\n        # end directive\n        'end': \".end\",\n\n        # comment string (see also cmnt2)\n        'cmnt': \";\",\n\n        # ASCII string delimiter\n        'ascsep': \"\\\"\",\n\n      
  # ASCII char constant delimiter\n        'accsep': \"'\",\n\n        # ASCII special chars (they can't appear in character and ascii constants)\n        'esccodes': \"\\\"'\",\n\n        #\n        #      Data representation (db,dw,...):\n        #\n        # ASCII string directive\n        'a_ascii': \".char\",\n\n        # byte directive\n        'a_byte': \"db\",\n\n        # word directive\n        'a_word': \"dw\",\n\n        # remove if not allowed\n        'a_dword': \"dd\",\n\n        # remove if not allowed\n        # 'a_qword': \"dq\",\n\n        # float;  4bytes; remove if not allowed\n        #'a_float': \".float\",\n\n        # uninitialized data directive (should include '%s' for the size of data)\n        'a_bss': \".space %s\",\n\n        # 'equ' Used if AS_UNEQU is set (optional)\n        #'a_equ': \".equ\",\n\n        # 'seg ' prefix (example: push seg seg001)\n        'a_seg': \"seg\",\n\n        # current IP (instruction pointer) symbol in assembler\n        'a_curip': \"$\",\n\n        # \"public\" name keyword. NULL-gen default, \"\"-do not generate\n        'a_public': \".def\",\n\n        # \"weak\"   name keyword. 
NULL-gen default, \"\"-do not generate\n        'a_weak': \"\",\n\n        # \"extrn\"  name keyword\n        'a_extrn': \".ref\",\n\n        # \"comm\" (communal variable)\n        'a_comdef': \"\",\n\n        # \"align\" keyword\n        'a_align': \".align\",\n\n        # Left and right braces used in complex expressions\n        'lbrace': \"(\",\n        'rbrace': \")\",\n\n        # %  mod     assembler time operation\n        'a_mod': \"%\",\n\n        # &  bit and assembler time operation\n        'a_band': \"&\",\n\n        # |  bit or  assembler time operation\n        'a_bor': \"|\",\n\n        # ^  bit xor assembler time operation\n        'a_xor': \"^\",\n\n        # ~  bit not assembler time operation\n        'a_bnot': \"~\",\n\n        # << shift left assembler time operation\n        'a_shl': \"<<\",\n\n        # >> shift right assembler time operation\n        'a_shr': \">>\",\n\n        # size of type (format string) (optional)\n        'a_sizeof_fmt': \"size %s\",\n\n        'flag2': 0,\n\n        # the include directive (format string) (optional)\n        'a_include_fmt': '.include \"%s\"',\n    } # Assembler\n\n\n    # ----------------------------------------------------------------------\n    # The following callbacks are optional\n    #\n\n    #def notify_newprc(self, nproc):\n    #    \"\"\"\n    #    Before changing proccesor type\n    #    nproc - processor number in the array of processor names\n    #    return 1-ok,0-prohibit\n    #    \"\"\"\n    #    return 1\n\n    #def notify_assemble(self, ea, cs, ip, use32, line):\n    #    \"\"\"\n    #    Assemble an instruction\n    #     (make sure that ida_idp.PR_ASSEMBLE flag is set in the processor flags)\n    #     (display a warning if an error occurs)\n    #     args:\n    #       ea -  linear address of instruction\n    #       cs -  cs of instruction\n    #       ip -  ip of instruction\n    #       use32 - is 32bit segment?\n    #       line - line to assemble\n    #    returns the 
opcode string\n    #    \"\"\"\n    #    pass\n\n    def notify_get_frame_retsize(self, func_ea):\n        \"\"\"\n        Get size of function return address in bytes\n        If this function is absent, the kernel will assume\n             4 bytes for 32-bit function\n             2 bytes otherwise\n        \"\"\"\n        return 2\n\n    def notify_get_autocmt(self, insn):\n        \"\"\"\n        Get instruction comment. 'insn' describes the instruction in question\n        @return: None or the comment string\n        \"\"\"\n        if 'cmt' in self.instruc[insn.itype]:\n          return self.instruc[insn.itype]['cmt']\n\n    # ----------------------------------------------------------------------\n    def notify_is_sane_insn(self, insn, no_crefs):\n        \"\"\"\n        is the instruction sane for the current file type?\n        args: no_crefs\n        1: the instruction has no code refs to it.\n           ida just tries to convert unexplored bytes\n           to an instruction (but there is no other\n           reason to convert them into an instruction)\n        0: the instruction is created because\n           of some coderef, user request or another\n           weighty reason.\n        The instruction is in 'insn'\n        returns: 1-ok, <=0-no, the instruction isn't\n        likely to appear in the program\n        \"\"\"\n        #w = ida_bytes.get_wide_word(insn.ea)\n        #if w == 0 or w == 0xFFFF:\n        #  return 0\n        #return 1\n        return -1\n\n    # ----------------------------------------------------------------------\n    def handle_operand(self, insn, op, isRead):\n      flags     = ida_bytes.get_flags(insn.ea)\n      is_offs   = ida_bytes.is_off(flags, op.n)\n      dref_flag = ida_xref.dr_R if isRead else ida_xref.dr_W\n      def_arg   = ida_bytes.is_defarg(flags, op.n)\n      optype    = op.type\n\n      itype = insn.itype\n      # create code xrefs\n      if optype == ida_ua.o_imm:        \n        makeoff = False\n        if 
itype in [self.itype_ncall, self.itype_call]:\n          insn.add_cref(op.value, op.offb, ida_xref.fl_CN)\n          makeoff = True\n        #elif itype == self.itype_mov: # e.g., mov #addr, PC\n        #  insn.add_cref(op.value, op.offb, ida_xref.fl_JN)\n        #  makeoff = True        \n        if makeoff and not def_arg:\n          otype = ida_offset.get_default_reftype(insn.ea)\n          ida_offset.op_offset(insn.ea, op.n, otype, ida_idaapi.BADADDR, insn.cs)\n          is_offs = True\n        if is_offs:\n          insn.add_off_drefs(op, ida_xref.dr_O, 0)\n      elif optype == ida_ua.o_near:\n        if insn.itype in [self.itype_ncall, self.itype_call]:\n            fl = ida_xref.fl_CN\n        else:\n            fl = ida_xref.fl_JN\n        insn.add_cref(op.addr, op.offb, fl)\n      # create data xrefs\n      elif optype == ida_ua.o_mem:\n        insn.create_op_data(op.addr, op.offb, op.dtype)\n        insn.add_dref(op.addr, op.offb, dref_flag)\n        '''\n        ds = ida_segment.get_segm_by_name('VM_DATA')        \n        start = ds.start_ea\n        insn.create_op_data(start + op.addr, op.offb, op.dtype)\n        insn.add_dref(start + op.addr, op.offb, dref_flag)\n        '''\n\n    # ----------------------------------------------------------------------\n    # The following callbacks are mandatory\n    #\n    def notify_emu(self, insn):\n      \"\"\"\n      Emulate instruction, create cross-references, plan to analyze\n      subsequent instructions, modify flags etc. 
Upon entrance to this function\n      all information about the instruction is in 'insn' structure.\n      If zero is returned, the kernel will delete the instruction.\n      \"\"\"\n      aux = self.get_auxpref(insn)\n      Feature = insn.get_canon_feature()\n\n      if Feature & CF_USE1:\n        self.handle_operand(insn, insn.Op1, 1)\n      if Feature & CF_CHG1:\n        self.handle_operand(insn, insn.Op1, 0)\n      if Feature & CF_USE2:\n        self.handle_operand(insn, insn.Op2, 1)\n      if Feature & CF_CHG2:\n        self.handle_operand(insn, insn.Op2, 0)\n      if Feature & CF_JUMP:\n        ida_problems.remember_problem(ida_problems.PR_JUMP, insn.ea)\n\n      # is it an unconditional jump?\n      uncond_jmp = insn.itype in [self.itype_jmp]\n\n      # add flow\n      flow = (Feature & CF_STOP == 0) and not uncond_jmp\n      if flow:\n        insn.add_cref(insn.ea + insn.size, 0, ida_xref.fl_F)\n\n      return 1\n\n    # ----------------------------------------------------------------------\n    def notify_out_operand(self, ctx, op):\n      \"\"\"\n        Generate text representation of an instructon operand.\n        This function shouldn't change the database, flags or anything else.\n        All these actions should be performed only by the emu() function.\n        This function uses out_...() functions from ua.hpp to generate the operand text\n        Returns: 1-ok, 0-operand is hidden.\n      \"\"\"\n      optype = op.type\n      dtype = op.dtype\n      signed = 0\n\n      if optype == ida_ua.o_reg:\n        if dtype == ida_ua.dt_byte:          \n          #ctx.out_register('b')\n          ctx.out_keyword('byte ')\n        elif dtype == ida_ua.dt_word:          \n          #ctx.out_register('w')\n          ctx.out_keyword('word ')\n        ctx.out_register(self.reg_names[op.reg])\n      elif optype == ida_ua.o_phrase:\n        if dtype == ida_ua.dt_dword:          \n          ctx.out_keyword('dword ptr ')\n        elif dtype == ida_ua.dt_byte:\n       
   ctx.out_keyword('byte ptr ')\n        elif dtype == ida_ua.dt_word:          \n          ctx.out_keyword('word ptr ')\n        ctx.out_symbol('[')\n        ctx.out_register(self.reg_names[op.reg])\n        ctx.out_symbol(']')\n      elif optype == ida_ua.o_imm:\n        ctx.out_symbol('#')\n        ctx.out_value(op, ida_ua.OOFW_IMM | signed )\n      elif optype in [ida_ua.o_near, ida_ua.o_mem]:\n        r = ctx.out_name_expr(op, op.addr, ida_idaapi.BADADDR)\n        if not r:\n          ctx.out_tagon(ida_lines.COLOR_ERROR)\n          ctx.out_long(op.addr, 16)\n          ctx.out_tagoff(ida_lines.COLOR_ERROR)\n          ida_problems.remember_problem(ida_problems.PR_NONAME, ctx.insn.ea)\n      else:\n        return False\n        \n      # for Op2 of mov instruction\n      #if op.specflag1:\n      #  ctx.out_keyword(' as ptr')\n\n      return True\n\n    # ----------------------------------------------------------------------\n    def notify_out_insn(self, ctx):\n        \"\"\"\n        Generate text representation of an instruction in 'ctx.insn' structure.\n        This function shouldn't change the database, flags or anything else.\n        All these actions should be performed only by emu() function.\n        Returns: nothing\n        \"\"\"\n        postfix = \"\"\n\n        ctx.out_mnemonic()\n\n        # output first operand\n        # kernel will call outop()\n        if ctx.insn.Op1.type != ida_ua.o_void:\n            ctx.out_one_operand(0)\n\n        # output the rest of operands separated by commas\n        for i in xrange(1, 3):\n            if ctx.insn[i].type == ida_ua.o_void:\n                break\n            ctx.out_symbol(',')\n            ctx.out_char(' ')\n            ctx.out_one_operand(i)\n\n        ctx.set_gen_cmt() # generate comment at the next call to MakeLine()\n        ctx.flush_outbuf()\n\n    def fill_reg(self, op, dtype, regno):\n      op.type = ida_ua.o_reg\n      op.dtype = dtype\n      op.reg = regno\n      #op.specflag1 = 0     
\n\n    def fill_phrase(self, op, dtype, regno):\n      op.type = ida_ua.o_phrase\n      op.dtype = dtype\n      op.phrase = regno\n      #op.specflag1 = 0     \n\n    def fill_imm(self, op, dtype, val):\n      op.type = ida_ua.o_imm\n      op.dtype = dtype\n      op.value = val\n      #op.specflag1 = 0     \n\n    def fill_near(self, op, dtype, addr):\n      op.type = ida_ua.o_near\n      op.dtype = dtype\n      op.addr = addr\n      #op.specflag1 = 0     \n\n    def fill_mem(self, op, dtype, addr):\n      op.type = ida_ua.o_mem\n      op.dtype = dtype\n      #op.addr = addr\n      # add data segment base addr\n      ds = ida_segment.get_segm_by_name('VM_DATA')        \n      op.addr = ds.start_ea + addr\n      #op.specflag1 = 0     \n\n    def get_next_bytes(self, insn, dtype):\n      if dtype == ida_ua.dt_byte:\n        return insn.get_next_byte()\n      elif dtype == ida_ua.dt_word:\n        return insn.get_next_word()\n      elif dtype == ida_ua.dt_dword:\n        return insn.get_next_dword()\n\n    def set_operand(self, insn, op, type_, regno, dtype):\n      # check dtype\n      if dtype > 2:\n        return -1\n      \n      # IDA data type enum is matched with enum_vm_size of the idb\n      if type_ == TYPE_REG_VAL:\n        self.fill_reg(op, dtype, regno)\n      elif type_ == TYPE_REG_PTR:\n        self.fill_phrase(op, dtype, regno)\n      elif type_ == TYPE_IMM_VAL:\n        val = self.get_next_bytes(insn, dtype)\n        self.fill_imm(op, dtype, val)\n      elif type_ == TYPE_DATA_OFF:\n        dt_off = insn.get_next_dword()\n        self.fill_mem(op, dtype, dt_off)\n      return 0\n    \n    # ----------------------------------------------------------------------\n    def notify_ana(self, insn):\n      \"\"\"\n      Decodes an instruction into 'insn'.\n      Returns: insn.size (=the size of the decoded instruction) or zero\n      \"\"\"      \n      opc = insn.get_next_byte()        \n      # cmp (0x6), jmp (0x7),  arithmetic operation (0xa): multiple 
instructions\n      # 0xe - 0xff: nop\n      if opc > 0xd:\n        insn.itype = self.itype_nop\n      elif opc > 0xa:\n        insn.itype = self.itype_hlt + opc + 2 + 1 + 7\n      elif opc > 7:\n        insn.itype = self.itype_hlt + opc + 2 + 1\n      elif opc > 6:\n        insn.itype = self.itype_hlt + opc + 2\n      else:\n        insn.itype = self.itype_hlt + opc\n\n      if insn.itype not in [self.itype_hlt, self.itype_ret, self.itype_nop]:\n        if insn.itype in [self.itype_call, self.itype_jmp]:\n          if insn.itype == self.itype_jmp:\n            cflag = insn.get_next_byte() # check conditional flag\n            if cflag > 1:\n              return 0 # invalid flag value\n            insn.itype += cflag\n          addr = insn.get_next_dword()\n          self.fill_near(insn.Op1, ida_ua.dt_dword, addr)\n        elif insn.itype == self.itype_pop:\n          regno = insn.get_next_byte() & 0xf\n          self.fill_reg(insn.Op1, ida_ua.dt_dword,  regno)\n        elif insn.itype in [self.itype_push, self.itype_alloc, self.itype_free, self.itype_ncall]:\n          b1 = insn.get_next_byte()\n          dtype = ida_ua.dt_dword if insn.itype == self.itype_ncall else b1 >> 6\n          if self.set_operand(insn, insn.Op1, (b1 >> 4) & 3, b1 & 0xf, dtype):\n            return 0 # invalid dtype\n        elif insn.itype in [self.itype_lcall, self.itype_loadVM]:\n          b1 = insn.get_next_byte()\n          b2 = insn.get_next_byte()\n          if self.set_operand(insn, insn.Op1, b2 & 3, b1 & 0xf, ida_ua.dt_dword):\n            return 0 # invalid dtype\n          dtype = ida_ua.dt_dword if insn.itype == self.itype_lcall else (b2 >> 4) & 3\n          if self.set_operand(insn, insn.Op2, (b2 >> 2) & 3, b1 >> 4, dtype):\n            return 0 # invalid dtype\n        elif insn.itype == self.itype_mov:\n          b1 = insn.get_next_byte()\n          b2 = insn.get_next_byte()            \n          dtype = (b2 >> 4) & 3\n          if self.set_operand(insn, insn.Op2, b2 & 3, 
b1 >> 4, dtype):\n            return 0 # invalid dtype\n          dst_regno = b1 & 0xf\n          if (b2 >> 2) & 3: # used as pointer\n            self.fill_phrase(insn.Op1, dtype, dst_regno)\n            #insn.Op2.specflag1 = 1\n          else:\n            self.fill_reg(insn.Op1, dtype, dst_regno)\n        elif insn.itype in [self.itype_cmpeq, self.itype_xor]:\n          b1 = insn.get_next_byte()\n          b2 = insn.get_next_byte()\n          self.fill_reg(insn.Op1, ida_ua.dt_dword, b1 & 0xf)            \n          if self.set_operand(insn, insn.Op2, b2 & 3, b1 >> 4, (b2 >> 2) & 3):\n            return 0 # invalid dtype\n          # update itype\n          itype_idx = (b2 >> 4) & 7\n          if insn.itype == self.itype_cmpeq and itype_idx > 2:\n            return 0 # invalid cmp operation\n          else:\n            insn.itype += itype_idx                          \n\n      # Return decoded instruction size or zero\n      return insn.size if insn.itype != self.itype_null else 0\n\n    # ----------------------------------------------------------------------\n    def init_instructions(self):\n        Instructions = []\n        i = 0\n        for x in self.instruc:\n            if x['name'] != '':\n                setattr(self, 'itype_' + x['name'], i)\n            else:\n                setattr(self, 'itype_null', i)\n            i += 1\n\n        # icode of the last instruction + 1\n        self.instruc_end = len(self.instruc) + 1\n\n    # ----------------------------------------------------------------------\n    def init_registers(self):\n      \"\"\"\n      This function parses the register table and creates corresponding ireg_XXX constants\n      \"\"\"\n\n      # Registers definition\n      self.reg_names = [\n        # General purpose registers\n        \"r0\", \n        \"r1\", \n        \"r2\", \n        \"r3\", \n        \"r4\", \n        \"r5\",        \n        # SP\n        \"r6\",\n        # VM pointer\n        \"r7\",        \n        # VM size\n 
       \"r8\",\n        # ntoskrnl_base\n        \"r9\",\n        # arg registers\n        \"r10\",\n        \"r11\",\n        \"r12\",\n        \"r13\",\n        \"r14\",\n        \"r15\",\n        # Fake segment registers\n        \"CS\",\n        \"DS\",\n      ]\n\n      # Create the ireg_XXXX constants\n      for i in xrange(len(self.reg_names)):\n        setattr(self, 'ireg_' + self.reg_names[i], i)\n\n      # Segment register information (use virtual CS and DS registers if your\n      # processor doesn't have segment registers):\n      self.reg_first_sreg = self.ireg_CS\n      self.reg_last_sreg  = self.ireg_DS\n\n      # number of CS register\n      self.reg_code_sreg = self.ireg_CS\n\n      # number of DS register\n      self.reg_data_sreg = self.ireg_DS\n\n    # ----------------------------------------------------------------------\n    def __init__(self):\n        ida_idp.processor_t.__init__(self)\n        self.init_instructions()\n        self.init_registers()\n\n# ----------------------------------------------------------------------\n# Every processor module script must provide this function.\n# It should return a new instance of a class derived from ida_idp.processor_t\ndef PROCESSOR_ENTRY():\n    return eset_drv_vm_processor_t()\n"
  },
  {
    "path": "fn_fuzzy/README.org",
    "content": "#+OPTIONS: ^:{}\n\n#+TITLE: fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\n\n* Motivation\n\nSee the [[https://conference.hitb.org/hitbsecconf2019ams/sessions/fn_fuzzy-fast-multiple-binary-diffing-triage-with-ida/][conference information]] or the [[https://www.carbonblack.com/2019/05/09/fn_fuzzy-fast-multiple-binary-diffing-triage-with-ida/][blog post]].\n\n* how to use\n\n- fn_fuzzy.py :: IDAPython script to export/compare fuzzy hashes of the sample\n- cli_export.py :: Python wrapper script to export fuzzy hashes of multiple samples\n\nThe typical usage is to run cli_export.py to make a database for large idbs, then compare in IDA by executing fn_fuzzy.py.\n\n[[./img/fn_fuzzy.png]]\n\n[[./img/res_summary.png]]\n\n[[./img/res_funcs.png]]\n\n* supported IDB version\n\nIDBs generated by IDA 6.9 or later (the SHA256 retrieval API is only supported in those versions)\n\n* required python packages\n\n- mmh3\n- [[https://github.com/williballenthin/python-idb][python-idb]]\n\n"
  },
  {
    "path": "fn_fuzzy/cli_export.py",
    "content": "# cli_export.py - batch export script for fn_fuzzy\n# Takahiro Haruyama (@cci_forensics)\n\nimport argparse, subprocess, os, sqlite3, time, sys\nimport idb # python-idb\nimport logging\nlogging.basicConfig(level=logging.ERROR) # to suppress python-idb warning\n\n# plz edit the following paths\ng_ida_dir = r'C:\\analysisw\\tool\\IDA'\ng_db_path = r'C:\\analysisw\\tics\\fn_fuzzy.sqlite'\ng_fn_fuzzy_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'fn_fuzzy.py')\n\ng_min_bytes = 0x10 # minimum number of extracted code bytes per function\ng_analyzed_prefix = r'fn_' # analyzed function name prefix (regex)\n\nclass LocalError(Exception): pass\nclass ProcExportError(LocalError): pass\n\ndef info(msg):\n    print(\"[*] {}\".format(msg))\n\ndef success(msg):\n    print(\"[+] {}\".format(msg))\n\ndef error(msg):\n    print(\"[!] {}\".format(msg))\n\ndef init_db(cur):\n    cur.execute(\"SELECT * FROM sqlite_master WHERE type='table'\")\n    if cur.fetchone() is None:\n        info('DB initialized')\n        cur.execute(\"CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)\")\n        #cur.execute(\"CREATE INDEX sha256_index ON sample(sha256)\")\n        cur.execute(\"CREATE INDEX path_index ON sample(path)\")\n        cur.execute(\"CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))\")\n        cur.execute(\"CREATE INDEX f_ana_index ON function(f_ana)\")        \n        cur.execute(\"CREATE INDEX bsize_index ON function(bsize)\")\n\ndef existed(cur, sha256):\n    cur.execute(\"SELECT * FROM sample WHERE sha256 = ?\", (sha256,))\n    if cur.fetchone() is None:\n        return False\n    else:\n        return True        \n\ndef remove(cur, sha256):\n    cur.execute(\"DELETE FROM sample WHERE sha256 = ?\", (sha256,))\n    cur.execute(\"DELETE FROM function WHERE sha256 = ?\", (sha256,))    \n    \ndef export(f_debug, idb_path, outdb, min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre, 
f_remove):\n    # check the ext and signature\n    ext = os.path.splitext(idb_path)[1]\n    if ext != '.idb' and ext != '.i64':\n        return 0   \n    with open(idb_path, 'rb') as f:\n        sig = f.read(4)        \n    if sig != b'IDA1' and sig != b'IDA2':\n        return 0\n\n    # check the database record for the idb\n    #print idb_path\n    conn = sqlite3.connect(outdb)\n    cur = conn.cursor()\n    init_db(cur)\n    with idb.from_file(idb_path) as db: # Fix: Cause NameError. need to rewrite in IDA batch mode to calculate SHA256\n        api = idb.IDAPython(db)\n        try:\n            sha256 = api.ida_nalt.retrieve_input_file_sha256()            \n        except KeyError:\n            error('{}: ida_nalt.retrieve_input_file_sha256() failed. The API is supported in 6.9 or later idb version. Check the API on IDA for validation.'.format(idb_path))\n            return 0\n        sha256 = sha256.lower()\n    if f_remove:\n        remove(cur, sha256)\n        success('{}: the records successfully removed (SHA256={})'.format(idb_path, sha256))\n        conn.commit()\n        cur.close()            \n        return 0        \n    if existed(cur, sha256) and not f_update:\n        info('{}: The sample records are present in DB (SHA256={}). 
Skipped.'.format(idb_path, sha256))\n        return 0\n    conn.commit()\n    cur.close()    \n    \n    ida = 'ida.exe' if sig == 'IDA1' else 'ida64.exe'\n    ida_path = os.path.join(g_ida_dir, ida)\n    #cmd = [ida_path, '-L{}'.format(os.path.join(g_ida_dir, 'debug.log')), '-S{}'.format(g_fn_fuzzy_path), '-Ofn_fuzzy:{}:{}:{}:{}:{}:{}'.format(min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre, outdb), idb_path]\n    cmd = [ida_path, '-S{}'.format(g_fn_fuzzy_path), '-Ofn_fuzzy:{}:{}:{}:{}:{}:{}'.format(min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre, outdb), idb_path]\n    if not f_debug:\n        cmd.insert(1, '-A')\n    #print cmd        \n    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n    stdout, stderr = proc.communicate()\n    if proc.returncode == 0:\n        success('{}: successfully exported'.format(idb_path))\n        return 1\n    elif proc.returncode == 2: # skipped\n        return 0\n    else: # maybe 1\n        raise ProcExportError('{}: Something wrong with the IDAPython script (returncode={}). 
Use -d for debug'.format(idb_path, proc.returncode))\n\ndef list_file(d):\n    for entry in os.listdir(d):\n        if os.path.isfile(os.path.join(d, entry)):\n            yield os.path.join(d, entry)\n\ndef list_file_recursive(d):\n    for root, dirs, files in os.walk(d):\n        for file_ in files:\n            yield os.path.join(root, file_)    \n\ndef main():\n    info('start')\n    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)\n    parser.add_argument('target', help=\"idb file or folder to export\")\n    parser.add_argument('--outdb', '-o', default=g_db_path, help=\"export DB path\")\n    parser.add_argument('--min_', '-m', type=int, default=g_min_bytes, help=\"minimum number of extracted code bytes per function\")\n    parser.add_argument('--exclude', '-e', action='store_true', help=\"exclude library/thunk functions\")\n    parser.add_argument('--update', '-u', action='store_true', help=\"update the DB records\")\n    parser.add_argument('--ana_exp', '-a', action='store_true', help=\"check analyzed functions\")\n    parser.add_argument('--ana_pre', '-p', default=g_analyzed_prefix, help=\"analyzed function name prefix (regex)\")    \n    parser.add_argument('--recursively', '-r', action='store_true', help=\"export idbs recursively\")\n    parser.add_argument('--debug', '-d', action='store_true', help=\"display IDA dialog for debug\")\n    parser.add_argument('--remove', action='store_true', help=\"remove records from db\")\n    args = parser.parse_args()\n\n    start = time.time()\n    cnt = 0\n    if os.path.isfile(args.target):\n        try:\n            cnt += export(args.debug, args.target, args.outdb, args.min_, args.exclude, args.update, args.ana_exp, args.ana_pre, args.remove)\n        except LocalError as e:\n            error('{} ({})'.format(str(e), type(e)))\n            return         \n    elif os.path.isdir(args.target):\n        gen_lf = list_file_recursive if args.recursively else list_file\n        
for t in gen_lf(args.target):\n            try:\n                cnt += export(args.debug, t, args.outdb, args.min_, args.exclude, args.update, args.ana_exp, args.ana_pre, args.remove)\n            except LocalError as e:\n                error('{} ({})'.format(str(e), type(e)))\n                return         \n    else:\n        error('the target is not file/dir')\n        return\n    elapsed = time.time() - start\n    success('totally {} samples exported'.format(cnt))\n    info('elapsed time = {} sec'.format(elapsed))\n    info('done')\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "fn_fuzzy/dump_types.py",
    "content": "import os\n\ndef main():\n    path = os.path.splitext(get_idb_path())[0] + '.idc'\n    gen_file(OFILE_IDC, path, 0, 0, GENFLG_IDCTYPE)\n    Exit(0)\n\nif ( __name__ == \"__main__\" ):\n    main()\n"
  },
  {
    "path": "fn_fuzzy/fn_fuzzy.py",
    "content": "# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\n# Takahiro Haruyama (@cci_forensics)\n\nimport os, ctypes, sqlite3, re, time, sys, subprocess\nimport cProfile\nfrom collections import defaultdict\nfrom pprint import PrettyPrinter\nfrom io import StringIO\nfrom tqdm import tqdm\n\nfrom idc import *\nimport idautils, ida_nalt, ida_kernwin, idaapi, ida_expr\n\nimport mmh3\nimport yara_fn # modified version in the same folder\n\ng_db_path = r'Z:\\haru\\analysis\\tics\\fn_fuzzy.sqlite' # plz edit your path\ng_min_bytes = 0x10 # minimum number of extracted code bytes per function\ng_analyzed_prefix = r'fn_|func_' # analyzed function name prefix (regex)\ng_threshold = 50 # function similarity score threshold without CFG match\ng_threshold_cfg = 10 # function similarity score threshold with CFG match\ng_max_bytes_for_score = 0x100 # more code bytes are evaluated by only CFG match\ng_bsize_ratio = 40 # function binary size correction ratio to compare (40 is enough)\n\n# debug purpose to check one function matching\ng_dbg_flag = False\ng_dbg_fva = 0x180015978\ng_dbg_fname = 'fn_blob_get_word_param_and_seek'\ng_dbg_sha256 = ''\n\n# initialization for ssdeep\nSPAMSUM_LENGTH = 64\nFUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH + 20)\ndirpath = os.path.dirname(__file__)\n_lib_path = os.path.join(dirpath, 'fuzzy64.dll')\nfuzzy_lib = ctypes.cdll.LoadLibrary(_lib_path)\n\ng_dump_types_path = os.path.join(dirpath, 'dump_types.py')\n\nclass defaultdictRecurse(defaultdict):\n    def __init__(self):\n        self.default_factory = type(self)\n\nclass import_handler_t(ida_kernwin.action_handler_t):\n    def __init__(self, items, idb_path, title):\n        ida_kernwin.action_handler_t.__init__(self)\n        self.items = items\n        self.idb_path = idb_path\n        self.title = title\n        \n    def import_types(self):        \n        idc_path = os.path.splitext(self.idb_path)[0] + '.idc'\n        # dump type information from the 2nd idb\n        
if not (os.path.exists(idc_path)):\n            with open(self.idb_path, 'rb') as f:\n                sig = f.read(4)\n            ida = 'ida.exe' if sig == 'IDA1' else 'ida64.exe'\n            ida_path = os.path.join(idadir(), ida)                \n            cmd = [ida_path, '-S{}'.format(g_dump_types_path), self.idb_path]\n            #print cmd        \n            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n            stdout, stderr = proc.communicate()\n            if proc.returncode == 0:\n                success('{}: type information successfully dumped'.format(self.idb_path))\n            else: \n                error('{}: type information dumping failed'.format(self.idb_path))\n                return False\n\n        # import the type information\n        idc_path = os.path.splitext(self.idb_path)[0] + '.idc'\n        ida_expr.exec_idc_script(None, str(idc_path), \"main\", None, 0)\n        return True\n        \n    def activate(self, ctx):\n        sel = []\n        for idx in ctx.chooser_selection:\n            # rename the function\n            ea = get_name_ea_simple(self.items[idx][2])\n            sfname = str(self.items[idx][4])\n            #set_name(ea, sfname)\n            idaapi.do_name_anyway(ea, sfname)\n            success('{:#x}: renamed to {}'.format(ea, sfname))\n            # set the function prototype\n            sptype = str(self.items[idx][5])\n            if sptype != 'None':\n                tinfo = idaapi.tinfo_t()\n                idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)\n                #idaapi.apply_callee_tinfo(ea, tinfo)\n                if idaapi.apply_tinfo(ea, tinfo, 0):\n                    success('{:#x}: function prototype set to {}'.format(ea, sptype))\n                else:\n                    error('{:#x}: function prototype set FAILED (maybe you should import the types?)'.format(ea))\n                    if ask_yn(0, 'Do you import types from the secondary idb?') 
== 1:\n                        if self.import_types():\n                            tinfo = idaapi.tinfo_t()\n                            idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)\n                            if idaapi.apply_tinfo(ea, tinfo, 0):\n                                success('{:#x}: function prototype set to {}'.format(ea, sptype))\n                            else:\n                                error('{:#x}: function prototype set FAILED again'.format(ea))\n                        \n            # insert the comment\n            score = self.items[idx][0]\n            mmatch = self.items[idx][1]\n            cmt = 'fn_fuzzy: ssdeep={}, machoc={}'.format(score, mmatch)\n            set_func_cmt(ea, cmt, 1)\n            #set_decomplier_cmt(ea, cmt) # not sure how to avoid orphan comment\n\n        # update the Choose rows\n        ida_kernwin.refresh_chooser(self.title)\n\n    def update(self, ctx):\n        return idaapi.AST_ENABLE_ALWAYS\n    '''\n        return ida_kernwin.AST_ENABLE_FOR_WIDGET \\\n            if ida_kernwin.is_chooser_widget(ctx.widget_type) \\\n          else ida_kernwin.AST_DISABLE_FOR_WIDGET\n    '''\n\nclass FnCh(ida_kernwin.Choose):\n    def __init__(self, title, mfn, idb_path):\n        self.mfn = mfn\n        self.idb_path = idb_path\n        self.title = title\n        ida_kernwin.Choose.__init__(\n            self,\n            title,\n            [\n              [\"ssdeep score\",   10 | ida_kernwin.Choose.CHCOL_DEC],\n              [\"machoc matched\",   10 | ida_kernwin.Choose.CHCOL_PLAIN],\n              [\"primary function\", 30 | ida_kernwin.Choose.CHCOL_PLAIN],\n              [\"primary bsize\",   10 | ida_kernwin.Choose.CHCOL_DEC],\n              [\"secondary analyzed function\",   30 | ida_kernwin.Choose.CHCOL_PLAIN], \n              [\"secondary prototype\", 40 | ida_kernwin.Choose.CHCOL_PLAIN]\n            ],\n            flags = ida_kernwin.Choose.CH_MULTI)\n\n    def OnInit(self):\n        
self.items = []\n        for fva,v in sorted(list(self.mfn.items()), key=lambda x:x[1]['score'], reverse=True):\n            if v['sfname']:\n                self.items.append(['{}'.format(v['score']), '{}'.format(v['cfg_match']), str(get_name(fva)), '{}'.format(v['pbsize']), str(v['sfname']), '{}'.format(v['sptype'])])\n        return True\n\n    def OnPopup(self, form, popup_handle):\n        actname = \"choose:actFnFuzzyImport\"\n        desc = ida_kernwin.action_desc_t(actname, 'Import function name and prototype', import_handler_t(self.items, self.idb_path, self.title))\n        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)\n\n    def OnGetSize(self):\n        return len(self.items)\n\n    def OnGetLine(self, n):\n        return self.items[n]\n\n    def OnSelectLine(self, n):\n        idx = n[0] # due to CH_MULTI\n        idc.Jump(get_name_ea_simple(self.items[idx][2]))\n\n    def OnRefresh(self, n):\n        self.OnInit()\n        # try to preserve the cursor\n        #return [ida_kernwin.Choose.ALL_CHANGED] + self.adjust_last_item(n)\n        #return n\n        return None\n\n    def OnClose(self):\n        print(\"closed \", self.title)\n\nclass SummaryCh(ida_kernwin.Choose):\n    def __init__(self, title, res):\n        self.res = res\n        ida_kernwin.Choose.__init__(\n            self,\n            title,\n            [ [\"SHA256\", 20 | ida_kernwin.Choose.CHCOL_PLAIN],\n              [\"total similar functions\",   20 | ida_kernwin.Choose.CHCOL_DEC],\n              [\"analyzed similar functions\",   20 | ida_kernwin.Choose.CHCOL_DEC],\n              [\"IDB path\",   80 | ida_kernwin.Choose.CHCOL_PATH] ])\n        self.items = []\n\n    def OnInit(self):\n        for sha256,v in sorted(list(self.res.items()), key=lambda x:x[1]['mcnt']['total'], reverse=True):\n            if v['mcnt']['total'] > 0:\n                self.items.append([str(sha256), '{}'.format(v['mcnt']['total']), '{}'.format(v['mcnt']['analyzed']), 
str(v['path'])])\n        return True\n            \n    def OnGetSize(self):\n        return len(self.items)\n\n    def OnGetLine(self, n):\n        return self.items[n]\n\n    def OnSelectLine(self, n):\n        sha256 = self.items[n][0]\n        c = FnCh(\"similarities with {}(snip)\".format(sha256[:8]), self.res[sha256]['mfn'], self.res[sha256]['path'])\n        c.Show()\n\n    def OnRefresh(self, n):\n        return n\n\n    def OnClose(self):\n        print(\"closed \", self.title)\n\nclass FnFuzzyForm(ida_kernwin.Form):\n    def __init__(self):\n        ida_kernwin.Form.__init__(self,\nr\"\"\"BUTTON YES* Run\nBUTTON CANCEL Cancel\nfn_fuzzy\n\n{FormChangeCb}\nGeneral Options\n<DB file path:{iDBSave}>\n<minimum function code size:{iMinBytes}>\n<exclude library/thunk functions:{cLibthunk}>\n<enable debug messages:{cDebug}>{cGroup}>\n\n<##Commands##Export:{rExport}>\n<Compare:{rCompare}>{rGroup}>\n\nExport Options\n<update the DB records:{cUpdate}>\n<store flags as analyzed functions:{cAnaExp}>{cEGroup}>\n<analyzed function name prefix/suffix (regex):{iPrefix}>\n\nCompare Options\n<compare with only analyzed functions:{cAnaCmp}>\n<compare with only IDBs in the specified folder:{cFolCmp}>{cCGroup}>\n<the folder path:{iFolder}>\n<function code size comparison criteria (0-100):{iRatio}>\n<function similarity score threshold (0-100) without CFG match:{iSimilarity}>\n<function similarity score threshold (0-100) with CFG match:{iSimilarityCFG}>\n<function code size threshold evaluated by only CFG match:{iMaxBytesForScore}>\n\"\"\",\n        {\n            'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange),\n            'cGroup': ida_kernwin.Form.ChkGroupControl((\"cLibthunk\", \"cDebug\")),\n            'iDBSave': ida_kernwin.Form.FileInput(save=True),\n            'iMinBytes': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),\n            'rGroup': ida_kernwin.Form.RadGroupControl((\"rCompare\", \"rExport\")),\n            'cEGroup': 
ida_kernwin.Form.ChkGroupControl((\"cUpdate\", \"cAnaExp\")),\n            'iPrefix': ida_kernwin.Form.StringInput(),\n            'cCGroup': ida_kernwin.Form.ChkGroupControl((\"cAnaCmp\", \"cFolCmp\")),\n            'iFolder': ida_kernwin.Form.DirInput(),\n            'iRatio': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\n            'iSimilarity': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\n            'iSimilarityCFG': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\n            'iMaxBytesForScore': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),            \n        })\n\n    def OnFormChange(self, fid):\n        if fid == -1:\n            self.SetControlValue(self.cLibthunk, True)\n            self.SetControlValue(self.cAnaExp, True)\n            self.SetControlValue(self.cAnaCmp, True)\n            self.SetControlValue(self.rCompare, True)\n            \n            self.EnableField(self.cEGroup, False)            \n            self.EnableField(self.iPrefix, False)\n            self.EnableField(self.cCGroup, True)\n            self.EnableField(self.iSimilarity, True)\n            self.EnableField(self.iSimilarityCFG, True)\n            self.EnableField(self.iMaxBytesForScore, True)\n            self.EnableField(self.iRatio, True)\n        if fid == self.rExport.id:\n            self.EnableField(self.cEGroup, True)\n            self.EnableField(self.iPrefix, True)\n            self.EnableField(self.cCGroup, False)\n            self.EnableField(self.iSimilarity, False)\n            self.EnableField(self.iSimilarityCFG, False)\n            self.EnableField(self.iMaxBytesForScore, False)\n            self.EnableField(self.iRatio, False)\n        elif fid == self.rCompare.id:\n            self.EnableField(self.cEGroup, False)\n            self.EnableField(self.iPrefix, False)\n            self.EnableField(self.cCGroup, True)\n            self.EnableField(self.iSimilarity, True)\n            
self.EnableField(self.iSimilarityCFG, True)\n            self.EnableField(self.iMaxBytesForScore, True)\n            self.EnableField(self.iRatio, True)\n        return 1\n\nclass FnFuzzy(object):\n    def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre, f_ana_cmp = False, f_fol_cmp = False, ana_fol='', threshold = None, threshold_cfg = None, max_bytes_for_score = None, ratio = 0):\n        self.f_debug = f_debug\n        self.conn = sqlite3.connect(db_path)\n        self.cur = self.conn.cursor()\n        self.init_db()\n        self.in_memory_db()        \n        self.min_bytes = min_bytes\n        self.f_ex_libthunk = f_ex_libthunk\n        # for export\n        self.f_update = f_update\n        self.f_ana_exp = f_ana_exp        \n        self.ana_pre = ana_pre\n        if f_ana_exp:\n            self.ana_pat = re.compile(self.ana_pre)\n        # for compare\n        self.f_ana_cmp = f_ana_cmp\n        self.f_fol_cmp = f_fol_cmp\n        self.ana_fol = ana_fol\n        self.threshold = threshold\n        self.threshold_cfg = threshold_cfg\n        self.max_bytes_for_score = max_bytes_for_score\n        self.ratio = float(ratio)\n\n        self.idb_path = get_idb_path()\n        self.sha256 = ida_nalt.retrieve_input_file_sha256()\n        try:\n            #self.sha256 = self.sha256.lower()\n            self.sha256 = self.sha256.hex()\n            self.md5 = ida_nalt.retrieve_input_file_md5().lower()\n        except AttributeError:\n            message = 'ida_nalt.retrieve_input_file_sha256() returned None. Probably the IDB was generated by old IDA (<6.9). 
Check the version by ida_netnode.cvar.root_node.supstr(ida_nalt.RIDX_IDA_VERSION)'\n            error(message)\n            #ida_kernwin.warning(message)\n\n    def debug(self, msg):\n        if self.f_debug:\n            print(\"[D] {}\".format(msg))\n\n    def init_db(self):\n        self.cur.execute(\"SELECT * FROM sqlite_master WHERE type='table'\")\n        if self.cur.fetchone() is None:\n            info('DB initialized')\n            self.cur.execute(\"CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)\")\n            #self.cur.execute(\"CREATE INDEX sha256_index ON sample(sha256)\")\n            self.cur.execute(\"CREATE INDEX path_index ON sample(path)\")\n            self.cur.execute(\"CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))\")\n            self.cur.execute(\"CREATE INDEX f_ana_index ON function(f_ana)\") \n            self.cur.execute(\"CREATE INDEX bsize_index ON function(bsize)\")\n\n    def in_memory_db(self): # for SELECT\n        tempfile = StringIO()\n        for line in self.conn.iterdump():\n            tempfile.write(\"{}\\n\".format(line))\n        tempfile.seek(0)\n        self.mconn = sqlite3.connect(\":memory:\")\n        self.mconn.cursor().executescript(tempfile.read())\n        self.mconn.commit()\n        self.mconn.row_factory=sqlite3.Row\n        self.mcur = self.mconn.cursor()\n\n    def calc_fn_machoc(self, fva, fname): # based on Machoc hash implementation (https://github.com/0x00ach/idadiff)\n        func = idaapi.get_func(fva)\n        if type(func) == type(None):\n            self.debug('{}: ignored due to lack of function object'.format(fname))\n            return None, None\n\n        flow = idaapi.FlowChart(f=func)\n        cur_hash_rev = \"\"\n        addrIds = []\n        cur_id = 1\n        for c in range(0,flow.size):\n            cur_basic = flow.__getitem__(c)\n            cur_hash_rev += shex(cur_basic.start_ea)+\":\"\n            
addrIds.append((shex(cur_basic.start_ea),str(cur_id)))\n            cur_id += 1\n            addr = cur_basic.start_ea\n            blockEnd = cur_basic.end_ea\n            mnem = GetMnem(addr)\n            while mnem != \"\":\n                if mnem == \"call\": # should be separated into 2 blocks by call\n                     cur_hash_rev += \"c,\"\n                     addr = NextHead(addr,blockEnd)\n                     mnem = GetMnem(addr)\n                     if addr != BADADDR:\n                        cur_hash_rev += shex(addr)+\";\"+shex(addr)+\":\"\n                        addrIds.append((shex(addr),str(cur_id)))\n                        cur_id += 1\n                else:\n                    addr = NextHead(addr,blockEnd)\n                    mnem = GetMnem(addr)\n            refs = []\n            for suc in cur_basic.succs():\n                refs.append(suc.start_ea)\n            refs.sort()\n            refsrev = \"\"\n            for ref in refs:\n                refsrev += shex(ref)+\",\"\n            if refsrev != \"\":\n                refsrev = refsrev[:-1]\n            cur_hash_rev +=  refsrev+\";\"\n\n        # change addr to index\n        for aid in addrIds:\n            #cur_hash_rev = string.replace(cur_hash_rev,aid[0],aid[1])\n            cur_hash_rev = cur_hash_rev.replace(aid[0],aid[1])\n        # calculate machoc hash value\n        self.debug('{}: CFG = {}'.format(fname, cur_hash_rev))\n        return mmh3.hash(cur_hash_rev) & 0xFFFFFFFF, cur_id-1\n\n    def calc_fn_ssdeep(self, fva, fname):\n        d2h = b''\n        for bb in yara_fn.get_basic_blocks(fva):\n            rule = yara_fn.get_basic_block_rule(bb)\n            if rule:\n                chk = rule.cut_bytes_for_hash\n                if len(chk) < yara_fn.MIN_BB_BYTE_COUNT:\n                    continue\n                d2h += chk.encode()\n                #self.debug('chunk at {:#x}: {}'.format(bb.va, get_hex_pat(chk)))\n\n        #self.debug('total func seq at {:#x}: 
{}'.format(fva, get_hex_pat(d2h)))\n        if len(d2h) < self.min_bytes:\n            self.debug('{}: ignored because of the number of extracted code bytes {}'.format(fname, len(d2h)))\n            return None, None\n\n        result_buffer = ctypes.create_string_buffer(FUZZY_MAX_RESULT)\n        file_buffer = ctypes.create_string_buffer(d2h)\n        hash_result = fuzzy_lib.fuzzy_hash_buf(file_buffer, len(file_buffer) - 1, result_buffer)\n        hash_value = result_buffer.value.decode(\"ascii\")\n        return hash_value, len(d2h)\n\n    def existed(self):\n        self.mcur.execute(\"SELECT sha256 FROM sample WHERE sha256 = ?\", (self.sha256,))\n        if self.mcur.fetchone() is None:\n            return False\n        else:\n            return True\n\n    def exclude_libthunk(self, fva, fname):\n        if self.f_ex_libthunk:\n            flags = get_func_attr(fva, FUNCATTR_FLAGS)\n            if flags & FUNC_LIB:\n                self.debug('{}: ignored because of library function'.format(fname))\n                return True\n            if flags & FUNC_THUNK:\n                self.debug('{}: ignored because of thunk function'.format(fname))\n                return True\n        return False\n\n    def export(self):\n        if self.existed() and not self.f_update:\n            info('{}: The sample records are present in DB. 
skipped.'.format(self.sha256))\n            return False\n\n        self.cur.execute(\"REPLACE INTO sample values(?, ?)\", (self.sha256, self.idb_path))\n\n        pnum = tnum = 0\n        records = []\n        for fva in idautils.Functions():\n            fname = get_func_name(fva)\n            tnum += 1\n            if self.exclude_libthunk(fva, fname):\n                continue\n            fhd, bsize = self.calc_fn_ssdeep(fva, fname)\n            fhm, cfgnum = self.calc_fn_machoc(fva, fname)\n            if fhd and fhm:\n                pnum += 1\n                f_ana = bool(self.ana_pat.search(fname)) if self.f_ana_exp else False\n                tinfo = idaapi.tinfo_t()\n                idaapi.get_tinfo(fva, tinfo)\n                ptype = idaapi.print_tinfo('', 0, 0, idaapi.PRTYPE_1LINE, tinfo, fname, '')\n                ptype = ptype + ';' if ptype is not None else ptype\n                # fva is 64-bit int causing OverflowError\n                records.append((self.sha256, '{:#x}'.format(fva), fname, fhd, fhm, f_ana, bsize, ptype)) \n                self.debug('EXPORT {} at {:#x}: ssdeep={} (size={}), machoc={} (num of CFG={})'.format(fname, fva, fhd, bsize, fhm, cfgnum))\n\n        self.cur.executemany(\"REPLACE INTO function values (?, ?, ?, ?, ?, ?, ?, ?)\", records)\n        success ('{} of {} functions exported'.format(pnum, tnum))\n        return True\n\n    def compare(self):\n        res = defaultdictRecurse()\n        if self.f_fol_cmp:\n            self.mcur.execute(\"SELECT sha256,path FROM sample WHERE path LIKE ?\", (self.ana_fol+'%',))\n        else:\n            self.mcur.execute(\"SELECT sha256,path FROM sample\")\n        frows = self.mcur.fetchall()\n        num_of_samples = len(frows)\n        for sha256, path in frows:\n            res[sha256]['path'] = path\n            res[sha256]['mcnt'].default_factory = lambda: 0\n        \n        #sql = \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE f_ana == 1 AND bsize BETWEEN ? 
AND ?\" if self.f_ana_cmp else \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE bsize BETWEEN ? AND ?\"\n        sql = \"SELECT function.sha256,fname,fhd,fhm,f_ana,ptype FROM function INNER JOIN sample on function.sha256 == sample.sha256 WHERE path LIKE ? AND \" if self.f_fol_cmp else \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE \"\n        sql += \"f_ana == 1 AND bsize BETWEEN ? AND ?\" if self.f_ana_cmp else \"bsize BETWEEN ? AND ?\"\n        fns = list(idautils.Functions())\n        for fva in tqdm(fns, desc='comparing functions'):\n            fname = get_func_name(fva)\n            if self.exclude_libthunk(fva, fname) or not num_of_samples:\n                continue\n            pfhd, pbsize = self.calc_fn_ssdeep(fva, fname)\n            pfhm, pcfgnum = self.calc_fn_machoc(fva, fname)\n            if pfhd and pfhm:\n                pbuf = ctypes.create_string_buffer(pfhd.encode())                \n                self.debug('COMPARE {}: ssdeep={} (size={}), machoc={} (num of bb={})'.format(fname, pfhd, pbsize, pfhm, pcfgnum))                \n                min_ = pbsize * (1 - (self.ratio / 100))\n                max_ = pbsize * (1 + (self.ratio / 100))\n                self.debug('min={}, max={}'.format(min_, max_))\n                if self.f_fol_cmp:\n                    self.mcur.execute(sql, (self.ana_fol+'%', min_, max_))\n                else:\n                    self.mcur.execute(sql, (min_, max_))\n                frows = self.mcur.fetchall()\n                self.debug('targeted {} records'.format(len(frows)))                \n                for sha256, sfname, sfhd, sfhm, sf_ana, sptype in frows:\n                    if sha256 == self.sha256: # skip the self\n                        continue\n                    res[sha256]['mfn'][fva].default_factory = lambda: 0\n                    sbuf = ctypes.create_string_buffer(sfhd.encode())\n                    score = fuzzy_lib.fuzzy_compare(pbuf, sbuf)\n\n                  
  dbg_cond = g_dbg_flag and fva == g_dbg_fva and sfname == g_dbg_fname and sha256 == g_dbg_sha256\n                    if dbg_cond:\n                        print(('{:#x}: compared with {} in {} score = {} machoc match = {}'.format(fva, sfname, sha256, score, bool(pfhm == sfhm))))\n                        \n                    if (score >= self.threshold) or (score >= self.threshold_cfg and pfhm == sfhm) or (pbsize > self.max_bytes_for_score and pfhm == sfhm):\n                        if dbg_cond:\n                            print(('{:#x}: counting {} in {} for total number'.format(fva, sfname, sha256)))\n                        res[sha256]['mcnt']['total'] += 1\n                        if sf_ana:\n                            res[sha256]['mcnt']['analyzed'] += 1\n                            if score > res[sha256]['mfn'][fva]['score'] or (res[sha256]['mfn'][fva]['score'] == 0 and pbsize > self.max_bytes_for_score):\n                                res[sha256]['mfn'][fva]['score'] = score\n                                res[sha256]['mfn'][fva]['cfg_match'] = bool(pfhm == sfhm)\n                                res[sha256]['mfn'][fva]['sfname'] = sfname\n                                res[sha256]['mfn'][fva]['sptype'] = sptype\n                                res[sha256]['mfn'][fva]['pbsize'] = pbsize\n                                if dbg_cond:\n                                    print(('{:#x}: appended record = {} in {}'.format(fva, sfname, sha256)))\n\n        \n        c = SummaryCh(\"fn_fuzzy summary\", res)\n        c.Show()\n        success('totally {} samples compared'.format(num_of_samples))\n\n    def close(self):\n        self.conn.commit()\n        self.cur.close()\n\ndef info(msg):\n    print(\"[*] {}\".format(msg))\n\ndef success(msg):\n    print(\"[+] {}\".format(msg))\n\ndef error(msg):\n    print(\"[!] 
{}\".format(msg))\n\ndef get_hex_pat(buf):\n    # get hex pattern\n    return ' '.join(['{:02x}'.format(ord(x)) for x in buf])\n\ndef shex(a):\n    return hex(a).rstrip(\"L\")\n\ndef set_decomplier_cmt(ea, cmt):\n    cfunc = idaapi.decompile(ea)\n    tl = idaapi.treeloc_t()\n    tl.ea = ea\n    tl.itp = idaapi.ITP_SEMI\n    if cfunc:\n      cfunc.set_user_cmt(tl, cmt)\n      cfunc.save_user_cmts()\n    else:\n      error(\"Decompile failed: {:#x}\".formart(ea))\n\ndef main():\n    info('start')\n        \n    if idaapi.get_plugin_options(\"fn_fuzzy\"): # CLI (export only)\n        # not change the database to maintain the window setting\n        process_config_line(\"ABANDON_DATABASE=YES\")\n        \n        start = time.time()\n        options = idaapi.get_plugin_options(\"fn_fuzzy\").split(':')\n        #print options\n        min_bytes = int(options[0])\n        f_ex_libthunk = eval(options[1])\n        f_update = eval(options[2])\n        f_ana_exp = eval(options[3])\n        ana_pre = options[4]\n        db_path = ':'.join(options[5:])\n        ff = FnFuzzy(False, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre)        \n        res = ff.export()\n        ff.close()\n        elapsed = time.time() - start\n        info('done (CLI)')\n        if res: # return code 1 is reserved for error\n            qexit(0) \n        else:\n            qexit(2) # already exported (skipped)\n    else: \n        f = FnFuzzyForm()\n        f.Compile()\n        f.iDBSave.value = g_db_path\n        f.iMinBytes.value = g_min_bytes\n        f.iPrefix.value = g_analyzed_prefix\n        f.iFolder.value = os.path.dirname(get_idb_path())\n        f.iSimilarity.value = g_threshold\n        f.iSimilarityCFG.value = g_threshold_cfg\n        f.iMaxBytesForScore.value = g_max_bytes_for_score\n        f.iRatio.value = g_bsize_ratio\n        r = f.Execute()\n        if r == 1: # Run\n            start = time.time()\n            ff = FnFuzzy(f.cDebug.checked, f.iDBSave.value, 
f.iMinBytes.value, f.cLibthunk.checked, f.cUpdate.checked, f.cAnaExp.checked, f.iPrefix.value, f.cAnaCmp.checked, f.cFolCmp.checked, f.iFolder.value, f.iSimilarity.value, f.iSimilarityCFG.value, f.iMaxBytesForScore.value, f.iRatio.value)\n            if f.rExport.selected:\n                if ff.sha256 is None:\n                    print('aborted')\n                    return        \n                ff.export()\n                #cProfile.runctx('ff.export()', None, locals())\n            else: \n                ff.compare()\n                #cProfile.runctx('ff.compare()', None, locals())\n            ff.close()\n            elapsed = time.time() - start\n        else:  \n            print('canceled')\n            return\n    \n    info('elapsed time = {} sec'.format(elapsed))            \n    info('done')\n\nif __name__ == '__main__':\n    main()\n\n\n\n"
  },
  {
    "path": "fn_fuzzy/fn_fuzzy_7x.py",
    "content": "# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\r\n# Takahiro Haruyama (@cci_forensics)\r\n\r\nimport os, ctypes, sqlite3, re, time, sys, subprocess\r\nimport cProfile\r\nfrom collections import defaultdict\r\nfrom pprint import PrettyPrinter\r\nfrom io import StringIO\r\nfrom tqdm import tqdm\r\n\r\nfrom idc import *\r\nimport idautils, ida_nalt, ida_kernwin, idaapi, ida_expr, ida_typeinf\r\n\r\nimport mmh3\r\nimport yara_fn_7x # modified version in the same folder\r\n\r\ng_db_path = r'C:\\analysisw\\tics\\fn_fuzzy.sqlite' # plz edit your path\r\ng_min_bytes = 0x10 # minimum number of extracted code bytes per function\r\ng_analyzed_prefix = r'fn_|func_' # analyzed function name prefix (regex)\r\ng_threshold = 50 # function similarity score threshold without CFG match\r\ng_threshold_cfg = 10 # function similarity score threshold with CFG match\r\ng_max_bytes_for_score = 0x100 # more code bytes are evaluated by only CFG match\r\ng_bsize_ratio = 40 # function binary size correction ratio to compare (40 is enough)\r\n\r\n# debug purpose to check one function matching\r\ng_dbg_flag = False\r\ng_dbg_fva = 0x180015978\r\ng_dbg_fname = 'fn_blob_get_word_param_and_seek'\r\ng_dbg_sha256 = ''\r\n\r\n# initialization for ssdeep\r\nSPAMSUM_LENGTH = 64\r\nFUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH + 20)\r\ndirpath = os.path.dirname(__file__)\r\n_lib_path = os.path.join(dirpath, 'fuzzy64.dll')\r\nfuzzy_lib = ctypes.cdll.LoadLibrary(_lib_path)\r\n\r\ng_dump_types_path = os.path.join(dirpath, 'dump_types.py')\r\n\r\nclass defaultdictRecurse(defaultdict):\r\n    def __init__(self):\r\n        self.default_factory = type(self)\r\n\r\nclass import_handler_t(ida_kernwin.action_handler_t):\r\n    def __init__(self, items, idb_path, title):\r\n        ida_kernwin.action_handler_t.__init__(self)\r\n        self.items = items\r\n        self.idb_path = idb_path\r\n        self.title = title\r\n        \r\n    def import_types(self):        \r\n        
idc_path = os.path.splitext(self.idb_path)[0] + '.idc'\r\n        # dump type information from the 2nd idb\r\n        if not (os.path.exists(idc_path)):\r\n            with open(self.idb_path, 'rb') as f:\r\n                sig = f.read(4)\r\n            ida = 'ida.exe' if sig == 'IDA1' else 'ida64.exe'\r\n            ida_path = os.path.join(idadir(), ida)                \r\n            cmd = [ida_path, '-S{}'.format(g_dump_types_path), self.idb_path]\r\n            #print cmd        \r\n            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\r\n            stdout, stderr = proc.communicate()\r\n            if proc.returncode == 0:\r\n                success('{}: type information successfully dumped'.format(self.idb_path))\r\n            else: \r\n                error('{}: type information dumping failed'.format(self.idb_path))\r\n                return False\r\n\r\n        # import the type information\r\n        idc_path = os.path.splitext(self.idb_path)[0] + '.idc'\r\n        ida_expr.exec_idc_script(None, str(idc_path), \"main\", None, 0)\r\n        return True\r\n        \r\n    def activate(self, ctx):\r\n        sel = []\r\n        for idx in ctx.chooser_selection:\r\n            # rename the function\r\n            ea = get_name_ea_simple(self.items[idx][2])\r\n            sfname = str(self.items[idx][4])\r\n            #set_name(ea, sfname)\r\n            ida_name.force_name(ea, sfname)\r\n            success('{:#x}: renamed to {}'.format(ea, sfname))\r\n            # set the function prototype\r\n            sptype = str(self.items[idx][5])\r\n            if sptype != 'None':\r\n                tinfo = idaapi.tinfo_t()\r\n                idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)\r\n                #idaapi.apply_callee_tinfo(ea, tinfo)\r\n                if idaapi.apply_tinfo(ea, tinfo, 0):\r\n                    success('{:#x}: function prototype set to {}'.format(ea, sptype))\r\n                else:\r\n    
                error('{:#x}: function prototype set FAILED (maybe you should import the types?)'.format(ea))\r\n                    if ask_yn(0, 'Do you import types from the secondary idb?') == 1:\r\n                        if self.import_types():\r\n                            tinfo = idaapi.tinfo_t()\r\n                            idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)\r\n                            if idaapi.apply_tinfo(ea, tinfo, 0):\r\n                                success('{:#x}: function prototype set to {}'.format(ea, sptype))\r\n                            else:\r\n                                error('{:#x}: function prototype set FAILED again'.format(ea))\r\n                        \r\n            # insert the comment\r\n            score = self.items[idx][0]\r\n            mmatch = self.items[idx][1]\r\n            cmt = 'fn_fuzzy: ssdeep={}, machoc={}'.format(score, mmatch)\r\n            set_func_cmt(ea, cmt, 1)\r\n            #set_decomplier_cmt(ea, cmt) # not sure how to avoid orphan comment\r\n\r\n        # update the Choose rows\r\n        ida_kernwin.refresh_chooser(self.title)\r\n\r\n    def update(self, ctx):\r\n        return idaapi.AST_ENABLE_ALWAYS\r\n    '''\r\n        return ida_kernwin.AST_ENABLE_FOR_WIDGET \\\r\n            if ida_kernwin.is_chooser_widget(ctx.widget_type) \\\r\n          else ida_kernwin.AST_DISABLE_FOR_WIDGET\r\n    '''\r\n\r\nclass FnCh(ida_kernwin.Choose):\r\n    def __init__(self, title, mfn, idb_path):\r\n        self.mfn = mfn\r\n        self.idb_path = idb_path\r\n        self.title = title\r\n        ida_kernwin.Choose.__init__(\r\n            self,\r\n            title,\r\n            [\r\n              [\"ssdeep score\",   10 | ida_kernwin.Choose.CHCOL_DEC],\r\n              [\"machoc matched\",   10 | ida_kernwin.Choose.CHCOL_PLAIN],\r\n              [\"primary function\", 30 | ida_kernwin.Choose.CHCOL_PLAIN],\r\n              [\"primary bsize\",   10 | ida_kernwin.Choose.CHCOL_DEC],\r\n 
             [\"secondary analyzed function\",   30 | ida_kernwin.Choose.CHCOL_PLAIN], \r\n              [\"secondary prototype\", 40 | ida_kernwin.Choose.CHCOL_PLAIN]\r\n            ],\r\n            flags = ida_kernwin.Choose.CH_MULTI)\r\n\r\n    def OnInit(self):\r\n        self.items = []\r\n        for fva,v in sorted(list(self.mfn.items()), key=lambda x:x[1]['score'], reverse=True):\r\n            if v['sfname']:\r\n                self.items.append(['{}'.format(v['score']), '{}'.format(v['cfg_match']), str(get_name(fva)), '{}'.format(v['pbsize']), str(v['sfname']), '{}'.format(v['sptype'])])\r\n        return True\r\n\r\n    def OnPopup(self, form, popup_handle):\r\n        actname = \"choose:actFnFuzzyImport\"\r\n        desc = ida_kernwin.action_desc_t(actname, 'Import function name and prototype', import_handler_t(self.items, self.idb_path, self.title))\r\n        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)\r\n\r\n    def OnGetSize(self):\r\n        return len(self.items)\r\n\r\n    def OnGetLine(self, n):\r\n        return self.items[n]\r\n\r\n    def OnSelectLine(self, n):\r\n        idx = n[0] # due to CH_MULTI\r\n        ida_kernwin.jumpto(get_name_ea_simple(self.items[idx][2]))\r\n\r\n    def OnRefresh(self, n):\r\n        self.OnInit()\r\n        # try to preserve the cursor\r\n        #return [ida_kernwin.Choose.ALL_CHANGED] + self.adjust_last_item(n)\r\n        #return n\r\n        return None\r\n\r\n    def OnClose(self):\r\n        print(\"closed \", self.title)\r\n\r\nclass SummaryCh(ida_kernwin.Choose):\r\n    def __init__(self, title, res):\r\n        self.res = res\r\n        ida_kernwin.Choose.__init__(\r\n            self,\r\n            title,\r\n            [ [\"SHA256\", 20 | ida_kernwin.Choose.CHCOL_PLAIN],\r\n              [\"total similar functions\",   20 | ida_kernwin.Choose.CHCOL_DEC],\r\n              [\"analyzed similar functions\",   20 | ida_kernwin.Choose.CHCOL_DEC],\r\n              [\"IDB path\",   
80 | ida_kernwin.Choose.CHCOL_PATH] ])\r\n        self.items = []\r\n\r\n    def OnInit(self):\r\n        for sha256,v in sorted(list(self.res.items()), key=lambda x:x[1]['mcnt']['total'], reverse=True):\r\n            if v['mcnt']['total'] > 0:\r\n                self.items.append([str(sha256), '{}'.format(v['mcnt']['total']), '{}'.format(v['mcnt']['analyzed']), str(v['path'])])\r\n        return True\r\n            \r\n    def OnGetSize(self):\r\n        return len(self.items)\r\n\r\n    def OnGetLine(self, n):\r\n        return self.items[n]\r\n\r\n    def OnSelectLine(self, n):\r\n        sha256 = self.items[n][0]\r\n        c = FnCh(\"similarities with {}(snip)\".format(sha256[:8]), self.res[sha256]['mfn'], self.res[sha256]['path'])\r\n        c.Show()\r\n\r\n    def OnRefresh(self, n):\r\n        return n\r\n\r\n    def OnClose(self):\r\n        print(\"closed \", self.title)\r\n\r\nclass FnFuzzyForm(ida_kernwin.Form):\r\n    def __init__(self):\r\n        ida_kernwin.Form.__init__(self,\r\nr\"\"\"BUTTON YES* Run\r\nBUTTON CANCEL Cancel\r\nfn_fuzzy\r\n\r\n{FormChangeCb}\r\nGeneral Options\r\n<DB file path:{iDBSave}>\r\n<minimum function code size:{iMinBytes}>\r\n<exclude library/thunk functions:{cLibthunk}>\r\n<enable debug messages:{cDebug}>{cGroup}>\r\n\r\n<##Commands##Export:{rExport}>\r\n<Compare:{rCompare}>{rGroup}>\r\n\r\nExport Options\r\n<update the DB records:{cUpdate}>\r\n<store flags as analyzed functions:{cAnaExp}>{cEGroup}>\r\n<analyzed function name prefix/suffix (regex):{iPrefix}>\r\n\r\nCompare Options\r\n<compare with only analyzed functions:{cAnaCmp}>\r\n<compare with only IDBs in the specified folder:{cFolCmp}>{cCGroup}>\r\n<the folder path:{iFolder}>\r\n<function code size comparison criteria (0-100):{iRatio}>\r\n<function similarity score threshold (0-100) without CFG match:{iSimilarity}>\r\n<function similarity score threshold (0-100) with CFG match:{iSimilarityCFG}>\r\n<function code size threshold evaluated by only CFG 
match:{iMaxBytesForScore}>\r\n\"\"\",\r\n        {\r\n            'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange),\r\n            'cGroup': ida_kernwin.Form.ChkGroupControl((\"cLibthunk\", \"cDebug\")),\r\n            'iDBSave': ida_kernwin.Form.FileInput(save=True),\r\n            'iMinBytes': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),\r\n            'rGroup': ida_kernwin.Form.RadGroupControl((\"rCompare\", \"rExport\")),\r\n            'cEGroup': ida_kernwin.Form.ChkGroupControl((\"cUpdate\", \"cAnaExp\")),\r\n            'iPrefix': ida_kernwin.Form.StringInput(),\r\n            'cCGroup': ida_kernwin.Form.ChkGroupControl((\"cAnaCmp\", \"cFolCmp\")),\r\n            'iFolder': ida_kernwin.Form.DirInput(),\r\n            'iRatio': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\r\n            'iSimilarity': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\r\n            'iSimilarityCFG': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),\r\n            'iMaxBytesForScore': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),            \r\n        })\r\n\r\n    def OnFormChange(self, fid):\r\n        if fid == -1:\r\n            self.SetControlValue(self.cLibthunk, True)\r\n            self.SetControlValue(self.cAnaExp, True)\r\n            self.SetControlValue(self.cAnaCmp, True)\r\n            self.SetControlValue(self.rCompare, True)\r\n            \r\n            self.EnableField(self.cEGroup, False)            \r\n            self.EnableField(self.iPrefix, False)\r\n            self.EnableField(self.cCGroup, True)\r\n            self.EnableField(self.iSimilarity, True)\r\n            self.EnableField(self.iSimilarityCFG, True)\r\n            self.EnableField(self.iMaxBytesForScore, True)\r\n            self.EnableField(self.iRatio, True)\r\n        if fid == self.rExport.id:\r\n            self.EnableField(self.cEGroup, True)\r\n            self.EnableField(self.iPrefix, True)\r\n            
self.EnableField(self.cCGroup, False)\r\n            self.EnableField(self.iSimilarity, False)\r\n            self.EnableField(self.iSimilarityCFG, False)\r\n            self.EnableField(self.iMaxBytesForScore, False)\r\n            self.EnableField(self.iRatio, False)\r\n        elif fid == self.rCompare.id:\r\n            self.EnableField(self.cEGroup, False)\r\n            self.EnableField(self.iPrefix, False)\r\n            self.EnableField(self.cCGroup, True)\r\n            self.EnableField(self.iSimilarity, True)\r\n            self.EnableField(self.iSimilarityCFG, True)\r\n            self.EnableField(self.iMaxBytesForScore, True)\r\n            self.EnableField(self.iRatio, True)\r\n        return 1\r\n\r\nclass FnFuzzy(object):\r\n    def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre, f_ana_cmp = False, f_fol_cmp = False, ana_fol='', threshold = None, threshold_cfg = None, max_bytes_for_score = None, ratio = 0):\r\n        self.f_debug = f_debug\r\n        self.conn = sqlite3.connect(db_path)\r\n        self.cur = self.conn.cursor()\r\n        self.init_db()\r\n        self.in_memory_db()        \r\n        self.min_bytes = min_bytes\r\n        self.f_ex_libthunk = f_ex_libthunk\r\n        # for export\r\n        self.f_update = f_update\r\n        self.f_ana_exp = f_ana_exp        \r\n        self.ana_pre = ana_pre\r\n        if f_ana_exp:\r\n            self.ana_pat = re.compile(self.ana_pre)\r\n        # for compare\r\n        self.f_ana_cmp = f_ana_cmp\r\n        self.f_fol_cmp = f_fol_cmp\r\n        self.ana_fol = ana_fol\r\n        self.threshold = threshold\r\n        self.threshold_cfg = threshold_cfg\r\n        self.max_bytes_for_score = max_bytes_for_score\r\n        self.ratio = float(ratio)\r\n\r\n        self.idb_path = get_idb_path()\r\n        self.sha256 = ida_nalt.retrieve_input_file_sha256()\r\n        try:\r\n            #self.sha256 = self.sha256.lower()\r\n            self.sha256 = 
self.sha256.hex()\r\n            self.md5 = ida_nalt.retrieve_input_file_md5().lower()\r\n        except AttributeError:\r\n            message = 'ida_nalt.retrieve_input_file_sha256() returned None. Probably the IDB was generated by old IDA (<6.9). Check the version by ida_netnode.cvar.root_node.supstr(ida_nalt.RIDX_IDA_VERSION)'\r\n            error(message)\r\n            #ida_kernwin.warning(message)\r\n\r\n    def debug(self, msg):\r\n        if self.f_debug:\r\n            print(\"[D] {}\".format(msg))\r\n\r\n    def init_db(self):\r\n        self.cur.execute(\"SELECT * FROM sqlite_master WHERE type='table'\")\r\n        if self.cur.fetchone() is None:\r\n            info('DB initialized')\r\n            self.cur.execute(\"CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)\")\r\n            #self.cur.execute(\"CREATE INDEX sha256_index ON sample(sha256)\")\r\n            self.cur.execute(\"CREATE INDEX path_index ON sample(path)\")\r\n            self.cur.execute(\"CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))\")\r\n            self.cur.execute(\"CREATE INDEX f_ana_index ON function(f_ana)\") \r\n            self.cur.execute(\"CREATE INDEX bsize_index ON function(bsize)\")\r\n\r\n    def in_memory_db(self): # for SELECT\r\n        tempfile = StringIO()\r\n        for line in self.conn.iterdump():\r\n            tempfile.write(\"{}\\n\".format(line))\r\n        tempfile.seek(0)\r\n        self.mconn = sqlite3.connect(\":memory:\")\r\n        self.mconn.cursor().executescript(tempfile.read())\r\n        self.mconn.commit()\r\n        self.mconn.row_factory=sqlite3.Row\r\n        self.mcur = self.mconn.cursor()\r\n\r\n    def calc_fn_machoc(self, fva, fname): # based on Machoc hash implementation (https://github.com/0x00ach/idadiff)\r\n        func = idaapi.get_func(fva)\r\n        if type(func) == type(None):\r\n            self.debug('{}: ignored due to lack of function 
object'.format(fname))\r\n            return None, None\r\n\r\n        flow = idaapi.FlowChart(f=func)\r\n        cur_hash_rev = \"\"\r\n        addrIds = []\r\n        cur_id = 1\r\n        for c in range(0,flow.size):\r\n            cur_basic = flow.__getitem__(c)\r\n            cur_hash_rev += shex(cur_basic.start_ea)+\":\"\r\n            addrIds.append((shex(cur_basic.start_ea),str(cur_id)))\r\n            cur_id += 1\r\n            addr = cur_basic.start_ea\r\n            blockEnd = cur_basic.end_ea\r\n            mnem = idc.print_insn_mnem(addr)\r\n            while mnem != \"\":\r\n                if mnem == \"call\": # should be separated into 2 blocks by call\r\n                     cur_hash_rev += \"c,\"\r\n                     addr = idc.next_head(addr,blockEnd)\r\n                     mnem = idc.print_insn_mnem(addr)\r\n                     if addr != BADADDR:\r\n                        cur_hash_rev += shex(addr)+\";\"+shex(addr)+\":\"\r\n                        addrIds.append((shex(addr),str(cur_id)))\r\n                        cur_id += 1\r\n                else:\r\n                    addr = idc.next_head(addr,blockEnd)\r\n                    mnem = idc.print_insn_mnem(addr)\r\n            refs = []\r\n            for suc in cur_basic.succs():\r\n                refs.append(suc.start_ea)\r\n            refs.sort()\r\n            refsrev = \"\"\r\n            for ref in refs:\r\n                refsrev += shex(ref)+\",\"\r\n            if refsrev != \"\":\r\n                refsrev = refsrev[:-1]\r\n            cur_hash_rev +=  refsrev+\";\"\r\n\r\n        # change addr to index\r\n        for aid in addrIds:\r\n            #cur_hash_rev = string.replace(cur_hash_rev,aid[0],aid[1])\r\n            cur_hash_rev = cur_hash_rev.replace(aid[0],aid[1])\r\n        # calculate machoc hash value\r\n        self.debug('{}: CFG = {}'.format(fname, cur_hash_rev))\r\n        return mmh3.hash(cur_hash_rev) & 0xFFFFFFFF, cur_id-1\r\n\r\n    def calc_fn_ssdeep(self, 
fva, fname):\r\n        d2h = b''\r\n        for bb in yara_fn_7x.get_basic_blocks(fva):\r\n            rule = yara_fn_7x.get_basic_block_rule(bb)\r\n            if rule:\r\n                chk = rule.cut_bytes_for_hash\r\n                if len(chk) < yara_fn_7x.MIN_BB_BYTE_COUNT:\r\n                    continue\r\n                d2h += chk.encode()\r\n                #self.debug('chunk at {:#x}: {}'.format(bb.va, get_hex_pat(chk)))\r\n\r\n        #self.debug('total func seq at {:#x}: {}'.format(fva, get_hex_pat(d2h)))\r\n        if len(d2h) < self.min_bytes:\r\n            self.debug('{}: ignored because of the number of extracted code bytes {}'.format(fname, len(d2h)))\r\n            return None, None\r\n\r\n        result_buffer = ctypes.create_string_buffer(FUZZY_MAX_RESULT)\r\n        file_buffer = ctypes.create_string_buffer(d2h)\r\n        hash_result = fuzzy_lib.fuzzy_hash_buf(file_buffer, len(file_buffer) - 1, result_buffer)\r\n        hash_value = result_buffer.value.decode(\"ascii\")\r\n        return hash_value, len(d2h)\r\n\r\n    def existed(self):\r\n        self.mcur.execute(\"SELECT sha256 FROM sample WHERE sha256 = ?\", (self.sha256,))\r\n        if self.mcur.fetchone() is None:\r\n            return False\r\n        else:\r\n            return True\r\n\r\n    def exclude_libthunk(self, fva, fname):\r\n        if self.f_ex_libthunk:\r\n            flags = get_func_attr(fva, FUNCATTR_FLAGS)\r\n            if flags & FUNC_LIB:\r\n                self.debug('{}: ignored because of library function'.format(fname))\r\n                return True\r\n            if flags & FUNC_THUNK:\r\n                self.debug('{}: ignored because of thunk function'.format(fname))\r\n                return True\r\n        return False\r\n\r\n    def export(self):\r\n        if self.existed() and not self.f_update:\r\n            info('{}: The sample records are present in DB. 
skipped.'.format(self.sha256))\r\n            return False\r\n\r\n        self.cur.execute(\"REPLACE INTO sample values(?, ?)\", (self.sha256, self.idb_path))\r\n\r\n        pnum = tnum = 0\r\n        records = []\r\n        for fva in idautils.Functions():\r\n            fname = get_func_name(fva)\r\n            tnum += 1\r\n            if self.exclude_libthunk(fva, fname):\r\n                continue\r\n            fhd, bsize = self.calc_fn_ssdeep(fva, fname)\r\n            fhm, cfgnum = self.calc_fn_machoc(fva, fname)\r\n            if fhd and fhm:\r\n                pnum += 1\r\n                f_ana = bool(self.ana_pat.search(fname)) if self.f_ana_exp else False\r\n                #tinfo = idaapi.tinfo_t()\r\n                #idc.get_tinfo(fva, tinfo)\r\n                #tif = ida_typeinf.tinfo_t()\r\n                #tinfo = idc.get_tinfo(fva)\r\n                #ptype = idaapi.print_tinfo('', 0, 0, idaapi.PRTYPE_1LINE, tinfo, fname, '')\r\n                ptype = ida_typeinf.idc_get_type(fva)\r\n                ptype = ptype + ';' if ptype is not None else ptype\r\n                # fva is 64-bit int causing OverflowError\r\n                records.append((self.sha256, '{:#x}'.format(fva), fname, fhd, fhm, f_ana, bsize, ptype)) \r\n                self.debug('EXPORT {} at {:#x}: ssdeep={} (size={}), machoc={} (num of CFG={})'.format(fname, fva, fhd, bsize, fhm, cfgnum))\r\n\r\n        self.cur.executemany(\"REPLACE INTO function values (?, ?, ?, ?, ?, ?, ?, ?)\", records)\r\n        success ('{} of {} functions exported'.format(pnum, tnum))\r\n        return True\r\n\r\n    def compare(self):\r\n        res = defaultdictRecurse()\r\n        if self.f_fol_cmp:\r\n            self.mcur.execute(\"SELECT sha256,path FROM sample WHERE path LIKE ?\", (self.ana_fol+'%',))\r\n        else:\r\n            self.mcur.execute(\"SELECT sha256,path FROM sample\")\r\n        frows = self.mcur.fetchall()\r\n        num_of_samples = len(frows)\r\n        for sha256, path in 
frows:\r\n            res[sha256]['path'] = path\r\n            res[sha256]['mcnt'].default_factory = lambda: 0\r\n        \r\n        #sql = \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE f_ana == 1 AND bsize BETWEEN ? AND ?\" if self.f_ana_cmp else \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE bsize BETWEEN ? AND ?\"\r\n        sql = \"SELECT function.sha256,fname,fhd,fhm,f_ana,ptype FROM function INNER JOIN sample on function.sha256 == sample.sha256 WHERE path LIKE ? AND \" if self.f_fol_cmp else \"SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE \"\r\n        sql += \"f_ana == 1 AND bsize BETWEEN ? AND ?\" if self.f_ana_cmp else \"bsize BETWEEN ? AND ?\"\r\n        fns = list(idautils.Functions())\r\n        for fva in tqdm(fns, desc='comparing functions'):\r\n            fname = get_func_name(fva)\r\n            if self.exclude_libthunk(fva, fname) or not num_of_samples:\r\n                continue\r\n            pfhd, pbsize = self.calc_fn_ssdeep(fva, fname)\r\n            pfhm, pcfgnum = self.calc_fn_machoc(fva, fname)\r\n            if pfhd and pfhm:\r\n                pbuf = ctypes.create_string_buffer(pfhd.encode())                \r\n                self.debug('COMPARE {}: ssdeep={} (size={}), machoc={} (num of bb={})'.format(fname, pfhd, pbsize, pfhm, pcfgnum))                \r\n                min_ = pbsize * (1 - (self.ratio / 100))\r\n                max_ = pbsize * (1 + (self.ratio / 100))\r\n                self.debug('min={}, max={}'.format(min_, max_))\r\n                if self.f_fol_cmp:\r\n                    self.mcur.execute(sql, (self.ana_fol+'%', min_, max_))\r\n                else:\r\n                    self.mcur.execute(sql, (min_, max_))\r\n                frows = self.mcur.fetchall()\r\n                self.debug('targeted {} records'.format(len(frows)))                \r\n                for sha256, sfname, sfhd, sfhm, sf_ana, sptype in frows:\r\n                    if sha256 == 
self.sha256: # skip the self\r\n                        continue\r\n                    res[sha256]['mfn'][fva].default_factory = lambda: 0\r\n                    sbuf = ctypes.create_string_buffer(sfhd.encode())\r\n                    score = fuzzy_lib.fuzzy_compare(pbuf, sbuf)\r\n\r\n                    dbg_cond = g_dbg_flag and fva == g_dbg_fva and sfname == g_dbg_fname and sha256 == g_dbg_sha256\r\n                    if dbg_cond:\r\n                        print(('{:#x}: compared with {} in {} score = {} machoc match = {}'.format(fva, sfname, sha256, score, bool(pfhm == sfhm))))\r\n                        \r\n                    if (score >= self.threshold) or (score >= self.threshold_cfg and pfhm == sfhm) or (pbsize > self.max_bytes_for_score and pfhm == sfhm):\r\n                        if dbg_cond:\r\n                            print(('{:#x}: counting {} in {} for total number'.format(fva, sfname, sha256)))\r\n                        res[sha256]['mcnt']['total'] += 1\r\n                        if sf_ana:\r\n                            res[sha256]['mcnt']['analyzed'] += 1\r\n                            if score > res[sha256]['mfn'][fva]['score'] or (res[sha256]['mfn'][fva]['score'] == 0 and pbsize > self.max_bytes_for_score):\r\n                                res[sha256]['mfn'][fva]['score'] = score\r\n                                res[sha256]['mfn'][fva]['cfg_match'] = bool(pfhm == sfhm)\r\n                                res[sha256]['mfn'][fva]['sfname'] = sfname\r\n                                res[sha256]['mfn'][fva]['sptype'] = sptype\r\n                                res[sha256]['mfn'][fva]['pbsize'] = pbsize\r\n                                if dbg_cond:\r\n                                    print(('{:#x}: appended record = {} in {}'.format(fva, sfname, sha256)))\r\n\r\n        \r\n        c = SummaryCh(\"fn_fuzzy summary\", res)\r\n        c.Show()\r\n        success('totally {} samples compared'.format(num_of_samples))\r\n\r\n    def 
close(self):\r\n        self.conn.commit()\r\n        self.cur.close()\r\n\r\ndef info(msg):\r\n    print(\"[*] {}\".format(msg))\r\n\r\ndef success(msg):\r\n    print(\"[+] {}\".format(msg))\r\n\r\ndef error(msg):\r\n    print(\"[!] {}\".format(msg))\r\n\r\ndef get_hex_pat(buf):\r\n    # get hex pattern\r\n    return ' '.join(['{:02x}'.format(ord(x)) for x in buf])\r\n\r\ndef shex(a):\r\n    return hex(a).rstrip(\"L\")\r\n\r\ndef set_decomplier_cmt(ea, cmt):\r\n    cfunc = idaapi.decompile(ea)\r\n    tl = idaapi.treeloc_t()\r\n    tl.ea = ea\r\n    tl.itp = idaapi.ITP_SEMI\r\n    if cfunc:\r\n      cfunc.set_user_cmt(tl, cmt)\r\n      cfunc.save_user_cmts()\r\n    else:\r\n      error(\"Decompile failed: {:#x}\".formart(ea))\r\n\r\ndef main():\r\n    info('start')\r\n        \r\n    if idaapi.get_plugin_options(\"fn_fuzzy\"): # CLI (export only)\r\n        # not change the database to maintain the window setting\r\n        process_config_line(\"ABANDON_DATABASE=YES\")\r\n        \r\n        start = time.time()\r\n        options = idaapi.get_plugin_options(\"fn_fuzzy\").split(':')\r\n        #print options\r\n        min_bytes = int(options[0])\r\n        f_ex_libthunk = eval(options[1])\r\n        f_update = eval(options[2])\r\n        f_ana_exp = eval(options[3])\r\n        ana_pre = options[4]\r\n        db_path = ':'.join(options[5:])\r\n        ff = FnFuzzy(False, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre)        \r\n        res = ff.export()\r\n        ff.close()\r\n        elapsed = time.time() - start\r\n        info('done (CLI)')\r\n        if res: # return code 1 is reserved for error\r\n            qexit(0) \r\n        else:\r\n            qexit(2) # already exported (skipped)\r\n    else: \r\n        f = FnFuzzyForm()\r\n        f.Compile()\r\n        f.iDBSave.value = g_db_path\r\n        f.iMinBytes.value = g_min_bytes\r\n        f.iPrefix.value = g_analyzed_prefix\r\n        f.iFolder.value = os.path.dirname(get_idb_path())\r\n 
       f.iSimilarity.value = g_threshold\r\n        f.iSimilarityCFG.value = g_threshold_cfg\r\n        f.iMaxBytesForScore.value = g_max_bytes_for_score\r\n        f.iRatio.value = g_bsize_ratio\r\n        r = f.Execute()\r\n        if r == 1: # Run\r\n            start = time.time()\r\n            ff = FnFuzzy(f.cDebug.checked, f.iDBSave.value, f.iMinBytes.value, f.cLibthunk.checked, f.cUpdate.checked, f.cAnaExp.checked, f.iPrefix.value, f.cAnaCmp.checked, f.cFolCmp.checked, f.iFolder.value, f.iSimilarity.value, f.iSimilarityCFG.value, f.iMaxBytesForScore.value, f.iRatio.value)\r\n            if f.rExport.selected:\r\n                if ff.sha256 is None:\r\n                    print('aborted')\r\n                    return        \r\n                ff.export()\r\n                #cProfile.runctx('ff.export()', None, locals())\r\n            else: \r\n                ff.compare()\r\n                #cProfile.runctx('ff.compare()', None, locals())\r\n            ff.close()\r\n            elapsed = time.time() - start\r\n        else:  \r\n            print('canceled')\r\n            return\r\n    \r\n    info('elapsed time = {} sec'.format(elapsed))            \r\n    info('done')\r\n\r\nif __name__ == '__main__':\r\n    main()\r\n\r\n\r\n\r\n"
  },
  {
    "path": "fn_fuzzy/yara_fn.py",
    "content": "'''\nIDAPython script that generates a YARA rule to match against the\nbasic blocks of the current function. It masks out relocation bytes\nand ignores jump instructions (given that we're already trying to\nmatch compiler-specific bytes, this is of arguable benefit).\n\nIf python-yara is installed, the IDAPython script also validates that\nthe generated rule matches at least one segment in the current file.\n\nauthor: Willi Ballenthin <william.ballenthin@fireeye.com>\n'''\n# 2018/8/6 Takahiro Haruyama modified to calculate fixup (relocation) size correctly\n# and exclude direct memory reference data and other ignorable variable code\n\nimport logging\nfrom collections import namedtuple\n\nfrom idc import *\nimport idaapi\nimport idautils\nimport ida_ua, ida_kernwin, ida_fixup\n\nlogger = logging.getLogger(__name__)\n\nBasicBlock = namedtuple('BasicBlock', ['va', 'size'])\n\n\n# each rule must have at least this many non-masked bytes\nMIN_BB_BYTE_COUNT = 4\n\ndef get_basic_blocks(fva):\n    '''\n    return sequence of `BasicBlock` instances for given function.\n    '''\n    ret = []\n    func = idaapi.get_func(fva)\n    if func is None:\n        return ret\n\n    for bb in idaapi.FlowChart(func):\n        ret.append(BasicBlock(va=bb.start_ea,\n                              size=bb.end_ea - bb.start_ea))\n\n    return ret\n\n\ndef get_function(va):\n    '''\n    return va for first instruction in function that contains given va.\n    '''\n    return idaapi.get_func(va).start_ea\n\n\nRule = namedtuple('Rule', ['name', 'bytes', 'masked_bytes', 'cut_bytes_for_hash'])\n\n\ndef is_jump(va):\n    '''\n    return True if the instruction at the given address appears to be a jump.\n    '''\n    return print_insn_mnem(va).startswith('j')\n\ndef get_fixup_va_and_size(va):\n    '''\n    return (address, size in bytes) of the next fixup that IDA reports for `va`.\n    '''\n    fva = idaapi.get_next_fixup_ea(va)\n    ftype = get_fixup_target_type(fva)\n    fsize = ida_fixup.calc_fixup_size(ftype)\n    return fva, fsize\n\ndef get_basic_block_rule(bb):\n    '''\n    create and format a YARA rule for a single basic block.\n    The following bytes are ignored:\n        - relocation bytes\n        - the last jump instruction\n        - direct memory references / immediate values and other ignorable data\n    '''\n    # fetch the instruction start addresses\n    insns = []\n    va = bb.va\n    while va < bb.va + bb.size:\n        insns.append(va)\n        va = next_head(va)\n\n    # drop the last instruction if its a jump\n    if insns and is_jump(insns[-1]):\n        insns = insns[:-1]\n\n    _bytes = []\n    # `masked_bytes` is the list of formatted bytes,\n    #   not yet join'd for performance.\n    masked_bytes = []\n    cut_bytes_for_hash = ''\n    for va in insns:\n        insn = ida_ua.insn_t()\n        size = ida_ua.decode_insn(insn, va)\n        mnem = insn.get_canon_mnem()\n        op1 = insn.Op1\n        op2 = insn.Op2\n\n        fixup_byte_addrs = set([])\n        if idaapi.contains_fixups(va, size): # not work for x64 binaries? (e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)\n            logging.debug('ea = {:#x}, fixups'.format(va))\n            # fetch the fixup locations and sizes within this one instruction.\n            fixups = []\n            fva, fsize = get_fixup_va_and_size(va)\n            fixups.append((fva, fsize))\n            fva += fsize\n            while fva < va + size:\n                fva, fsize = get_fixup_va_and_size(fva - 1) # to detect consecutive fixups\n                fixups.append((fva, fsize))\n                fva += fsize\n            logging.debug('fixups: {}'.format(fixups))\n            # compute the addresses of each component byte.\n            for fva, fsize in fixups:\n                for i in range(fva, fva+fsize):\n                    fixup_byte_addrs.add(i)\n\n        # fetch and format each byte of the instruction,\n        #  possibly masking it into an unknown byte if its a fixup or several operand types like direct mem ref.\n        masked_types = [o_mem, o_imm, o_displ, o_near, o_far]\n        #masked_types = [o_mem, o_imm, o_near, o_far]\n        bytes_ = get_bytes(va, size)\n        if bytes_ is None:\n            return None\n        for i, byte in enumerate(bytes_):\n            _bytes.append(byte)\n            byte_addr = i + va\n            if byte_addr in fixup_byte_addrs:\n                logging.debug('{:#x}: fixup byte (masked)'.format(byte_addr))\n                masked_bytes.append('??')\n            elif op1.type in masked_types and i >= op1.offb and (i < op2.offb or op2.offb == 0):\n                logging.debug('{:#x}: Op1 masked byte'.format(byte_addr))\n                masked_bytes.append('??')\n            elif op2.type in masked_types and i >= op2.offb:\n                logging.debug('{:#x}: Op2 masked byte'.format(byte_addr))\n                masked_bytes.append('??')\n            else:\n                masked_bytes.append('%02X' % (byte)) # for Python3\n                cut_bytes_for_hash += chr(byte)\n\n    return Rule('$0x%x' % (bb.va), _bytes, masked_bytes, cut_bytes_for_hash)\n\n\ndef format_rules(fva, rules):\n    '''\n    given the address of a function, and the byte signatures for basic blocks in\n     the function, format a complete YARA rule that matches all of the\n     basic block signatures.\n    '''\n    # IDA 7.x name (the 6.x alias GetFunctionName is gone without the compat shim)\n    name = get_func_name(fva)\n    if not rules:\n        logging.info('no rules for {}'.format(name))\n        return None\n\n    # some characters aren't valid for YARA rule names\n    safe_name = name\n    BAD_CHARS = '@ /\\\\!@#$%^&*()[]{};:\\'\",./<>?'\n    for c in BAD_CHARS:\n        safe_name = safe_name.replace(c, '')\n\n    md5 = idautils.GetInputFileMD5()\n    ret = []\n    ret.append('rule a_{hash:s}_{name:s} {{'.format(\n        hash=md5.hex(),\n        name=safe_name))\n    ret.append('  meta:')\n    ret.append('    sample_md5 = \"{md5:s}\"'.format(md5=md5.hex()))\n    ret.append('    function_address = \"0x{fva:x}\"'.format(fva=fva))\n    ret.append('    function_name = \"{name:s}\"'.format(name=name))\n    ret.append('  strings:')\n    for rule in rules:\n        formatted_rule = ' '.join(rule.masked_bytes).rstrip('?? ')\n        ret.append('    {name:s} = {{ {hex:s} }}'.format(\n            name=rule.name,\n            hex=formatted_rule))\n    ret.append('  condition:')\n    ret.append('    all of them')\n    ret.append('}')\n    return '\\n'.join(ret)\n\n\ndef create_yara_rule_for_function(fva):\n    '''\n    given the address of a function, generate and format a complete YARA rule\n     that matches the basic blocks.\n    '''\n    rules = []\n    for bb in get_basic_blocks(fva):\n        rule = get_basic_block_rule(bb)\n\n        if rule:\n            # ensure there at least MIN_BB_BYTE_COUNT\n            #  non-masked bytes in the rule, or ignore it.\n            # this will reduce the incidence of many very small matches.\n            unmasked_count = len([b for b in rule.masked_bytes if b != '??'])\n            if unmasked_count < MIN_BB_BYTE_COUNT:\n                continue\n\n            rules.append(rule)\n\n    return format_rules(fva, rules)\n\n\ndef get_segment_buffer(segstart):\n    '''\n    fetch the bytes of the section that starts at the given address.\n    if the entire section cannot be accessed, try smaller regions until it works.\n    '''\n    segend = idaapi.getseg(segstart).end_ea\n    buf = None\n    segsize = segend - segstart\n    while buf is None and segsize > 0:\n        # IDA 7.x name (the 6.x alias GetManyBytes is gone without the compat shim)\n        buf = get_bytes(segstart, segsize)\n        if buf is None:\n            segsize -= 0x1000\n    return buf\n\n\nSegment = namedtuple('Segment', ['start', 'size', 'name', 'buf'])\n\n\ndef get_segments():\n    '''\n    fetch the segments in the current executable.\n    '''\n    for segstart in idautils.Segments():\n         segend = idaapi.getseg(segstart).end_ea\n         segsize = segend - segstart\n         segname = str(get_segm_name(segstart)).rstrip('\\x00')\n         segbuf = get_segment_buffer(segstart)\n         # the second field of Segment is `size`, so pass the size (not the end address)\n         yield Segment(segstart, segsize, segname, segbuf)\n\n\nclass TestDidntRunError(Exception):\n    pass\n\n\ndef test_yara_rule(rule):\n    '''\n    try to match the given rule against each segment in the current executable.\n    raise TestDidntRunError if its not possible to import the YARA library.\n    return True if there's at least one match, False otherwise.\n    '''\n    try:\n        import yara\n    except ImportError:\n        logger.warning(\"can't test rule: failed to import python-yara\")\n        raise TestDidntRunError('python-yara not available')\n\n    r = yara.compile(source=rule)\n\n    for segment in get_segments():\n        if segment.buf is not None:\n            matches = r.match(data=segment.buf)\n            if len(matches) > 0:\n                logger.info('generated rule matches section: {:s}'.format(segment.name))\n                return True\n    return False\n\n\ndef main():\n    print('Start')\n    ans = ida_kernwin.ask_yn(0, 'define only selected function?')\n    if ans == ida_kernwin.ASKBTN_CANCEL:\n        # ask_yn returns -1 on Cancel, which is truthy; do not treat it as Yes\n        print('Canceled')\n        return\n    if ans == ida_kernwin.ASKBTN_YES:\n        va = get_screen_ea()\n        fva = get_function(va)\n        print(('-' * 80))\n        rule = create_yara_rule_for_function(fva)\n        if rule:\n            print(rule)\n            '''\n            if test_yara_rule(rule):\n                logging.info('success: validated the generated rule')\n            else:\n                logging.error('error: failed to validate generated rule')\n            '''\n    else:\n        for fva in idautils.Functions():\n            print(('-' * 80))\n            rule = create_yara_rule_for_function(fva)\n            if rule:\n                print(rule)\n    print('Done')\n\nif __name__ == '__main__':\n    logging.basicConfig(level=logging.INFO)\n    logging.getLogger().setLevel(logging.INFO)\n    #logging.basicConfig(level=logging.DEBUG)\n    #logging.getLogger().setLevel(logging.DEBUG)\n    main()\n"
  },
  {
    "path": "fn_fuzzy/yara_fn_7x.py",
    "content": "'''\r\nIDAPython script that generates a YARA rule to match against the\r\nbasic blocks of the current function. It masks out relocation bytes\r\nand ignores jump instructions (given that we're already trying to\r\nmatch compiler-specific bytes, this is of arguable benefit).\r\n\r\nIf python-yara is installed, the IDAPython script also validates that\r\nthe generated rule matches at least one segment in the current file.\r\n\r\nauthor: Willi Ballenthin <william.ballenthin@fireeye.com>\r\n'''\r\n# 2018/8/6 Takahiro Haruyama modified to calculate fixup (relocation) size correctly\r\n# and exclude direct memory reference data and other ignorable variable code\r\n\r\nimport logging\r\nfrom collections import namedtuple\r\n\r\nfrom idc import *\r\nimport idaapi\r\nimport idautils\r\nimport ida_ua, ida_kernwin\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nBasicBlock = namedtuple('BasicBlock', ['va', 'size'])\r\n\r\n\r\n# each rule must have at least this many non-masked bytes\r\nMIN_BB_BYTE_COUNT = 4\r\n\r\ndef get_basic_blocks(fva):\r\n    '''\r\n    return sequence of `BasicBlock` instances for given function.\r\n    '''\r\n    ret = []\r\n    func = idaapi.get_func(fva)\r\n    if func is None:\r\n        return ret\r\n\r\n    for bb in idaapi.FlowChart(func):\r\n        ret.append(BasicBlock(va=bb.start_ea,\r\n                              size=bb.end_ea - bb.start_ea))\r\n\r\n    return ret\r\n\r\n\r\ndef get_function(va):\r\n    '''\r\n    return va for first instruction in function that contains given va.\r\n    '''\r\n    return idaapi.get_func(va).start_ea\r\n\r\n\r\nRule = namedtuple('Rule', ['name', 'bytes', 'masked_bytes', 'cut_bytes_for_hash'])\r\n\r\n\r\ndef is_jump(va):\r\n    '''\r\n    return True if the instruction at the given address appears to be a jump.\r\n    '''\r\n    return print_insn_mnem(va).startswith('j')\r\n\r\ndef get_fixup_va_and_size(va):\r\n    fva = idaapi.get_next_fixup_ea(va)\r\n    ftype = 
get_fixup_target_type(fva)\r\n    fsize = ida_fixup.calc_fixup_size(ftype)\r\n    return fva, fsize\r\n\r\ndef get_basic_block_rule(bb):\r\n    '''\r\n    create and format a YARA rule for a single basic block.\r\n    The following bytes are ignored:\r\n        - relocation bytes\r\n        - the last jump instruction\r\n        - direct memory references / immediate values and other igorable data\r\n    '''\r\n    # fetch the instruction start addresses\r\n    insns = []\r\n    va = bb.va\r\n    while va < bb.va + bb.size:\r\n        insns.append(va)\r\n        va = next_head(va)\r\n\r\n    # drop the last instruction if its a jump\r\n    if insns and is_jump(insns[-1]):\r\n        insns = insns[:-1]\r\n\r\n    _bytes = []\r\n    # `masked_bytes` is the list of formatted bytes,\r\n    #   not yet join'd for performance.\r\n    masked_bytes = []\r\n    cut_bytes_for_hash = ''\r\n    for va in insns:\r\n        insn = ida_ua.insn_t()\r\n        size = ida_ua.decode_insn(insn, va)\r\n        mnem = insn.get_canon_mnem()\r\n        op1 = insn.Op1\r\n        op2 = insn.Op2\r\n\r\n        fixup_byte_addrs = set([])\r\n        if idaapi.contains_fixups(va, size): # not work for x64 binaries? 
(e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)\r\n            logging.debug('ea = {:#x}, fixups'.format(va))\r\n            # fetch the fixup locations and sizes within this one instruction.\r\n            fixups = []\r\n            fva, fsize = get_fixup_va_and_size(va)\r\n            fixups.append((fva, fsize))\r\n            fva += fsize\r\n            while fva < va + size:\r\n                fva, fsize = get_fixup_va_and_size(fva - 1) # to detect consecutive fixups\r\n                fixups.append((fva, fsize))\r\n                fva += fsize\r\n            logging.debug('fixups: {}'.format(fixups))\r\n            # compute the addresses of each component byte.\r\n            for fva, fsize in fixups:\r\n                for i in range(fva, fva+fsize):\r\n                    fixup_byte_addrs.add(i)\r\n\r\n        # fetch and format each byte of the instruction,\r\n        #  possibly masking it into an unknown byte if its a fixup or several operand types like direct mem ref.\r\n        masked_types = [o_mem, o_imm, o_displ, o_near, o_far]\r\n        #masked_types = [o_mem, o_imm, o_near, o_far]\r\n        bytes_ = get_bytes(va, size)\r\n        if bytes_ is None:\r\n            return None\r\n        for i, byte in enumerate(bytes_):\r\n            _bytes.append(byte)\r\n            byte_addr = i + va\r\n            if byte_addr in fixup_byte_addrs:\r\n                logging.debug('{:#x}: fixup byte (masked)'.format(byte_addr))\r\n                masked_bytes.append('??')\r\n            elif op1.type in masked_types and i >= op1.offb and (i < op2.offb or op2.offb == 0):\r\n                logging.debug('{:#x}: Op1 masked byte'.format(byte_addr))\r\n                masked_bytes.append('??')\r\n            elif op2.type in masked_types and i >= op2.offb:\r\n                logging.debug('{:#x}: Op2 masked byte'.format(byte_addr))\r\n                masked_bytes.append('??')\r\n            else:\r\n                masked_bytes.append('%02X' % (byte)) # 
for Python3\r\n                cut_bytes_for_hash += chr(byte)\r\n\r\n    return Rule('$0x%x' % (bb.va), _bytes, masked_bytes, cut_bytes_for_hash)\r\n\r\n\r\ndef format_rules(fva, rules):\r\n    '''\r\n    given the address of a function, and the byte signatures for basic blocks in\r\n     the function, format a complete YARA rule that matches all of the\r\n     basic block signatures.\r\n    '''\r\n    name = idc.get_func_name(fva)\r\n    if not rules:\r\n        logging.info('no rules for {}'.format(name))\r\n        return None\r\n\r\n    # some characters aren't valid for YARA rule names\r\n    safe_name = name\r\n    BAD_CHARS = '@ /\\\\!@#$%^&*()[]{};:\\'\",./<>?'\r\n    for c in BAD_CHARS:\r\n        safe_name = safe_name.replace(c, '')\r\n\r\n    md5 = idautils.GetInputFileMD5()\r\n    ret = []\r\n    ret.append('rule a_{hash:s}_{name:s} {{'.format(\r\n        hash=md5.hex(),\r\n        name=safe_name))\r\n    ret.append('  meta:')\r\n    ret.append('    sample_md5 = \"{md5:s}\"'.format(md5=md5.hex()))\r\n    ret.append('    function_address = \"0x{fva:x}\"'.format(fva=fva))\r\n    ret.append('    function_name = \"{name:s}\"'.format(name=name))\r\n    ret.append('  strings:')\r\n    for rule in rules:\r\n        formatted_rule = ' '.join(rule.masked_bytes).rstrip('?? 
')\r\n        ret.append('    {name:s} = {{ {hex:s} }}'.format(\r\n            name=rule.name,\r\n            hex=formatted_rule))\r\n    ret.append('  condition:')\r\n    ret.append('    all of them')\r\n    ret.append('}')\r\n    return '\\n'.join(ret)\r\n\r\n\r\ndef create_yara_rule_for_function(fva):\r\n    '''\r\n    given the address of a function, generate and format a complete YARA rule\r\n     that matches the basic blocks.\r\n    '''\r\n    rules = []\r\n    for bb in get_basic_blocks(fva):\r\n        rule = get_basic_block_rule(bb)\r\n\r\n        if rule:\r\n            # ensure there at least MIN_BB_BYTE_COUNT\r\n            #  non-masked bytes in the rule, or ignore it.\r\n            # this will reduce the incidence of many very small matches.\r\n            unmasked_count = len([b for b in rule.masked_bytes if b != '??'])\r\n            if unmasked_count < MIN_BB_BYTE_COUNT:\r\n                continue\r\n\r\n            rules.append(rule)\r\n\r\n    return format_rules(fva, rules)\r\n\r\n\r\ndef get_segment_buffer(segstart):\r\n    '''\r\n    fetch the bytes of the section that starts at the given address.\r\n    if the entire section cannot be accessed, try smaller regions until it works.\r\n    '''\r\n    segend = idaapi.getseg(segstart).end_ea\r\n    buf = None\r\n    segsize = segend - segstart\r\n    while buf is None and segsize > 0:\r\n        buf = idc.get_bytes(segstart, segsize)\r\n        if buf is None:\r\n            segsize -= 0x1000\r\n    return buf\r\n\r\n\r\nSegment = namedtuple('Segment', ['start', 'size', 'name', 'buf'])\r\n\r\n\r\ndef get_segments():\r\n    '''\r\n    fetch the segments in the current executable.\r\n    '''\r\n    for segstart in idautils.Segments():\r\n         segend = idaapi.getseg(segstart).end_ea\r\n         segsize = segend - segstart\r\n         segname = str(idc.get_segm_name(segstart)).rstrip('\\x00')\r\n         segbuf = get_segment_buffer(segstart)\r\n         yield Segment(segstart, segend, segname, 
segbuf)\r\n\r\n\r\nclass TestDidntRunError(Exception):\r\n    pass\r\n\r\n\r\ndef test_yara_rule(rule):\r\n    '''\r\n    try to match the given rule against each segment in the current exectuable.\r\n    raise TestDidntRunError if its not possible to import the YARA library.\r\n    return True if there's at least one match, False otherwise.\r\n    '''\r\n    try:\r\n        import yara\r\n    except ImportError:\r\n        logger.warning(\"can't test rule: failed to import python-yara\")\r\n        raise TestDidntRunError('python-yara not available')\r\n\r\n    r = yara.compile(source=rule)\r\n\r\n    for segment in get_segments():\r\n        if segment.buf is not None:\r\n            matches = r.match(data=segment.buf)\r\n            if len(matches) > 0:\r\n                logger.info('generated rule matches section: {:s}'.format(segment.name))\r\n                return True\r\n    return False\r\n\r\n\r\ndef main():\r\n    print('Start')\r\n    ans = ida_kernwin.ask_yn(0, 'define only selected function?')\r\n    if ans:\r\n        va = ida_kernwin.get_screen_ea()\r\n        fva = get_function(va)\r\n        print(('-' * 80))\r\n        rule = create_yara_rule_for_function(fva)\r\n        if rule:\r\n            print(rule)\r\n            '''\r\n            if test_yara_rule(rule):\r\n                logging.info('success: validated the generated rule')\r\n            else:\r\n                logging.error('error: failed to validate generated rule')\r\n            '''\r\n    else:\r\n        for fva in idautils.Functions():\r\n            print(('-' * 80))\r\n            rule = create_yara_rule_for_function(fva)\r\n            if rule:\r\n                print(rule)\r\n    print('Done')\r\n\r\nif __name__ == '__main__':\r\n    logging.basicConfig(level=logging.INFO)\r\n    logging.getLogger().setLevel(logging.INFO)\r\n    #logging.basicConfig(level=logging.DEBUG)\r\n    #logging.getLogger().setLevel(logging.DEBUG)\r\n    main()\r\n"
  },
  {
    "path": "stackstring_static/README.org",
    "content": "* stackstring_static.py - IDAPython script statically-recovering strings constructed in stack\n\nThe motivation is the same as [[https://www.fireeye.com/blog/threat-research/2014/08/flare-ida-pro-script-series-automatic-recovery-of-constructed-strings-in-malware.html][FireEye FLARE script]], but I implemented it statically without Vivisect a few years ago.\n\nNote: the script internally renames the stack variables so manually-renamed info in the function will be lost\n\nASCII case:\n\n[[./img/sss_asc_after.jpg]]\n\nUnicode case:\n\n[[./img/sss_uni_after.jpg]]\n\nxor-decoding case:\n\n[[./img/sss_xor_after.jpg]]\n\n\n\n\n"
  },
  {
    "path": "stackstring_static/stackstring_static.py",
    "content": "# stackstring_static.py - IDAPython script statically-recovering strings constructed in stack\n# Takahiro Haruyama (@cci_forensics)\n# Note: the script internally renames the stack variables so manually-renamed info will be lost\n\nimport struct\n\nfrom ida_ua import *\nfrom ida_allins import *\nfrom idautils import *\n#from ida_funcs import *\nfrom idc import *\nimport ida_kernwin\n\ndef extract_unicode(data):\n    pat = re.compile(r'^(?:[\\x20-\\x7E][\\x00]){2,}')\n    return list(set([w.decode('utf-16le') for w in pat.findall(data)]))\n\ndef extract_ascii(data):\n    pat = re.compile(r'^(?:[\\x20-\\x7E]){2,}')\n    return list(set([w.decode('ascii') for w in pat.findall(data)]))\n\nclass StackString(object):\n\n    def __init__ (self, start, end, debug, do_xor, static_xor_key):\n        self.start = start\n        self.end = end\n        self.debug = debug\n        self.do_xor = do_xor\n        self.regs_w_value = {}\n        self.stack_chars = {}\n        self.xor_vars = {}\n        self.stack_imm = None\n        self.static_xor_key = static_xor_key\n\n    def rename_vars(self):\n        stack = GetFrame(self.start)\n        stack_size = GetStrucSize(stack)\n        args_and_ret_size = stack_size - GetFrameLvarSize(self.start)\n\n        for offset, name, size in StructMembers(stack):\n            postfix = stack_size - offset - args_and_ret_size\n            if postfix >= 0:\n                self.stack_chars[postfix] = 0 # initialize vars\n                if name.find('var_') == -1:\n                    #postfix = stack_size - offset - args_and_ret_size\n                    SetMemberName(stack, offset, 'var_{:X}'.format(postfix))\n\n    def store_bytes_to_reg(self, r, b):\n        if r == procregs.sp.reg or r == procregs.bp.reg:\n            return\n        elif procregs.xmm0.reg <= r and r <= procregs.xmm15.reg:\n            self.dprint('reg enum {} = {}'.format(r, repr(b)))\n            self.regs_w_value[r] = b\n        #if (0x1f < b and b < 
0x7f) or b == 0:\n        elif 0 <= b and b < 0x100:\n            self.dprint('reg enum {} = {:#x}'.format(r, b))\n            self.regs_w_value[r] = b\n            if procregs.ax.reg <= r and r <= procregs.bx.reg:\n                # ax = eax = rax = 0 but al = 16 / ah = 20\n                self.regs_w_value[r+16] = b\n                self.regs_w_value[r+20] = b\n\n    def store_reg_to_reg(self, dst, src):\n        if dst == procregs.sp.reg or dst == procregs.bp.reg:\n            return\n        if src in self.regs_w_value:\n            self.dprint('reg enum {} = reg enum {} ({:#x})'.format(dst, src, self.regs_w_value[src]))\n            self.regs_w_value[dst] = self.regs_w_value[src]\n\n    def parse_and_get_var_hex(self, vstr):\n        # e.g., mov     [ebp+68h+var_18+0Ch], 61h\n        var_off = vstr.split('_')[1][:-1].rstrip('h').split('+') # '18+0C'\n        if len(var_off) == 2:\n            res = int(var_off[0], 16) - int(var_off[1], 16)\n        else:\n            res = int(var_off[0], 16)\n\n        # handle base+index registers (e.g., mov     [rsp+rax+258h+var_C0], 6Fh)\n        try:\n            the_reg = eval('procregs.{}.reg'.format(vstr.split('+')[1]))\n            if the_reg in self.regs_w_value:\n                res = res - self.regs_w_value[the_reg]\n        except SyntaxError:\n            pass\n        return res\n        #return eval('0x{}'.format(var_num)) # '18-4' = 20\n\n    def store_byte_to_var(self, v, b):\n        #if (0x1f < b and b < 0x7f) or b == 0:\n        if 0 <= b and b < 0x100:\n            #'''\n            try:\n                if self.stack_chars[v] != 0: # should not be overwritten\n                    return\n            except KeyError: # when not initialized (to handle the bytes one by one)\n                #print 'keyerror var_{:X} = {}'.format(v, b)\n                pass\n            #'''\n            self.dprint('var_{:X} = {:#x}'.format(v, b))\n            self.stack_chars[v] = b\n\n    def store_bytes_to_vars(self, v, 
bs):\n        if isinstance(bs, str): # binary sequence for xmm registers\n            blist = [ord(x) for x in bs]\n        else: # int or long\n            blist = self.int_to_bytes_list(bs)\n\n        for i, b in enumerate(blist):\n            #self.store_byte_to_var(v - i, blist[i])\n            self.store_byte_to_var(v - i, b)\n\n    def store_key_to_name(self, v, b):\n        #if (0x1f < b and b < 0x7f) or b == 0:\n        if 0 <= b and b < 0x100:\n            self.dprint('{} ^ {:#x}'.format(v, b))\n            self.xor_vars[v] = b\n\n    def int_to_bytes_list(self, v):\n        if v == 0:\n            return [0]\n        res = []\n        while(1):\n            b = v & 0xff\n            v = v >> 8\n            #if 0x1f < b and b < 0x7f or b == 0:\n            if 0 <= b and b < 0x100:\n                res.append(b)\n                #if v == 0 and (len(res) == 1 or len(res) == 2 or len(res) == 4 or len(res) == 8):\n                if v == 0 and (len(res) == 2 or len(res) == 4 or len(res) == 8):\n                    # e.g., mov     [rsp+3A8h+var_290], 6E0069h\n                    return res\n            else:\n                break\n        return []\n\n    def store_byte_to_stack(self, b):\n        if 0 <= b and b < 0x100:\n            self.stack_imm = b\n\n    def dprint(self, s):\n        if self.debug:\n            print s\n\n    def traverse(self):\n        print '----------------------------------------------'\n        print '{:#x}:'.format(self.start)\n\n        # replace analyzed names with 'var_*' in stack for calculation\n        try:\n            self.rename_vars()\n        #except TypeError: # caused by StructMembers()\n        except:\n            return\n\n        for head in Heads(self.start, self.end):\n            self.dprint('{:#x}'.format(head))\n            insn = insn_t()\n            inslen = decode_insn(insn, head)\n\n            if insn.itype == NN_mov or insn.itype == NN_movsxd:\n                if insn.Op1.type == o_reg and 
insn.Op2.type == o_imm: # e.g., mov     cl/cx/ecx, 6Ch\n                    self.store_bytes_to_reg(insn.Op1.reg, insn.Op2.value)\n\n                elif insn.Op1.type == o_reg and insn.Op2.type == o_reg: # e.g., mov     cl/cx/ecx, al/ax/eax\n                    self.store_reg_to_reg(insn.Op1.reg, insn.Op2.reg)\n\n                elif insn.Op1.type == o_reg and insn.Op2.dtype == dt_byte and insn.Op2.type == o_mem: # e.g., mov     al, ds:byte_100040F8\n                    self.store_bytes_to_reg(insn.Op1.reg, Byte(insn.Op2.addr))\n\n                elif insn.Op1.type == o_displ and GetOpnd(head, 0).find('var_') != -1 and insn.Op2.type == o_reg and (insn.Op2.dtype == dt_byte or insn.Op2.dtype == dt_word): # e.g., mov     [esp+180h+var_127], cl\n                #elif insn.Op1.type == o_displ and GetOpnd(head, 0).find('var_') != -1 and insn.Op2.type == o_reg: # e.g., mov [rsp+258h+var_1F0], eax (index register)\n                    try:\n                        var_hex = self.parse_and_get_var_hex(GetOpnd(head, 0))\n                    except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                        continue\n                    if insn.Op2.reg in self.regs_w_value:\n                        self.store_bytes_to_vars(var_hex, self.regs_w_value[insn.Op2.reg])\n\n                elif insn.Op1.type == o_displ and insn.Op2.type == o_imm: # e.g., mov     [esp+188h+var_130], 6Ah/2E32h/3362646Fh\n                    #print 'o_displ = o_imm'\n                    try:\n                        var_hex = self.parse_and_get_var_hex(GetOpnd(head, 0))\n                    except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                        continue\n                    self.store_bytes_to_vars(var_hex, insn.Op2.value)\n                elif insn.Op1.type == o_reg and insn.Op2.type == o_displ: # e.g., mov     eax, [rsp+258h+var_1F0]\n                    try:\n                        var_hex = self.parse_and_get_var_hex(GetOpnd(head, 
1))\n                    except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                        continue\n                    if var_hex in self.stack_chars:\n                        self.store_bytes_to_reg(insn.Op1.reg, self.stack_chars[var_hex])\n\n            elif insn.itype == NN_xor:\n                if insn.Op1.type == o_reg and insn.Op2.type == o_reg and insn.Op1.reg == insn.Op2.reg:\n                    # e.g., xor ebx, ebx\n                    self.store_bytes_to_reg(insn.Op1.reg, 0)\n                elif insn.Op1.type == o_displ:\n                    # e.g., xor     [esp+eax+384h+var_2A4], bl\n                    try:\n                        var_hex = self.parse_and_get_var_hex(GetOpnd(head, 0))\n                    except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                        continue\n                    str_var_hex = 'var_{:X}'.format(var_hex)\n                    if insn.Op2.type == o_reg and insn.Op2.reg in self.regs_w_value:\n                        self.store_key_to_name(str_var_hex, self.regs_w_value[insn.Op2.reg])\n                    elif insn.Op2.type == o_imm:\n                        self.store_key_to_name(str_var_hex, insn.Op2.value)\n\n            elif insn.itype == NN_and:\n                if insn.Op1.type == o_displ and GetOpnd(head, 0).find('var_') != -1 and insn.Op2.value == 0:\n                    # e.g., and     [ebp+var_24], 0\n                    try:\n                        var_hex = self.parse_and_get_var_hex(GetOpnd(head, 0))\n                    except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                        continue\n                    self.store_byte_to_var(var_hex, 0)\n\n            # e.g., push    7; pop     edx\n            elif insn.itype == NN_push and insn.Op1.type == o_imm:\n                self.store_byte_to_stack(insn.Op1.value)\n            elif insn.itype == NN_pop and insn.Op1.type == o_reg and self.stack_imm:\n                  
  self.store_bytes_to_reg(insn.Op1.reg, self.stack_imm)\n                    self.stack_imm = None\n\n            # for SSE registers\n            elif (insn.itype == NN_movdqa or insn.itype == NN_movaps) and insn.Op1.type == o_reg:\n                # e.g., movdqa  xmm1, ds:xmmword_155680\n                self.store_bytes_to_reg(insn.Op1.reg, GetManyBytes(insn.Op2.addr, 0x10))\n            elif (insn.itype == NN_movdqu or insn.itype == NN_movups) and insn.Op1.type == o_displ:\n                # e.g., movdqu  [ebp+var_27C], xmm1\n                try:\n                    var_hex = self.parse_and_get_var_hex(GetOpnd(head, 0))\n                except (AttributeError, IndexError, ValueError): # e.g., var_10.S_un\n                    continue\n                if insn.Op2.reg in self.regs_w_value:\n                    self.store_bytes_to_vars(var_hex, self.regs_w_value[insn.Op2.reg])\n\n            # for o_displ operand with base+index registers (increment index)\n            elif insn.itype == NN_inc and insn.Op1.type == o_reg and insn.Op1.reg in self.regs_w_value:\n                self.dprint('{}: incremented {}->{}'.format(GetOpnd(head, 0), self.regs_w_value[insn.Op1.reg], self.regs_w_value[insn.Op1.reg]+1))\n                self.regs_w_value[insn.Op1.reg] += 1\n\n        strings = {}\n        result = []\n        prev = 0\n        len_ = 0\n        uresult = []\n        uprev = 0\n        ulen = 0\n        for k in sorted(self.stack_chars.keys(), reverse=True):\n            self.dprint('{:x}: prev={:x}, uprev={:x}'.format(k, prev, uprev))\n\n            # detect discontinuous chars\n            if prev != 0 and prev != k + 1:\n                self.dprint('discontinuous chars detected')\n                stack_var = 'var_{:X}'.format(prev - 1  + len_)\n                strings[stack_var] = ''.join(result)\n                if strings[stack_var][0] != '\\x00':\n                    print '{} = {}'.format(stack_var, repr(strings[stack_var]))\n                result = []\n   
             prev = 0\n                len_ = 0\n                uresult = []\n                uprev = 0\n                ulen = 0\n            elif uprev != 0 and uprev != k + 1:\n            #elif uprev != 0 and uprev != k + 1 and uresult[1] == 0: # tiny check for unicode\n                self.dprint('discontinuous chars detected (unicode)')\n                stack_var = 'var_{:X}'.format(uprev - 1  + ulen)\n                try:\n                    #strings[stack_var] = ''.join(uresult).decode('utf-16')\n                    self.dprint('data: {}'.format(repr(''.join(uresult))))\n                    if extract_unicode(''.join(uresult)):\n                        strings[stack_var] = extract_unicode(''.join(uresult))[0]\n                        if strings[stack_var][0] != '\\x00':\n                            print '{} = {}'.format(stack_var, repr(strings[stack_var]))\n                #except UnicodeDecodeError:\n                except (TypeError, IndexError):\n                    self.dprint('exception: {}'.format(stack_var))\n                    #strings[stack_var] = ''.join(uresult)\n                    pass\n                uresult = []\n                uprev = 0\n                ulen = 0\n                result = []\n                prev = 0\n                len_ = 0\n\n            self.dprint('{:x}: {} (len={}, ulen={})'.format(k, repr(chr(self.stack_chars[k])), len_, ulen))\n            result.append(chr(self.stack_chars[k]))\n            uresult.append(chr(self.stack_chars[k]))\n\n            # detect null-terminated chars\n            #'''\n            if self.stack_chars[k] == 0:\n            #if self.stack_chars[k] == 0 and (prev != 0 and self.stack_chars[prev] == 0):\n                #stack_var = 'var_{:X}'.format(k + len_)\n                #if uprev != 0 and self.stack_chars[uprev] == 0:\n                if uprev != 0 and self.stack_chars[uprev] == 0 and uresult[1] == 0: # tiny check for unicode\n                    self.dprint('null-terminated chars 
detected (unicode)')\n                    stack_var = 'var_{:X}'.format(k + ulen)\n                    try:\n                        #print ''.join(uresult)\n                        #strings[stack_var] = ''.join(uresult)[:-1].decode('utf-16')\n                        if extract_unicode(''.join(uresult)):\n                            strings[stack_var] = extract_unicode(''.join(uresult))[0]\n                            if strings[stack_var][0] != '\\x00':\n                                print '{} = {}'.format(stack_var, repr(strings[stack_var]))\n                    #except UnicodeDecodeError:\n                    except (TypeError, IndexError):\n                        #strings[stack_var] = ''.join(uresult)\n                        pass\n                    uresult = []\n                    uprev = 0\n                    ulen = 0\n                    prev = k\n                    len_ += 1\n                else:\n                    self.dprint('null-terminated chars detected')\n                    stack_var = 'var_{:X}'.format(k + len_)\n                    strings[stack_var] = ''.join(result)\n                    if strings[stack_var][0] != '\\x00':\n                        print '{} = {}'.format(stack_var, repr(strings[stack_var]))\n                    result = []\n                    prev = 0\n                    len_ = 0\n                    uprev = k\n                    ulen += 1\n            else:\n            #'''\n                prev = k\n                len_ += 1\n                uprev = k\n                ulen += 1\n\n        if len(result) > 0:\n            print('the string is not null-terminated: {}'.format(repr(''.join(result))))\n\n        stack = GetFrame(self.start)\n        results = []\n        for offset, name, size in StructMembers(stack):\n            if name in strings:\n                if self.do_xor:\n                    if name in self.xor_vars:\n                        k = self.xor_vars[name]\n                    else:\n               
         k = self.static_xor_key\n                    res = ''.join([chr(ord(x) ^ k) for x in strings[name][:-1]])\n                    #print k\n                    print '{} (xor-decoded): {} ({})'.format(name, repr(res), repr(strings[name]))\n                    res = res + ' (decoded)'\n                else:\n                    res = strings[name]\n                if res[0] != '\\x00':\n                    SetMemberComment(stack, offset, repr(res.rstrip('\\x00')), 1)\n                    results.append(repr(res.rstrip('\\x00')))\n\n        # set comment at the function start ea\n        if results:\n            cmt = ', '.join(results)\n            if len(cmt) < 128:\n                set_func_cmt(self.start, cmt, True)\n            else:\n                set_func_cmt(self.start, 'a lot of stack strings recovered (need to be checked)', True)\n\n        # restore analyzed names in stack\n        AnalyzeArea(self.start, self.end)\n\nclass SSSForm(ida_kernwin.Form):\n    def __init__(self):\n        ida_kernwin.Form.__init__(self,\nr\"\"\"BUTTON YES* Run\nBUTTON CANCEL Cancel\nstackstring_static\n\n{FormChangeCb}\n<current function only:{cCurrentOnly}>\n<enable debug messages:{cDebug}>\n<enable xor decoding:{cDecode}>{cGroup}>\n<default xor value in hex (single byte):{iXorValue}>\n\"\"\",\n        {\n            'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange),\n            'cGroup': ida_kernwin.Form.ChkGroupControl((\"cCurrentOnly\", \"cDebug\", \"cDecode\")),\n            'iXorValue': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),\n        })\n\n    def OnFormChange(self, fid):\n        if fid == -1:\n            self.SetControlValue(self.cCurrentOnly, True)\n            self.EnableField(self.iXorValue, False)                \n        if fid == self.cDecode.id:\n            #print('cDecode changed: {}'.format(self.cDecode.checked))\n            #if self.cDecode.checked:\n            self.EnableField(self.iXorValue, True)\n            
#else:\n                #self.EnableField(self.iXorValue, False)                \n        return 1\n\ndef main():\n    print 'start'\n\n    f = SSSForm()\n    f.Compile()\n    f.iXorValue.value = 0x55\n    r = f.Execute()\n    if r == 1: # Run\n        if f.cCurrentOnly.checked:\n            start = GetFunctionAttr(here(), FUNCATTR_START)\n            end = GetFunctionAttr(here(), FUNCATTR_END)\n            ss = StackString(start, end, f.cDebug.checked, f.cDecode.checked, f.iXorValue.value)\n            ss.traverse()\n        else:\n            for start in Functions():\n                end = GetFunctionAttr(start, FUNCATTR_END)\n                ss = StackString(start, end, f.cDebug.checked, f.cDecode.checked, f.iXorValue.value)\n                ss.traverse()\n    else:  # Cancel\n        print 'cancel'\n\n    Refresh()\n    print '----------------------------------------------'\n    print 'done'\n\nif __name__ == '__main__':\n    main()\n\n\n\n\n"
  }
]