Showing preview only (221K chars total). Download the full file or copy to clipboard to get everything.
Repository: TakahiroHaruyama/ida_haru
Branch: master
Commit: 29bd253294c3
Files: 27
Total size: 210.7 KB
Directory structure:
gitextract_b2x18g5t/
├── .gitignore
├── ADVobfuscator/
│ ├── README.org
│ └── idapy3_ADVobfuscator_deob.py
├── LICENSE
├── README.org
├── bindiff/
│ ├── README.org
│ ├── bindiff.py
│ ├── bindiff_export.idc
│ ├── save_func_names.py
│ └── save_func_names_7x.py
├── callstrings/
│ ├── README.org
│ ├── hexrays_utils.py
│ ├── ida_callstrings_dbg.py
│ ├── ida_callstrings_flare_emu.py
│ └── ida_callstrings_static.py
├── eset_crackme/
│ ├── README.org
│ ├── loaders/
│ │ └── ida_loader_drv_vm.py
│ └── procs/
│ └── ida_processor_drv_vm.py
├── fn_fuzzy/
│ ├── README.org
│ ├── cli_export.py
│ ├── dump_types.py
│ ├── fn_fuzzy.py
│ ├── fn_fuzzy_7x.py
│ ├── yara_fn.py
│ └── yara_fn_7x.py
└── stackstring_static/
├── README.org
└── stackstring_static.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: ADVobfuscator/README.org
================================================
* IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample
The script requires [[https://github.com/fireeye/flare-emu][flare-emu]].
The tested sample is [[https://www.virustotal.com/gui/file/c1f1bc58456cff7413d7234e348d47a8acfdc9d019ae7a4aba1afc1b3ed55ffa/details][491115422a6b94dc952982e6914adc39]] (TrickBot's UEFI firmware reconnaissance module called "TrickBoot").
Note: The script may not work as-is on a sample that was built with a different compiler or with different flags, but I think the same approach (decoder function pattern matching + emulation) can still be applied.
A result example:
#+BEGIN_SRC
[*] 0x1000a124: xor2-encoded function detected (size = 0x2f)
[*] 0x1000b92c: emulating from 0x1000b71b to 0x1000b92c
[+] 0x1000b92c: uefi_expl_port_writeDeviceIoControl() ERROR %d
#+END_SRC
[[./img/adv_result.png]]
** Reference
- https://github.com/andrivet/ADVobfuscator
- https://eclypsium.com/2020/12/03/trickbot-now-offers-trickboot-persist-brick-profit/
- [[http://antonioparata.blogspot.com/2020/06/deobfuscating-c-advobfuscator-with.html]]
================================================
FILE: ADVobfuscator/idapy3_ADVobfuscator_deob.py
================================================
# idapy3_ADVobfuscator_deob.py - IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample
# Takahiro Haruyama (@cci_forensics)
from idc import *
from idautils import *
import idaapi
# flare-emu is a hard requirement of this script: explain why the import
# failed, then re-raise so the script stops.
try:
    import flare_emu
except ImportError as e:
    # Python 3 exceptions have no `.message` attribute; format the exception
    # object itself so this handler does not die with AttributeError.
    print("Could not import flare_emu: {}\nExiting.".format(e))
    raise
import re, unicorn
'''
dec
'''
# Byte signatures of the decoder routines. Every pattern is anchored (^...$)
# and main() searches it in the raw bytes of one function at a time.
# 'sub': main() uses capture group 1 as the key and group 2 as the size.
g_pat_sub = re.compile(rb'^\x33\xD2\x8A\x04\x0A\x0F\xBE\xC0\x83\xE8(.)\x88\x04\x0A\x42\x83\xFA(.)\x72\xEE\x8B\xC1\xC3$', re.DOTALL)
# 'xor1' / 'xor2' / 'dec': main() uses the single capture group as the size.
g_pat_xor1 = re.compile(rb'^\x53\x55\x56\x57\x8b\xf9\x6a(.)\x5d\x8d\x47\x04\x8a\x10\x0f\xbe\x37\x0f\xbe\xca\x33\xce\x88\x08\x40\x83\xed\x01\x75\xee\xc6\x47.\x00\x8d\x47\x04\x5f\x5e\x5d\x5b\xc3$', re.DOTALL)
g_pat_xor2 = re.compile(rb'^\x53\x56\x57\x8b\xf1\x33\xdb\x8a\x54\x1e\x04\x8b\x06\x02\xc3\x0f\xbe\xca\x33\xc1\x88\x44\x1e\x04\x43\x83\xfb(.)\x72\xe9\x5f\xc6\x46.\x00\x8d\x46\x04\x5e\x5b\xc3$', re.DOTALL)
g_pat_dec = re.compile(rb'^\x33\xd2\x8a\x04\x0a\x0f\xbe\xc0\x48\x88\x04\x0a\x42\x83\xfa(.)\x72\xf0\x8b\xc1\xc3$', re.DOTALL)
# Pattern name -> compiled signature. The name is passed to emulate() to
# select the decoding arithmetic and is embedded in the renamed function.
g_pats = {
'sub': g_pat_sub,
'xor1': g_pat_xor1,
'xor2': g_pat_xor2,
'dec': g_pat_dec,
}
def info(msg):
    """Print `msg` to stdout with the informational "[*] " prefix."""
    prefix = "[*] "
    print(prefix + format(msg))
def success(msg):
    """Print `msg` to stdout with the success "[+] " prefix."""
    prefix = "[+] "
    print(prefix + format(msg))
def error(msg):
    """Print `msg` to stdout with the error "[!] " prefix."""
    prefix = "[!] "
    print(prefix + format(msg))
def set_decomplier_cmt(ea, cmt):
    """Attach `cmt` as a user comment in the decompiler output at `ea`.

    Best effort: any failure is reported through error() and swallowed so
    that the caller can keep processing other addresses.

    ea  -- address whose enclosing function is decompiled and annotated
    cmt -- comment text to store
    """
    try:
        cfunc = idaapi.decompile(ea)
        if not cfunc:
            error("Decompile failed: {:#x}".format(ea))
            return
        # Place the comment at the end-of-statement (semicolon) position.
        tl = idaapi.treeloc_t()
        tl.ea = ea
        tl.itp = idaapi.ITP_SEMI
        cfunc.set_user_cmt(tl, cmt)
        cfunc.save_user_cmts()
    except Exception as e:
        # `Exception` rather than a bare except: KeyboardInterrupt/SystemExit
        # must still be able to stop the script. The reason is logged too.
        error("Decompile failed: {:#x} ({})".format(ea, e))
def add_bookmark(ea, comment):
    """Store a bookmark for `ea` with description `comment`.

    Scans slots 0..1023 and uses the first one that is either empty or
    already points at `ea` (which is then overwritten).

    Returns True when the bookmark was written, False when no slot was usable.
    """
    slot = next(
        (idx for idx in range(1024) if get_bookmark(idx) in (BADADDR, ea)),
        None,
    )
    if slot is None:
        # every slot is occupied by some other address
        return False
    put_bookmark(ea, 0, 0, 0, slot, comment)
    return True
def get_emu_range(ea):
    """Return the (start, end) address pair to emulate in order to reach `ea`.

    The start is the beginning of the first flow-chart block whose bounds
    contain `ea`; the end is `ea` itself. Returns (None, None) when `ea` is
    not inside a function or no block matches.
    """
    func = idaapi.get_func(ea)
    if func is not None:
        for block in idaapi.FlowChart(func):
            # NOTE(review): the upper bound is inclusive here; if end_ea is
            # exclusive, an `ea` that starts a block can match the block
            # listed before it -- confirm this is intended.
            if ea < block.start_ea or ea > block.end_ea:
                continue
            return block.start_ea, ea
    return None, None
# enable a step into emulation for the decoder (disabled)
def call_hook(address, argv, funcName, userData):
    """Call hook that toggles userData['skipCalls'] per call target.

    Calls to the function named userData['dec_fn_name'] are not skipped;
    calls to any other function are.
    """
    is_decoder = funcName == userData["dec_fn_name"]
    userData['skipCalls'] = not is_decoder
# validate the emulation result, based on the encoded buf ptr (disabled)
def inst_hook(uc, address, size, userData):
    """Instruction hook that validates an emulation run (currently unused).

    At userData['ref'] the value of ecx is recorded as the encoded-buffer
    pointer. At userData['end'], if a pointer was recorded, the buffer is
    read back and printed.

    uc       -- emulator instance providing reg_read()/mem_read()
    address  -- address of the instruction about to execute
    size     -- instruction size (not used)
    userData -- dict with 'ref', 'end', 'size', 'dec_fn_name' and 'EmuHelper'
    """
    if address == userData['ref']:
        eh = userData["EmuHelper"]
        try:
            pc = uc.reg_read(eh.regs["pc"])
            enc_ea = uc.reg_read(eh.regs["ecx"])
            # the original message contained a stray ')' after the address
            info('pc = {:#x}, address = {:#x}, enc_ea = {:#x}'.format(pc, address, enc_ea))
            userData["enc_ea"] = enc_ea
        except unicorn.UcError as e:
            error("emulation error: {}".format(str(e)))
    elif address == userData['end'] and userData.get('enc_ea'):
        try:
            # 'sub' decoders are read from the pointer itself; the other
            # decoders are read from 4 bytes past it.
            offset = 0 if 'sub' in userData["dec_fn_name"] else 4
            dec = uc.mem_read(userData["enc_ea"] + offset, userData['size'])
            success('{:#x}: {}'.format(userData['ref'], dec))
        except unicorn.UcError as e:
            error("emulation error: {}".format(str(e)))
def _decode_adv_bytes(pname, enc, key):
    """Apply the `pname` decoder arithmetic to `enc` and return the text."""
    if pname == 'sub':
        raw = bytes((x - key) & 0xff for x in enc)
    elif pname == 'dec':
        raw = bytes((x - 1) & 0xff for x in enc)
    elif pname == 'xor1':
        raw = bytes(x ^ key for x in enc)
    else:  # xor2
        # key + index can exceed 0xff; mask so that bytes() cannot raise
        # ValueError on long strings or large keys.
        raw = bytes((x ^ (key + i)) & 0xff for i, x in enumerate(enc))
    return raw.decode()


def emulate(pname, eh, dec_fn, size, key):
    """Decode the string passed at every call site of decoder `dec_fn`.

    For each `call` reference to `dec_fn`, the code from the start of the
    enclosing block up to the call is emulated, the encoded buffer is read
    through ecx, and the buffer is decoded in Python. The result is stored
    as a disassembly comment, a decompiler comment and a bookmark.

    pname  -- decoder kind: 'sub', 'dec', 'xor1' or 'xor2'
    eh     -- flare_emu.EmuHelper instance used for emulation
    dec_fn -- address of the decoder function
    size   -- number of encoded bytes to read
    key    -- key for 'sub'; ignored for the xor kinds (read from memory)

    Returns the number of call sites that were decoded successfully.
    """
    cnt = 0
    for ref in CodeRefsTo(dec_fn, False):
        if print_insn_mnem(ref) != 'call':
            continue
        start, end = get_emu_range(ref)
        if not (start and end):
            continue
        info('{:#x}: emulating from {:#x} to {:#x}'.format(ref, start, end))
        ea = None  # reset per call site so the error path never sees a stale value
        try:
            eh.emulateRange(start, endAddr=end)
            ea = eh.uc.reg_read(eh.regs["ecx"])
            if pname in ('sub', 'dec'):
                cur_key = key
                enc = eh.uc.mem_read(ea, size)
            else:
                # xor kinds: the key is the first byte at the pointer and
                # the encoded bytes start 4 bytes after it
                cur_key = eh.uc.mem_read(ea, 4)[0]
                enc = eh.uc.mem_read(ea + 4, size)
            dec = _decode_adv_bytes(pname, enc, cur_key)
        except unicorn.UcError as e:
            pc = eh.uc.reg_read(eh.regs["pc"])
            where = '{:#x}'.format(ea) if ea is not None else 'an unknown address'
            error("{:#x}: {} when reading {}".format(pc, str(e), where))
        except UnicodeDecodeError as e:
            # one undecodable buffer must not abort the whole run
            error('{:#x}: decoded bytes are not valid text: {}'.format(ref, e))
        else:
            success('{:#x}: {}'.format(ref, dec))
            set_cmt(ref, dec, 0)
            set_decomplier_cmt(ref, dec)
            add_bookmark(ref, 'decoded: {}'.format(dec))
            cnt += 1
        finally:
            eh.resetEmulatorHeapAndStack()
    return cnt
def main():
    """Find the decoder functions, rename them and decode their call sites.

    Every non-library, non-thunk function is matched against the byte
    signatures in g_pats. A matching function is renamed to
    fn_ADVobfuscator_decode_<kind>_len<size> and handed to emulate(); the
    per-kind totals of decoded strings are printed at the end.
    """
    info('start')
    eh = flare_emu.EmuHelper()
    # search the decoding functions
    cnts = {}
    for fva in Functions():
        # Use the names provided by `from idc import *`; the file never
        # binds the module name `idc` itself.
        if get_func_flags(fva) & (FUNC_LIB | FUNC_THUNK):
            continue
        fn_bytes = get_bytes(fva, get_func_attr(fva, FUNCATTR_END) - fva)
        if not fn_bytes:
            # nothing readable to match against
            continue
        for pname, pat in g_pats.items():
            m = pat.search(fn_bytes)
            if not m:
                continue
            if pname == 'sub':
                key = int.from_bytes(m.group(1), 'little')
                size = int.from_bytes(m.group(2), 'little')
            else:
                key = None
                size = int.from_bytes(m.group(1), 'little')
            print('\n')
            info('{:#x}: {}-encoded function detected (size = {:#x})'.format(fva, pname, size))
            # rename before emulating, as the original did
            idaapi.force_name(fva, 'fn_ADVobfuscator_decode_{}_len{}'.format(pname, size))
            cnts[pname] = cnts.get(pname, 0) + emulate(pname, eh, fva, size, key)
            break
    info('number of decoded strings: {}'.format(cnts))
    info('done')
if __name__ == '__main__':
main()
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.org
================================================
#+OPTIONS: ^:{}
* ida_haru
Scripts/plugins for IDA Pro
Note: Old scripts don't work for IDA 8.x, but I leave them just for reference.
** eset_crackme
IDA Pro loader/processor modules for ESET CrackMe driver VM
** stackstring_static
IDAPython script statically-recovering strings constructed in stack
** fn_fuzzy
IDAPython script for fast multiple binary diffing triage
** bindiff
python script for multiple binary diffing by BinDiff
** ADVobfuscator
IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample
** HexRaysDeob
modified version for defeating APT10 ANEL's code obfuscations (located in a [[https://github.com/carbonblack/HexRaysDeob][corporate github repository]])
** callstrings
scripts for defeating "polymorphic stack strings" obfuscation used by Hodur sample
================================================
FILE: bindiff/README.org
================================================
#+OPTIONS: ^:{}
#+TITLE: BinDiff wrapper script for multiple binary diffing
* Purpose
multiple binary diffing up to 100 samples ([[https://github.com/TakahiroHaruyama/ida_haru/tree/master/fn_fuzzy][fn_fuzzy]] is better for more samples)
* Requirements
- IDA 7.6 and BinDiff 6
- python packages: pefile macholib pyelftools python-idb prettytable
* How to Use
Before using it, you have to edit the paths for executables/scripts in bindiff.py.
#+BEGIN_SRC
# paths (should be edited)
g_out_dir = r'Z:\haru\analysis\tics\bindiff_db'
g_ida_dir = r'C:\work\tool\IDAx64'
g_exp_path = r'Z:\cloud\gd\python\IDAPython\ida_haru\bindiff\bindiff_export.idc'
g_differ_path = r"C:\Program Files\BinDiff\bin\bindiff.exe"
#g_differ_path = r'C:\Program Files (x86)\zynamics\BinDiff 4.2\bin\differ64.exe'
g_save_fname_path = r'Z:\cloud\gd\python\IDAPython\ida_haru\bindiff\save_func_names.py'
#+END_SRC
You can check the command line options by -h or --help.
#+BEGIN_EXAMPLE
Z:\cloud\gd\work\python\IDAPython\bindiff>python bindiff.py -h
usage: bindiff.py [-h] [--out_dir OUT_DIR] [--ws_th WS_TH] [--fs_th FS_TH] [--ins_th INS_TH] [--bb_th BB_TH] [--size_th SIZE_TH] [--func_regex FUNC_REGEX] [--debug]
[--clear] [--noidb] [--use_pyidb]
primary {1,m} ...
positional arguments:
primary primary binary to compare
{1,m} mode: 1, m
1 BinDiff 1 to 1
m BinDiff 1 to many
optional arguments:
-h, --help show this help message and exit
--out_dir OUT_DIR, -o OUT_DIR
output directory including .BinExport/.BinDiff (default: Z:\haru\analysis\tics\bindiff_db)
--ws_th WS_TH, -w WS_TH
whole binary similarity threshold (default: 0.2)
--fs_th FS_TH, -f FS_TH
function similarity threshold (default: 0.8)
--ins_th INS_TH, -i INS_TH
instruction threshold (default: 30)
--bb_th BB_TH, -b BB_TH
basic block threshold (default: 1)
--size_th SIZE_TH, -s SIZE_TH
file size threshold (MB) (default: 10)
--func_regex FUNC_REGEX, -e FUNC_REGEX
function name regex to reduce noise (default: sub_|fn_|chg_)
--debug, -d print debug output (default: False)
--clear, -c clear .BinExport, .BinDiff and function name cache (default: False)
--noidb, -n skip a secondary binary without idb (default: False)
--use_pyidb use python-idb (default: False)
#+END_EXAMPLE
There are 2 modes. One is "1 to 1" mode, the other is "1 to many" mode.
** "1 to 1" mode example
In "1 to 1" mode, we should specify executable file paths for primary and secondary targets.
#+BEGIN_EXAMPLE
Z:\cloud\gd\work\python\IDAPython\bindiff>python bindiff.py Z:\haru\analysis\tics\hoge\[redacted]_worker_fixed
1 Z:\haru\analysis\tics\hoge\samples\checked\[redacted]c2f05
---------------------------------------------
[*] BinDiff result
[*] elapsed time = 0.390000104904 sec, number of diffing = 1
[*] primary binary: (([redacted]_worker_fixed))
============== 1 high similar binaries (>0.2) ================
+----------------+--------------------------------------+
| similarity | secondary binary |
+----------------+--------------------------------------+
| 0.211967127395 | [redacted]c2f05 |
+----------------+--------------------------------------+
---------------------------------------------
#+END_EXAMPLE
"high similar binaries" lists the secondary binaries whose whole-binary similarity exceeds the threshold. You can adjust the threshold with the -w option.
** "1 to many" mode example
In "1 to many" mode, we should specify an executable file path for a primary target and a folder path for secondary targets. We can specify to compare secondary binaries recursively (-r option).
#+BEGIN_EXAMPLE
Z:\cloud\gd\work\python\IDAPython\bindiff>python bindiff.py Z:\haru\analysis\tics\hoge\samples\attacker\[redacted]_worker_fixed
m Z:\haru\analysis\tics\hoge\samples\tmp
---------------------------------------------
[*] BinDiff result
[*] elapsed time = 6.71900010109 sec, number of diffing = 3
[*] primary binary: (([redacted]_worker_fixed))
============== 10 high similar functions (>0.8), except high similar binaries ================
+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+
| similarity | primary addr | primary name | secondary addr | secondary name |secondary binary |
+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+
| 1.0 | 0x180067720 | Virt_sub_180067720 | 0x180004c30 | sub_180004c30 | [redacted]e6504 |
| 1.0 | 0x1800674b0 | sub_1800674b0 | 0x180004930 | sub_180004930 | [redacted]e6504 |
| 1.0 | 0x1800673a0 | chg_peparse_Virt_sub_1800673A0 | 0x180004820 | sub_180004820 | [redacted]e6504 |
| 1.0 | 0x1800672b0 | Virt_sub_1800672B0 | 0x180004730 | sub_180004730 | [redacted]e6504 |
| 1.0 | 0x18005fd84 | sub_18005fd84 | 0x13f69af94 | sub_13f69af94 | [redacted]fb841 |
| 1.0 | 0x18005fd84 | sub_18005fd84 | 0x180012648 | __crtMessageBoxW | [redacted]e6504 |
| 1.0 | 0x180050f30 | sub_180050f30 | 0x1800019f0 | ?erase@?$basic_string@DU?$char_t | [redacted]e6504 |
| 0.98987073046 | 0x1800677e0 | chg_peparse_Virt_sub_1800677E0 | 0x180004cf0 | sub_180004cf0 | [redacted]e6504 |
| 0.963708558784 | 0x180067560 | sub_180067560 | 0x1800049e0 | sub_1800049e0 | [redacted]e6504 |
| 0.946399194338 | 0x180018780 | chg_rotate_sub_180018780 | 0x140004360 | sub_140004360 | [redacted]92023 |
+----------------+--------------+--------------------------------+----------------+----------------------------------+-----------------+
---------------------------------------------
#+END_EXAMPLE
"high similar functions" lists the functions whose similarity exceeds the function threshold even though their binaries have a whole-binary similarity below the binary threshold. You can adjust the function threshold with the -f option.
The function similarity result is very noisy so library/thunk functions are filtered out by the script. Additionally, we can specify the number of instructions/basic blocks, file size, and so on to reduce the noise.
And by default, the script newly creates idbs for the target binaries if not found. If you want to only compare existing idbs, please specify -n.
* Notes
- If you can't get the function similarities correctly, adjust the function similarity threshold (--fs_th), instruction threshold (--ins_th), basic block threshold (--bb_th) and function name filter rule (--func_regex) options. The script excludes the matches of small codes because function similarity results of multiple binaries are noisy.
- BinDiff 5.0 and later contains a [[https://issuetracker.google.com/issues/129600738][bug]] that we can't load existing .BinDiff files and import symbols/comments due to missing .BinExport files. I hope it will be fixed someday.
- python-idb doesn't work for IDA 7.6 IDBs. So by default it's not used (enable --use_pyidb option if needed).
================================================
FILE: bindiff/bindiff.py
================================================
# bindiff.py - BinDiff wrapper script for multiple binary diffing
# Takahiro Haruyama (@cci_forensics)
import argparse, subprocess, os, sqlite3, time, pickle, re, multiprocessing, sys, struct, logging
from prettytable import PrettyTable
import pefile
from macholib.MachO import MachO
from macholib.mach_o import *
from elftools.elf.elffile import ELFFile
import idb
logging.basicConfig(level=logging.ERROR) # to suppress python-idb warning
# paths (should be edited)
# Only one platform section is active at a time; the other stays commented out.
# Windows
#g_out_dir = r'C:\analysisw\tmp\bindiff'
#g_ida_dir = r'C:\analysisw\tool\IDA'
#g_differ_path = r"C:\Program Files\BinDiff\bin\bindiff.exe"
# MacOS
g_out_dir = r'/Users/haru/analysis/tmp/bindiff'
#g_ida_dir = r'/Applications/IDA/ida.app/Contents/MacOS'
# IDA executables; BinDiff._get_ida_path() picks one based on the architecture
g_ida32_path = r'/Applications/IDA/ida.app/Contents/MacOS/ida'
g_ida64_path = r'/Applications/IDA/ida64.app/Contents/MacOS/ida64'
g_differ_path = r"/Applications/BinDiff/BinDiff.app/Contents/MacOS/bin/bindiff"
# helper scripts are resolved relative to the directory of this file
g_exp_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bindiff_export.idc')
g_save_fname_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'save_func_names_7x.py')
# parameters
g_ws_th = 0.15 # whole binary similarity threshold
g_fs_th = 0.70 # function similarity threshold
g_ins_th = 10 # instruction threshold
g_bb_th = 0 # basic block threshold
g_size_th = 10 # file size threshold (MB)
#g_func_regex = r'sub_|fn_|chg_' # function name filter rule
g_func_regex = r'.*' # function name filter rule
class LocalError(Exception):
    """Base class of the errors defined by this script."""


class ProcExportError(LocalError):
    """Raised when the primary binary cannot be exported or is unsupported."""


class ProcDiffError(LocalError):
    """Error of the diffing step."""


class LoadFuncNamesError(LocalError):
    """Error while loading function names."""


# NOTE: the next two names shadow Python 3 builtins inside this module; they
# are kept because they are part of the module's existing interface.
class FileNotFoundError(LocalError):
    """Raised when a required executable or script path does not exist."""


class ChildProcessError(LocalError):
    """Error reported for a child process."""
class BinDiff(object):
def __init__(self, primary, out_dir, ws_th, fs_th, ins_th, bb_th, size_th, func_regex, debug=False, clear=False, newidb=False, use_pyidb=False):
    """Prepare a diffing session for the `primary` binary.

    Detects the file format/architecture, selects the matching IDA
    executable, checks that the required tools exist, exports the primary
    binary and loads its function names.

    primary    -- path of the primary binary
    out_dir    -- directory used for generated files and caches
    ws_th, fs_th, ins_th, bb_th, size_th -- thresholds stored for later use
    func_regex -- function name filter (used when python-idb is not used)
    debug      -- enable _dprint() output
    clear      -- ignore existing caches
    newidb     -- stored as self._newidb
    use_pyidb  -- load function names through python-idb

    Raises ProcExportError when the format is unsupported or the export
    fails, and FileNotFoundError (the class defined in this module) when a
    required path is missing.
    """
    self._debug = debug
    self._clear = clear
    self._newidb = newidb
    self._lock = multiprocessing.Lock()
    self._primary = primary
    self._ws_th = ws_th
    self._fs_th = fs_th
    self._ins_th = ins_th
    self._bb_th = bb_th
    self._size_th = size_th
    self._out_dir = out_dir
    self.use_pyidb = use_pyidb

    self._format, self._arch = self._get_machine_type(primary)
    if self._format is None:
        # the original message had no placeholder, so the path was dropped
        raise ProcExportError('primary binary should be PE/Mach-O/ELF: {}'.format(primary))
    self._dprint('primary binary format: {}'.format(self._format))
    self._dprint('primary binary architecture: {}'.format(self._arch))

    self._ida_path = self._get_ida_path(self._arch)
    res = self._files_not_found()
    if res is not None:
        raise FileNotFoundError('file is not found: {}'.format(res))
    self._dprint('IDA binary path for primary: {}'.format(self._ida_path))

    if self._make_BinExport(self._primary, self._ida_path) != 0:
        raise ProcExportError('primary BinExport failed: {}'.format(primary))

    if self.use_pyidb:
        idb_path = self._get_idb_path(primary, self._arch)
        self._func_names = self._load_func_names_pyidb(idb_path)
    else:
        # NOTE(review): _func_p / _func_regex exist only in this mode;
        # confirm no other method reads them when use_pyidb is set.
        self._func_p = re.compile(func_regex)
        self._func_regex = func_regex
        self._func_names = self._load_func_names_default(func_regex, primary,
                                                         self._ida_path)

    self._high_ws = {}
    self._high_fs = {}
    self._diff_cnt = 0
def _dprint(self, msg):
if self._debug:
self._lock.acquire()
print('[+] [{}]: {}'.format(os.getpid(), msg))
self._lock.release()
def _get_machine_type(self, path):
    """Detect the file format and bitness of the binary at `path`.

    The file is tried as PE first, then Mach-O, then ELF.

    Returns (format, arch) where format is 'PE', 'Mach-O' or 'ELF' and arch
    is '32-bit' or '64-bit', or (None, None) when no parser accepts it.
    """
    try:
        pe = pefile.PE(path)
        format_ = 'PE'
        # NOTE(review): every machine type without 'I386' in its name is
        # reported as 64-bit.
        if pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine].find('I386') != -1:
            arch = '32-bit'
        else:
            arch = '64-bit'
    except (pefile.PEFormatError, KeyError) as detail:
        try:
            self._dprint(detail)
            m = MachO(path)
            format_ = 'Mach-O'
            # With several headers the last one decides.
            # NOTE(review): only 'x86_64' is reported as 64-bit.
            for header in m.headers:
                if CPU_TYPE_NAMES.get(header.header.cputype, header.header.cputype) == 'x86_64':
                    arch = '64-bit'
                else:
                    arch = '32-bit'
        except Exception:
            try:
                # close the handle once the header is read; the original
                # left the file open
                with open(path, 'rb') as f:
                    e_ident = ELFFile(f).header['e_ident']
                format_ = 'ELF'
                if e_ident['EI_CLASS'] == 'ELFCLASS64':
                    arch = '64-bit'
                else:
                    arch = '32-bit'
            except Exception:
                return None, None
    return format_, arch
def _files_not_found(self):
    """Return the first required tool path that is not an existing file.

    ``self._ida_path``, ``g_exp_path`` and ``g_differ_path`` are checked in
    that order (``g_save_fname_path`` is not part of the check).

    Returns:
        The first missing path, or ``None`` when all three exist.
    """
    required = (self._ida_path, g_exp_path, g_differ_path)
    return next((path for path in required if not os.path.isfile(path)), None)
def _get_db_path_noext(self, target):
return os.path.join(self._out_dir, os.path.splitext(os.path.basename(target))[0])
#return os.path.join(self._out_dir, os.path.basename(target))
def _get_idb_path(self, target, arch):
db_ext = '.idb' if arch == '32-bit' else '.i64'
target_split = os.path.splitext(target)[0]
if os.path.exists(target_split + db_ext):
return target_split + db_ext
else:
return target + db_ext # for recent IDA versions
def _get_ida_path(self, arch):
    """Pick the IDA executable for *arch*.

    Returns ``g_ida32_path`` when *arch* is ``'32-bit'`` and ``g_ida64_path``
    for any other value.
    """
    if arch == '32-bit':
        return g_ida32_path
    return g_ida64_path
def _load_func_names_pyidb(self, idb_path):
    """Load the ``{address: name}`` map of an IDA database through python-idb.

    Library and thunk functions are left out. The map is cached as
    ``<out_dir>/<idb name without extension>_func_names.pickle``; the cache is
    rebuilt when ``self._clear`` is set or the pickle does not exist, and the
    result is always read back from the pickle.

    Args:
        idb_path: path of the .idb/.i64 file.

    Returns:
        dict mapping function start addresses to function names.
    """
    cache_base = os.path.join(self._out_dir, os.path.basename(idb_path))
    pickle_path = os.path.splitext(cache_base)[0] + '_func_names.pickle'

    must_rebuild = self._clear or not os.path.exists(pickle_path)
    if must_rebuild:
        collected = {}
        with idb.from_file(idb_path) as db:
            api = idb.IDAPython(db)
            for ea in api.idautils.Functions(api.idc.MinEA(), api.idc.MaxEA()):
                flags = api.idc.GetFunctionFlags(ea)
                if not (flags & api.ida_funcs.FUNC_LIB or flags & api.ida_funcs.FUNC_THUNK):
                    collected[ea] = api.idc.GetFunctionName(ea)
        with open(pickle_path, 'wb') as out:
            pickle.dump(collected, out)

    with open(pickle_path, 'rb') as cached:
        self._dprint('function names loaded: {}'.format(idb_path))
        return pickle.load(cached)
# default function without python-idb
def _load_func_names_default(self, func_regex, path, ida_path):
    """Load the ``{address: name}`` map of *path* by running an IDA script.

    The map is cached as ``<out_dir>/<name without extension>_func_names.pickle``.
    When ``self._clear`` is set or the cache is missing, IDA (*ida_path*) is
    run in autonomous mode with the ``g_save_fname_path`` script, which
    receives *func_regex* and the pickle path through the
    ``-Osave_func_names:`` option.

    Args:
        func_regex: regex forwarded to the IDA script.
        path: binary whose function names are wanted.
        ida_path: IDA executable to launch.

    Returns:
        The unpickled function-name map.

    Raises:
        LoadFuncNamesError: IDA exited with a non-zero status, or the pickle
            could not be opened or decoded afterwards.
    """
    pickle_path = os.path.splitext(os.path.join(self._out_dir, os.path.basename(path)))[0] + '_func_names.pickle'
    if self._clear or not os.path.exists(pickle_path):
        cmd = [ida_path, '-A', '-S{}'.format(g_save_fname_path), '-Osave_func_names:{}:{}'.format(func_regex, pickle_path), path]
        self._dprint('saving function names for {}'.format(path))
        self._dprint(' '.join(cmd))
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
        if proc.returncode != 0:
            raise LoadFuncNamesError('function names saving failed: {}'.format(path))
    # IDA may exit with 0 without having written a usable pickle; report that
    # through the module's own error type instead of a raw OSError.
    try:
        with open(pickle_path, 'rb') as f:
            func_names = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError) as err:
        raise LoadFuncNamesError('function names loading failed: {}'.format(path)) from err
    self._dprint('function names loaded: {}'.format(path))
    return func_names
def _make_BinExport(self, target, ida_path):
    """Create ``<out_dir>/<target stem>.BinExport`` by running IDA on *target*.

    An existing export is reused unless ``self._clear`` is set.

    Args:
        target: binary to export.
        ida_path: IDA executable to launch.

    Returns:
        0 when an existing export was reused, otherwise the IDA process
        return code.
    """
    binexp_path = self._get_db_path_noext(target) + '.BinExport'
    if self._clear or not os.path.exists(binexp_path):
        # IDA runs autonomously (-A) with the g_exp_path script; the output
        # path is handed over through -OBinExportModule.
        cmd = [
            ida_path,
            '-A',
            '-S{}'.format(g_exp_path),
            '-OBinExportModule:{}'.format(binexp_path),
            target,
        ]
        self._dprint('getting BinExport for {}'.format(target))
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
        return proc.returncode
    self._dprint('already existed BinExport: {}'.format(binexp_path))
    return 0
def _get_BinDiff_path(self, secondary):
primary_noext = self._get_db_path_noext(self._primary)
secondary_noext = os.path.splitext(secondary)[0]
return primary_noext + '_vs_' + os.path.basename(secondary_noext) + '.BinDiff'
def _make_BinDiff(self, secondary):
    """Run the differ (``g_differ_path``) on the primary and *secondary* exports.

    An existing .BinDiff result is reused unless ``self._clear`` is set.

    Returns:
        ``(0, None)`` when an existing result was reused, otherwise
        ``(differ return code, command list)``.
    """
    pri_binexp = self._get_db_path_noext(self._primary) + '.BinExport'
    sec_binexp = self._get_db_path_noext(secondary) + '.BinExport'
    bindiff_path = self._get_BinDiff_path(secondary)

    if self._clear or not os.path.exists(bindiff_path):
        cmd = [
            g_differ_path,
            '--primary={}'.format(pri_binexp),
            '--secondary={}'.format(sec_binexp),
            '--output_dir={}'.format(self._out_dir),
        ]
        self._dprint('diffing the binaries..')
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        for chunk in ('differ output:', stdout, stderr):
            self._dprint(chunk)
        return proc.returncode, cmd

    self._dprint('already existed BinDiff: {}'.format(bindiff_path))
    return 0, None
def is_skipped(self, secondary):
    """Tell whether *secondary* should be left out of the diffing.

    A file is skipped when it is the primary itself, has a
    .BinExport/.BinDiff/.idb/.i64 extension, is larger than ``self._size_th``
    MiB, is not a recognised executable, has a different executable format
    than the primary, or (unless ``self._newidb`` is set) has no existing IDA
    database.

    Returns:
        True to skip the file, False to diff it.
    """
    # the primary itself (e.g. when it lives in the scanned directory)
    if secondary == self._primary:
        return True

    # by-products of IDA/BinExport/BinDiff are not diffing targets
    _stem, ext = os.path.splitext(secondary)
    if ext in ('.BinExport', '.BinDiff', '.idb', '.i64'):
        return True

    # size limit, compared in MiB
    size_mb = os.path.getsize(secondary) >> 20
    if size_mb > self._size_th:
        self._dprint('The size is bigger (skipped): {}'.format(secondary))
        return True

    # only the format has to match; the architecture may differ
    format_, arch = self._get_machine_type(secondary)
    if format_ is None:
        return True
    if format_ != self._format:
        self._dprint('different executable format (skipped): {}'.format(secondary))
        return True

    # without --newidb an existing database is required
    idb_path = self._get_idb_path(secondary, arch)
    if not (self._newidb or os.path.exists(idb_path)):
        self._dprint('no existing idb (skipped): {}'.format(secondary))
        return True
    return False
def check_similarity(self, secondary, q=None):
    """Diff *secondary* against the primary and collect the matches.

    The secondary is exported with IDA/BinExport, diffed, and the resulting
    .BinDiff sqlite database is queried. If the whole-binary similarity
    exceeds ``self._ws_th`` the binary is reported; otherwise the functions
    above the ``_fs_th``/``_ins_th``/``_bb_th`` thresholds whose addresses are
    known on both sides are reported. A .BinDiff file without any reported
    match is deleted unless debugging is enabled.

    Args:
        secondary: path of the binary to compare.
        q: optional queue. When given, exactly one item is always put:
            ``(high_ws, high_fs)`` on success, ``({}, {})`` when the .BinDiff
            database cannot be read, ``(None, None)`` when an exception is
            raised. Without a queue the results are stored in
            ``self._high_ws`` / ``self._high_fs``.

    Raises:
        ProcExportError: BinExport of the secondary failed.
        ProcDiffError: the differ failed.
    """
    result = None  # stays None when the .BinDiff database is unusable
    try:
        format_, arch = self._get_machine_type(secondary)
        ida_path = self._get_ida_path(arch)
        self._dprint('IDA binary path for secondary: {}'.format(ida_path))
        if self._make_BinExport(secondary, ida_path) != 0:
            raise ProcExportError('secondary BinExport failed: {}'.format(secondary))
        retcode, cmd = self._make_BinDiff(secondary)
        if retcode != 0:
            raise ProcDiffError('BinDiff failed: {}'.format(cmd))

        bindiff_path = self._get_BinDiff_path(secondary)
        meta = None
        frows = []
        conn = sqlite3.connect(bindiff_path)
        try:
            c = conn.cursor()
            try:
                c.execute("SELECT similarity,confidence FROM metadata")
                meta = c.fetchone()
                problem = 'no row in the metadata table'
            except sqlite3.OperationalError as detail:
                problem = detail
            if meta is None:
                print('[!] .BinDiff database ({}) is something wrong: {}'.format(bindiff_path, problem))
            else:
                ws, wc = meta
                self._dprint('whole binary similarity={} confidence={}'.format(ws, wc))
                c.execute("SELECT address1,address2,similarity,confidence FROM function WHERE similarity > ? and instructions > ? and basicblocks > ?", (self._fs_th, self._ins_th, self._bb_th))
                frows = c.fetchall()
                self._dprint('{} similar functions detected'.format(len(frows)))
        finally:
            # Always release the database handle, also on errors.
            conn.close()

        if meta is not None:
            c_high_ws = {}
            c_high_fs = {}
            if ws > self._ws_th:
                c_high_ws[secondary] = {'similarity': ws, 'confidence': wc}
            elif frows:
                if self.use_pyidb:
                    idb_path = self._get_idb_path(secondary, arch)
                    func_names = self._load_func_names_pyidb(idb_path)
                else:
                    func_names = self._load_func_names_default(self._func_regex, secondary,
                                                               ida_path)
                for addr1, addr2, fs, fc in frows:
                    self._dprint('addr1={:#x}, addr2={:#x}, similarity={}, confidence={}'.format(addr1, addr2, fs, fc))
                    if addr1 in self._func_names and addr2 in func_names:
                        c_high_fs[(addr1, self._func_names[addr1], addr2, func_names[addr2], secondary)] = {'similarity': fs, 'confidence': fc}
                if not c_high_fs and not self._debug:
                    os.remove(bindiff_path)
            else:
                if not self._debug:
                    os.remove(bindiff_path)
            result = (c_high_ws, c_high_fs)
    except Exception:
        # A consumer waiting on q.get() would block forever if nothing were
        # posted, so signal the failure before propagating it.
        if q is not None:
            q.put((None, None))
        raise

    if result is None:
        # Unreadable .BinDiff database: nothing to report for this binary.
        if q is not None:
            q.put(({}, {}))
        return
    if q is None:
        self._high_ws, self._high_fs = result
    else:
        q.put(result)
def check_similarities(self, secondary_dir, recursively):
    """Diff every eligible file below *secondary_dir* against the primary.

    One worker process running ``check_similarity`` is started per file that
    ``is_skipped`` does not reject; the per-file results are merged into
    ``self._high_ws`` / ``self._high_fs`` and ``self._diff_cnt`` is set to the
    number of started workers.

    Args:
        secondary_dir: directory holding the binaries to compare.
        recursively: walk sub-directories as well when true.
    """
    from queue import Empty  # raised by multiprocessing.Queue.get() on timeout

    if recursively:
        seconds = [os.path.join(root, file_)
                   for root, _dirs, files in os.walk(secondary_dir)
                   for file_ in files]
    else:
        seconds = [os.path.join(secondary_dir, entry)
                   for entry in os.listdir(secondary_dir)
                   if os.path.isfile(os.path.join(secondary_dir, entry))]

    procs = []
    for secondary in seconds:
        if self.is_skipped(secondary):
            continue
        q = multiprocessing.Queue()
        p = multiprocessing.Process(target=self.check_similarity, args=(secondary, q))
        p.start()
        procs.append((p, q))
    self._diff_cnt = len(procs)

    for p, q in procs:
        # Poll instead of blocking forever: a worker that died without
        # posting a result must not hang the parent.
        while True:
            try:
                c_high_ws, c_high_fs = q.get(timeout=1)
                break
            except Empty:
                if p.is_alive():
                    continue
                try:
                    # last chance: data posted right before the worker exited
                    c_high_ws, c_high_fs = q.get(timeout=1)
                except Empty:
                    c_high_ws = c_high_fs = None
                break
        p.join()
        # Failed workers report (None, None); there is nothing to merge then.
        if c_high_ws:
            self._high_ws.update(c_high_ws)
        if c_high_fs:
            self._high_fs.update(c_high_fs)
def increment_count(self):
    """Add one to the number of performed diffs (``self._diff_cnt``)."""
    self._diff_cnt = self._diff_cnt + 1
def get_result(self):
    """Return ``(high_ws, high_fs, diff_cnt)`` as collected so far."""
    result = (self._high_ws, self._high_fs, self._diff_cnt)
    return result
def main():
    """Command-line entry point: diff a primary binary 1-to-1 or 1-to-many.

    Parses the arguments, runs the diffing through ``BinDiff`` and prints two
    tables: binaries whose whole similarity exceeds ``--ws_th`` and, for the
    other binaries, functions whose similarity exceeds ``--fs_th``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('primary', help="primary binary to compare")
    parser.add_argument('--out_dir', '-o', default=g_out_dir, help="output directory including .BinExport/.BinDiff")
    parser.add_argument('--ws_th', '-w', type=float, default=g_ws_th, help="whole binary similarity threshold")
    parser.add_argument('--fs_th', '-f', type=float, default=g_fs_th, help="function similarity threshold")
    parser.add_argument('--ins_th', '-i', type=int, default=g_ins_th, help="instruction threshold")
    parser.add_argument('--bb_th', '-b', type=int, default=g_bb_th, help="basic block threshold")
    parser.add_argument('--size_th', '-s', type=int, default=g_size_th, help="file size threshold (MB)")
    parser.add_argument('--func_regex', '-e', default=g_func_regex, help="function name regex to include in the result")
    parser.add_argument('--debug', '-d', action='store_true', help="print debug output")
    parser.add_argument('--clear', '-c', action='store_true', help="clear .BinExport, .BinDiff and function name cache")
    parser.add_argument('--newidb', '-n', action='store_true', help="create an idb for the secondary binary")
    parser.add_argument('--use_pyidb', action='store_true', help="use python-idb")

    subparsers = parser.add_subparsers(dest='mode', help='mode: 1, m')
    parser_1 = subparsers.add_parser('1', help='BinDiff 1 to 1')
    parser_1.add_argument('secondary', help="secondary binary to compare")
    parser_m = subparsers.add_parser('m', help='BinDiff 1 to many')
    parser_m.add_argument('secondary_dir', help="secondary directory including binaries to compare")
    parser_m.add_argument('--recursively', '-r', action='store_true', help="getting binaries recursively")

    args = parser.parse_args()

    # Without a primary file there is nothing to time or report; bail out
    # before the summary below touches values that were never computed.
    if not os.path.isfile(args.primary):
        print('[!] primary binary is not found: {}'.format(args.primary))
        return

    start = time.time()
    try:
        bd = BinDiff(args.primary, args.out_dir, args.ws_th, args.fs_th, args.ins_th, args.bb_th, args.size_th, args.func_regex, args.debug, args.clear, args.newidb, args.use_pyidb)
        if args.mode == '1' and os.path.isfile(args.secondary):
            if not bd.is_skipped(args.secondary):
                bd.check_similarity(args.secondary)
                bd.increment_count()
        elif args.mode == 'm' and os.path.isdir(args.secondary_dir):
            bd.check_similarities(args.secondary_dir, args.recursively)
        high_ws, high_fs, cnt = bd.get_result()
    except LocalError as e:
        print('[!] {} ({})'.format(str(e), type(e)))
        return
    elapsed = time.time() - start

    print('---------------------------------------------')
    print('[*] BinDiff result')
    print('[*] elapsed time = {} sec, number of diffing = {}'.format(elapsed, cnt))
    print('[*] primary binary: (({}))'.format(os.path.basename(args.primary)))
    if high_ws:
        print('\n============== {} high similar binaries (>{}) ================'.format(len(high_ws), args.ws_th))
        table = PrettyTable(['similarity', 'secondary binary'])
        # most similar binary first
        for path, res in sorted(high_ws.items(), key=lambda x: x[1]['similarity'], reverse=True):
            table.add_row([res['similarity'], '(({}))'.format(os.path.basename(path))])
        print(table)
    if high_fs:
        print('\n============== {} high similar functions (>{}), except high similar binaries ================'.format(len(high_fs), args.fs_th))
        table = PrettyTable(['similarity', 'primary addr', 'primary name', 'secondary addr', 'secondary name', 'secondary binary'])
        # ordered by similarity, then primary address, both descending
        for key, res in sorted(high_fs.items(), key=lambda x: (x[1]['similarity'], x[0][0]), reverse=True):
            addr1, func_name1, addr2, func_name2, path = key
            # function names are cut to 0x20 characters to keep the table narrow
            table.add_row([res['similarity'], '{:#x}'.format(addr1), func_name1[:0x20], '{:#x}'.format(addr2), func_name2[:0x20], '{}'.format(os.path.basename(path))])
        print(table)
    if (not high_ws) and (not high_fs):
        print('\nno similar binaries/functions found')
    print('---------------------------------------------')
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: bindiff/bindiff_export.idc
================================================
#include <idc.idc>
// Script entry point: runs the "binexport12_ida" plugin and exits IDA with a
// status derived from the plugin result.
static main()
{
// NOTE(review): presumably makes IDA discard database changes on exit - confirm.
ChangeConfig("ABANDON_DATABASE=YES");
Batch(0);
// NOTE(review): presumably waits for the auto-analysis to finish before exporting - confirm.
Wait();
// Plugin names tried with other BinExport versions are kept below for reference.
//RunPlugin("binexport11", 2 );
//Exit( 1 - RunPlugin("zynamics_binexport_9", 2 ));
//Exit( 1 - RunPlugin("zynamics_binexport_8", 2 ));
//Exit( 1 - RunPlugin("binexport10", 2 ));
//Exit( 1 - RunPlugin("binexport11", 2 ));
//RunPlugin("binexport12_ida", 2 );
// The exit status is 1 - RunPlugin(...), so a RunPlugin result of 1 gives exit code 0.
Exit( 1 - RunPlugin("binexport12_ida", 2 ));
}
================================================
FILE: bindiff/save_func_names.py
================================================
import os, pickle, re
# Number of caller levels get_pfuncs() walks up from a function.
g_track_parent_th = 2
# Functions named here act as an exclusion filter: main() drops every function
# that has one of them among its tracked callers.
g_parent_func_exclude_list = ['__NMSG_WRITE', '__fassign_l']
# Addresses of the excluded functions, resolved by name.
g_pfe_list = list(map(LocByName, g_parent_func_exclude_list))
def get_pfuncs(ea, track_th):
    """Collect the start addresses of the functions that reference *ea*.

    Direct callers come first; while ``track_th - 1`` is still positive the
    callers of every direct caller are appended recursively with the
    decremented level. Duplicates are not removed.
    """
    callers = []
    for ref in CodeRefsTo(ea, False):
        callers.append(GetFunctionAttr(ref, FUNCATTR_START))

    remaining = track_th - 1
    if remaining > 0:
        ancestors = []
        for caller in callers:
            ancestors.extend(get_pfuncs(caller, remaining))
        callers.extend(ancestors)
    return callers
def main():
    """Pickle ``{address: name}`` of the functions selected by the plugin options.

    The ``save_func_names`` plugin option string is ``<regex>:<pickle path>``;
    everything after the first ``:`` is the path, so a path that itself
    contains ``:`` survives. A function is stored when its name matches the
    regex, it is neither a library nor a thunk function, and none of its
    tracked callers is in ``g_pfe_list``. IDA is closed with ``Exit(0)`` at
    the end.
    """
    #Wait()
    # not change the database to maintain the window setting
    process_config_line("ABANDON_DATABASE=YES")

    # -Osave_func_names:<regex>:<pickle path>
    options = idaapi.get_plugin_options("save_func_names").split(':')
    func_regex, path_parts = options[0], options[1:]
    pickle_path = ':'.join(path_parts)
    p = re.compile(func_regex)
    excluded_parents = set(g_pfe_list)

    func_names = {}
    with open(pickle_path, 'wb') as f:
        for ea in Functions(MinEA(), MaxEA()):
            func_name = GetFunctionName(ea)
            if not p.search(func_name):
                continue
            flags = GetFunctionFlags(ea)
            if flags & FUNC_LIB or flags & FUNC_THUNK:
                continue
            if set(get_pfuncs(ea, g_track_parent_th)) & excluded_parents:
                continue
            func_names[ea] = func_name
        pickle.dump(func_names, f)
    Exit(0)
# Run the export when the file is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: bindiff/save_func_names_7x.py
================================================
import os, pickle, re
from idautils import *
# Number of caller levels get_pfuncs() walks up from a function.
g_track_parent_th = 2
# Functions named here act as an exclusion filter: main() drops every function
# that has one of them among its tracked callers.
g_parent_func_exclude_list = ['__NMSG_WRITE', '__fassign_l']
# Addresses of the excluded functions, resolved by name.
g_pfe_list = list(map(get_name_ea_simple, g_parent_func_exclude_list))
def get_pfuncs(ea, track_th):
    """Collect the start addresses of the functions that reference *ea*.

    Direct callers come first; while ``track_th - 1`` is still positive the
    callers of every direct caller are appended recursively with the
    decremented level. Duplicates are not removed.
    """
    callers = []
    for ref in CodeRefsTo(ea, False):
        callers.append(get_func_attr(ref, FUNCATTR_START))

    remaining = track_th - 1
    if remaining > 0:
        ancestors = []
        for caller in callers:
            ancestors.extend(get_pfuncs(caller, remaining))
        callers.extend(ancestors)
    return callers
def main():
    """Pickle ``{address: name}`` of the functions selected by the plugin options.

    The ``save_func_names`` plugin option string is ``<regex>:<pickle path>``;
    everything after the first ``:`` is the path, so a path that itself
    contains ``:`` survives. A function is stored when its name matches the
    regex, it is neither a library nor a thunk function, and none of its
    tracked callers is in ``g_pfe_list``. IDA is closed with
    ``ida_pro.qexit(0)`` at the end.
    """
    #Wait()
    # not change the database to maintain the window setting
    process_config_line("ABANDON_DATABASE=YES")

    # -Osave_func_names:<regex>:<pickle path>
    options = idaapi.get_plugin_options("save_func_names").split(':')
    func_regex, path_parts = options[0], options[1:]
    pickle_path = ':'.join(path_parts)
    p = re.compile(func_regex)
    excluded_parents = set(g_pfe_list)

    func_names = {}
    with open(pickle_path, 'wb') as f:
        for ea in Functions(idaapi.cvar.inf.minEA, idaapi.cvar.inf.maxEA):
            func_name = idc.get_func_name(ea)
            if not p.search(func_name):
                continue
            flags = idc.get_func_attr(ea, FUNCATTR_FLAGS)
            if flags & FUNC_LIB or flags & FUNC_THUNK:
                continue
            if set(get_pfuncs(ea, g_track_parent_th)) & excluded_parents:
                continue
            func_names[ea] = func_name
        pickle.dump(func_names, f)
    ida_pro.qexit(0)
# Run the export when the file is executed as a script.
if __name__ == "__main__":
    main()
================================================
FILE: callstrings/README.org
================================================
#+OPTIONS: ^:{}
* callstrings - deobfuscating Hodur's global string encryption
- Recover strings using various methods (static decoding, emulation, IDA debug hook)
- Apply API function types to the local variable pointers
A comparison of the scripts is shown below:
[[./img/comparison.png]]
- As the comparison shows, ida_callstrings_dbg.py and ida_callstrings_flare_emu.py (except emulateSelection) can also be used for other malware.
- As explained in the reference slides, using the modified versions of [[https://github.com/TakahiroHaruyama/flare-emu/tree/xorloop][flare-emu]] and [[https://github.com/TakahiroHaruyama/capa/tree/comment_insertion][CAPA]] is recommended to make ida_callstrings_flare_emu.py work better.
** Reference
- https://speakerdeck.com/takahiro_haruyama/the-art-of-malware-c2-scanning-how-to-reverse-and-emulate-protocol-obfuscated-by-compiler
================================================
FILE: callstrings/hexrays_utils.py
================================================
'''
hexrays_utils.py - common classes/functions using Hex-Rays decompiler APIs
Takahiro Haruyama (@cci_forensics)
'''
#from abc import ABCMeta, abstractmethod
from idc import *
import idaapi, ida_ida, ida_ua, ida_typeinf, ida_kernwin
from ida_hexrays import *
from ida_allins import NN_callni, NN_call, NN_callfi
import idautils
import re
# Global options/variables
g_DEBUG = True  # debug() prints only when this is true
g_CACHE = True  # forwarded as `cache` to get_ctree_root()

# Upper-cased argument type names handled as narrow / wide string pointers
g_ASCII_TYPES = [
    'CHAR *',
    'CONST CHAR *',
    'LPSTR',
    'LPCSTR',
]
g_UNICODE_TYPES = [
    'WCHAR *',
    'CONST WCHAR *',
    'LPWSTR',
    'LPCWSTR',
]
g_STR_TYPES = g_ASCII_TYPES + g_UNICODE_TYPES

# Name fragment identifying the GetProcAddress-like stub functions
g_stub_GetProcAddress = 'fn_resolve_API_addr'
# Number of suffixed names ("<name>_1" ...) tried by force_rename_lvar()
g_RENAME_RETRY_CNT = 100
def info(msg):
    """Print *msg* behind a bold blue ``[*]`` tag (ANSI escape sequences)."""
    tag = "\033[34m\033[1m[*]\033[0m"
    print("{} {}".format(tag, msg))
def success(msg):
    """Print *msg* behind a bold green ``[+]`` tag (ANSI escape sequences)."""
    tag = "\033[32m\033[1m[+]\033[0m"
    print("{} {}".format(tag, msg))
def error(msg):
    """Print *msg* behind a bold red ``[!]`` tag (ANSI escape sequences)."""
    tag = "\033[31m\033[1m[!]\033[0m"
    print("{} {}".format(tag, msg))
def debug(msg):
    """Print *msg* behind a bold yellow ``[D]`` tag, only when ``g_DEBUG`` is true."""
    if not g_DEBUG:
        return
    tag = "\033[33m\033[1m[D]\033[0m"
    print("{} {}".format(tag, msg))
def extract_ascii(data):
    """Return the printable-ASCII run (2+ bytes) that *data* starts with.

    The pattern is anchored at the start of *data*, so the returned list holds
    at most one decoded string and is empty when *data* does not begin with at
    least two printable characters.
    """
    leading_run = re.compile(rb'^(?:[\x20-\x7E]){2,}')
    unique = {chunk.decode('ascii') for chunk in leading_run.findall(data)}
    return list(unique)
def extract_unicode(data):
    """Return the UTF-16LE printable run (2+ characters) that *data* starts with.

    A character is a printable ASCII byte followed by a zero byte. The pattern
    is a *bytes* pattern: a ``str`` pattern cannot be applied to the bytes
    buffer this function decodes from and would raise ``TypeError``.

    Args:
        data: bytes-like buffer to inspect.

    Returns:
        A list holding at most one decoded string (the pattern is anchored at
        the start of *data*); empty when there is no match.
    """
    pat = re.compile(rb'^(?:[\x20-\x7E][\x00]){2,}')
    return list(set([w.decode('utf-16le') for w in pat.findall(data)]))
def get_ctree_root(ea, cache=True):
    """Decompile the function containing *ea*.

    Args:
        ea: address inside the function to decompile.
        cache: when false, ``DECOMP_NO_CACHE`` is passed to ``decompile``.

    Returns:
        The object returned by ``decompile``, or ``None`` when decompilation
        raised (the failure is reported through ``error``).
    """
    cfunc = None
    try:
        if cache:
            cfunc = decompile(ea)
        else:
            cfunc = decompile(ea, flags=DECOMP_NO_CACHE)
    except Exception:
        # ``except Exception`` instead of a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        error('Decompilation of a function {:#x} failed'.format(ea))
    return cfunc
# Detect constant value used in string decoding
class cnt_val_finder_t(ctree_visitor_t):
    """Ctree visitor that looks for the constant of the string-decoding loop.

    It matches ``x ^= (y + C) ^ C`` and the equivalent
    ``x ^= (y - (0x100 - C)) ^ C`` with ``0 < C < 0xff``; the first match is
    kept in ``cst_val`` and stops the traversal.
    """

    def __init__(self):
        ctree_visitor_t.__init__(self, CV_FAST)
        self.cst_val = None

    @staticmethod
    def _peel_cast(node, wanted_op):
        """Return *node* (or the operand of its cast) if it has *wanted_op*, else None."""
        if node.op == wanted_op:
            return node
        if node.op == cot_cast and node.x.op == wanted_op:
            return node.x
        return None

    def visit_expr(self, expr):
        """Return 1 (stop) when the decoding pattern is found at *expr*, else 0."""
        is_candidate = expr.op == cot_asgxor and expr.y.op == cot_xor and expr.y.y.op == cot_num
        if not is_candidate:
            return 0

        cst = expr.y.y.n._value
        inner = expr.y.x

        expr_add = self._peel_cast(inner, cot_add)
        if expr_add and expr_add.y.op == cot_num and \
           (expr_add.y.n._value == cst) and (0 < cst < 0xff):
            success(f'{expr.ea:#x}: string decoding constant value {cst:#x} detected')
            self.cst_val = cst
            return 1

        # x ^ (y - 0x1d) ^ 0xe3 == x ^ (y + 0xe3) ^ 0xe3
        expr_sub = self._peel_cast(inner, cot_sub)
        if expr_sub and expr_sub.y.op == cot_num and \
           (expr_sub.y.n._value + cst == 0x100) and (0 < cst < 0xff):
            success(f'{expr.ea:#x}: string decoding constant value {cst:#x} detected')
            self.cst_val = cst
            return 1

        return 0

    def get_cnt_val(self):
        """Return the detected constant, or None when nothing was found."""
        return self.cst_val
# Detect assignments when inserting comments
class asg_parent_finder_t(ctree_visitor_t):
    """Ctree visitor that finds the assignment receiving the call at *call_ea*.

    ``asg_ea`` is ``BADADDR`` until an assignment is found whose right-hand
    side is that call, directly or behind a cast; it then holds the address of
    the assignment.
    """

    def __init__(self, call_ea):
        ctree_visitor_t.__init__(self, CV_PARENTS)
        self.call_ea = call_ea
        self.asg_ea = BADADDR

    def visit_expr(self, expr):
        """Return 1 (stop) once the assignment is found, else 0."""
        if expr.op != cot_asg:
            return 0
        rhs = expr.y
        assigns_call = (rhs.op == cot_call and rhs.ea == self.call_ea) or \
                       (rhs.op == cot_cast and rhs.x.op == cot_call and rhs.x.ea == self.call_ea)
        if not assigns_call:
            return 0
        self.asg_ea = expr.ea
        info(f'{self.call_ea:#x}: assignment detected, replaced with the ea {self.asg_ea:#x}')
        return 1
# Change type/name of the specified lvar name
class my_lvar_modifier_t(user_lvar_modifier_t):
    """Modifier that renames and/or retypes the local variable *target_name*.

    Args:
        target_name: current name of the variable to change.
        new_name: new name, if the variable is to be renamed.
        new_decl: C declaration parsed into the new type; takes precedence
            over *new_tif*.
        new_tif: type object used as the new type.
    """

    def __init__(self, target_name, new_name=None, new_decl=None, new_tif=None):
        user_lvar_modifier_t.__init__(self)
        self.target_name = target_name
        self.new_name = new_name
        self.new_decl = new_decl
        self.new_tif = new_tif

    def modify_lvars(self, lvars):
        """Apply the change to the first variable named ``target_name``.

        Returns True when such a variable was found, False otherwise.
        """
        # Note: Variables without user-specified info are not present in lvvec
        if not len(lvars.lvvec):
            error('modify_lvars: len(lvars.lvvec) == 0')

        for one in lvars.lvvec:
            debug('modify_lvars: target_name = "{}" current = "{}"'.format(self.target_name, one.name))
            if one.name != self.target_name:
                continue

            if self.new_name:
                one.name = self.new_name
                info('modify_lvars: Name "{}" set to {}'.format(one.name, self.target_name))

            tif = None
            if self.new_decl:
                # The result of parse_decl is deliberately not checked.
                tif = ida_typeinf.tinfo_t()
                ida_typeinf.parse_decl(tif, None, self.new_decl, 0)
            elif self.new_tif:
                tif = self.new_tif

            if tif:
                one.type = tif
                info('modify_lvars: Type "{}" set to {}'.format(str(tif), one.name))
            return True

        return False
#class HexRaysUtils(metaclass=ABCMeta):
class HexRaysUtils():
    """Shared Hex-Rays helpers that recover string arguments at call sites.

    Subclasses provide the register/memory access primitives
    (``get_reg_value``, ``get_ptr_value``, ``get_string``, ``get_bytes``);
    the implementations here raise ``NotImplementedError``.

    Attributes:
        cmts: maps a call address to the comment text set for it.
        call_eas: call addresses already handled by ``get_arg_strings``.
    """

    def __init__(self):
        self.cmts = {}
        self.call_eas = []

    #@abstractmethod
    def get_reg_value(self, reg_name):
        """Return the value of the register *reg_name* (backend specific)."""
        raise NotImplementedError()

    #@abstractmethod
    def get_ptr_value(self, ptr):
        """Return the pointer-sized value stored at *ptr* (backend specific)."""
        raise NotImplementedError()

    #@abstractmethod
    def get_string(self, ea, is_unicode=False):
        """Return the string at *ea* (backend specific)."""
        raise NotImplementedError()

    def get_bytes(self, ea):
        """Return raw bytes at *ea* (backend specific)."""
        raise NotImplementedError()

    def get_fn_offset(self, ea):
        """Format *ea* as ``<function name>+<hex offset from function start>``."""
        func_ea = get_func_attr(ea, FUNCATTR_START)
        return get_name(func_ea) + f'+{ea-func_ea:#x}'

    def set_decomplier_cmt(self, cfunc, ea, cmt):
        """Attach *cmt* to the pseudocode line of the call at *ea*.

        When the call is the right-hand side of an assignment, the comment is
        placed at the assignment address instead (prevents orphan comments).
        """
        finder = asg_parent_finder_t(ea)
        finder.apply_to_exprs(cfunc.body, None)
        cmt_ea = ea if finder.asg_ea == BADADDR else finder.asg_ea
        tl = idaapi.treeloc_t()
        tl.ea = cmt_ea
        tl.itp = idaapi.ITP_SEMI
        cfunc.set_user_cmt(tl, cmt)
        cfunc.save_user_cmts()
        cfunc.refresh_func_ctext()

    # This function was ported from https://github.com/RolfRolles/Miscellaneous/blob/master/PrintTypeSignature.py
    # If an indirect API call still has a cast after the var type is set, apply "Force call type" on the var in Pseudocode view
    def GetTypeSignature(self, apiName):
        """Return a pointer-to-function type for the API *apiName*, or None."""
        # Look up the prototype by name from the main TIL
        o = ida_typeinf.get_named_type(None, apiName, ida_typeinf.NTF_SYMU)
        if o is not None:
            code, type_str, fields_str, cmt, field_cmts, sclass, value = o
            # Create a tinfo_t by deserializing the data returned above
            t = ida_typeinf.tinfo_t()
            if t.deserialize(None, type_str, fields_str, field_cmts):
                # And change the prototype into a function pointer
                ptrType = ida_typeinf.tinfo_t()
                ptrType.create_ptr(t)
                return ptrType
        # On any failure, return None
        return None

    # IDA decompiler has no API forcing lvar name
    def force_rename_lvar(self, ea, var, new_name):
        """Rename *var* to *new_name*, falling back to ``<new_name>_<n>``.

        Up to ``g_RENAME_RETRY_CNT`` suffixed names are tried; an error is
        reported when all of them fail. ``var.name`` is updated on success.
        """
        func_ea = get_func_attr(ea, FUNCATTR_START)
        debug('force_rename_lvar: function ea = {:#x}'.format(func_ea))
        old_name = var.name
        if rename_lvar(func_ea, var.name, new_name):
            info('force_rename_lvar {:#x}: lvar name changed "{}" -> "{}"'.format(ea, old_name, new_name))
            var.name = new_name  # to refresh immediately
            return
        for i in range(g_RENAME_RETRY_CNT):
            candidate = new_name + '_{}'.format(i + 1)
            if rename_lvar(func_ea, var.name, candidate):
                info('force_rename_lvar {:#x}: lvar name changed "{}" -> "{}"'.format(ea, old_name, candidate))
                var.name = candidate
                break
        else:
            error('{:#x}: renaming {} failed (rename_lvar, {} times)'.format(ea, var.name, g_RENAME_RETRY_CNT))

    @staticmethod
    def _arg_location(call_type, i):
        """Locate argument *i* of a callee whose printed type is *call_type*.

        Returns:
            ``(label, reg, offset)``. With ``offset`` None the argument is the
            value of register ``reg``; otherwise it is the pointer-sized value
            stored at ``reg + offset``. ``label`` names the convention for
            debug output.
        """
        if '__thiscall' in call_type:
            if i == 0:
                return 'thiscall', 'ECX', None
            return 'thiscall', 'ESP', (i - 1) * 4
        if '__fastcall' in call_type:
            # Handled as the x64 convention: four register arguments, then
            # 8-byte stack slots located after the 32-byte shadow space, i.e.
            # the fifth argument (i == 4) is at RSP + 0x20 at the call.
            regs = ('RCX', 'RDX', 'R8', 'R9')
            if i < len(regs):
                return 'fastcall', regs[i], None
            return 'fastcall', 'RSP', i * 8
        # __stdcall, __cdecl, etc.
        return 'other calling conventions', 'ESP', i * 4

    def _resolve_callee_name(self, expr):
        """Return the callee name of the call expression *expr*.

        For a call through a casted variable the call type is additionally
        forced on the call operand, unless one is already set.
        """
        if expr.x.obj_ea != BADADDR:
            return get_name(expr.x.obj_ea)
        # dynamically-resolved API
        if expr.x.op == cot_var:
            return expr.x.v.getv().name
        if expr.x.op == cot_cast and expr.x.x.op == cot_var:
            callee_name = expr.x.x.v.getv().name
            # Force call type (remove the cast)
            tif = ida_typeinf.tinfo_t()
            if print_insn_mnem(expr.ea) == 'call' and not ida_nalt.get_op_tinfo(tif, expr.ea, 0):  # Skip an already-specified operand
                tif = self.GetTypeSignature(callee_name)
                if tif:
                    if ida_nalt.set_op_tinfo(expr.ea, 0, tif):
                        success(f'{expr.ea:#x}: Force call type "{str(tif)}" to the operand "{callee_name}"')
                    else:
                        error(f'{expr.ea:#x}: Force call type failed')
            return callee_name
        return 'UNRESOLVED'

    def _apply_api_type(self, cfunc, expr, address, api_name):
        """Rename and retype the variable that receives the resolved API pointer."""
        p_item = cfunc.body.find_parent_of(expr)
        p_expr = p_item.cexpr
        if p_expr.op == cot_cast:
            p_item = cfunc.body.find_parent_of(p_expr)
            p_expr = p_item.cexpr
        if p_expr.op == cot_asg and p_expr.x.op == cot_var:
            var = p_expr.x.v.getv()
            tif = self.GetTypeSignature(api_name)
            # We need to use rename_lvar calling modify_user_lvar_info indirectly to add the var into lvvec
            self.force_rename_lvar(address, var, api_name)
            my_lvar_mod = my_lvar_modifier_t(var.name, new_tif=tif)
            modify_user_lvars(get_func_attr(address, FUNCATTR_START), my_lvar_mod)

    def get_arg_strings(self, address):
        """Recover the string arguments of the call at *address*.

        Each call address is handled once. For every argument with a string
        pointer type (or any argument of a GetProcAddress stub) the runtime
        value is read and decoded; the recovered strings are written as a
        pseudocode comment and remembered in ``self.cmts``. When the callee is
        a GetProcAddress stub or ``GetProcAddress`` itself, the variable
        receiving the result is renamed to the API name and given its type.
        """
        if address in self.call_eas:
            info(f'{address:#x} ({self.get_fn_offset(address)}): already-visited call')
            return
        self.call_eas.append(address)

        cfunc = get_ctree_root(address, cache=g_CACHE)
        if not cfunc:
            return

        item = cfunc.body.find_closest_addr(address)
        if item is not None and item.op == cot_call:
            expr = item.cexpr
            print('-' * 80)
            callee_name = self._resolve_callee_name(expr)
            info(f'{address:#x} ({self.get_fn_offset(address)}): call {callee_name} ({expr.x.obj_ea:#x})')
            debug(f'{str(expr.x.type)}')
            debug(f'argc = {expr.a.size()}')

            via_stub = g_stub_GetProcAddress in callee_name
            arg_strs = []
            for i in range(expr.a.size()):
                arg = expr.a.at(i)
                arg_type = str(arg.type)
                # Sometimes the arg type in stubs is int *
                if not (arg_type.upper() in g_STR_TYPES or via_stub):
                    continue
                debug(f'arg{i} = {arg_type}')

                label, reg, offset = self._arg_location(str(expr.x.type), i)
                debug(label)
                ea = self.get_reg_value(reg)
                if offset is not None:
                    ea = self.get_ptr_value(ea + offset)
                debug(f'{ea=:#x}')

                if arg_type.upper() in g_ASCII_TYPES or via_stub:
                    res = self.get_string(ea)
                else:  # g_UNICODE_TYPES
                    res = self.get_string(ea, is_unicode=True)
                if not res:
                    continue
                arg_strs.append(f'arg{i} = {res}')
                debug(f'arg{i} = {res}')

                # Set the function prototype if the callee is the GetProcAddress stubs or GetProcAddress API
                if (i == 0 and via_stub) or (i == 1 and callee_name == "GetProcAddress"):
                    self._apply_api_type(cfunc, expr, address, res)

            # Set the arguments comment at the call instruction address
            if arg_strs:
                cmt = f'{address:#x} ({self.get_fn_offset(address)}): {",".join(arg_strs)}'
                success(cmt)
                self.set_decomplier_cmt(cfunc, address, cmt)
                self.cmts[address] = cmt

        cfunc.refresh_func_ctext()

    def print_summary(self):
        """Print every comment recorded in ``self.cmts``."""
        if self.cmts:
            success('Summary:')
            for cmt in self.cmts.values():
                print(f'{cmt}')

    def decode(self, enc, cst_val):
        """Decode *enc*: byte ``i`` is XORed with ``(i + cst_val) & 0xff`` and ``cst_val``."""
        return bytes([b ^ ((i + cst_val) & 0xff) ^ cst_val for i, b in enumerate(enc)])
================================================
FILE: callstrings/ida_callstrings_dbg.py
================================================
'''
ida_callstrings_dbg.py - string deobfuscation using IDA debug hook class
Takahiro Haruyama (@cci_forensics)
'''
import idaapi
idaapi.require('hexrays_utils', package='*')
from hexrays_utils import *
from ida_dbg import *
# Global options/variables
# g_DEBUG: when true, debug() in this file prints its messages.
g_DEBUG = False
# g_MAX_INSTRUCTIONS: once this many instructions have been traced, dbg_trace()
# requests that step tracing be disabled and the process suspended.
g_MAX_INSTRUCTIONS = 0 # 0 = disabled
def info(msg):
    """Print *msg* behind a bold blue ``[*]`` tag (ANSI escape sequences)."""
    tag = "\033[34m\033[1m[*]\033[0m"
    print("{} {}".format(tag, msg))
def success(msg):
    """Print *msg* behind a bold green ``[+]`` tag (ANSI escape sequences)."""
    tag = "\033[32m\033[1m[+]\033[0m"
    print("{} {}".format(tag, msg))
def error(msg):
    """Print *msg* behind a bold red ``[!]`` tag (ANSI escape sequences)."""
    tag = "\033[31m\033[1m[!]\033[0m"
    print("{} {}".format(tag, msg))
def debug(msg):
    """Print *msg* behind a bold yellow ``[D]`` tag, only when ``g_DEBUG`` is true."""
    if not g_DEBUG:
        return
    tag = "\033[33m\033[1m[D]\033[0m"
    print("{} {}".format(tag, msg))
class TraceHook(DBG_Hooks, HexRaysUtils):
    """Debugger hook that inspects call arguments while step-tracing.

    Every traced call instruction outside the GetProcAddress stubs is handed
    to ``get_arg_strings``; the collected comments are printed when the run
    reaches ``target_ea`` or the process exits.

    Attributes:
        traces: number of trace callbacks received so far.
        target_ea: address the run is expected to stop at.
    """

    def __init__(self, target_ea):
        DBG_Hooks.__init__(self)
        HexRaysUtils.__init__(self)
        self.traces = 0
        self.target_ea = target_ea

    def get_reg_value(self, reg_name):
        """Read a register of the debugged process."""
        return get_reg_val(reg_name)

    def get_ptr_value(self, ptr):
        """Read a qword (64-bit database) or a dword (otherwise) at *ptr*."""
        reader = get_qword if idaapi.get_inf_structure().is_64bit() else get_wide_dword
        return reader(ptr)

    def get_string(self, ea, is_unicode=False):
        """Return the string literal at *ea* decoded to str, or None when empty."""
        if is_unicode:
            res = get_strlit_contents(ea, strtype=STRTYPE_C_16)
        else:
            res = get_strlit_contents(ea)
        if res:
            return res.decode()
        return None

    def dbg_trace(self, tid, ea):
        """Trace callback; returns 0 so that the instruction is logged."""
        debug("[tid %X] trace %08X" % (tid, ea))
        if not (ida_ida.inf_get_min_ea() <= ea <= ida_ida.inf_get_max_ea()):
            raise Exception(
                "Received a trace callback for an address outside this database!"
            )

        insn = ida_ua.insn_t()
        insnlen = ida_ua.decode_insn(insn, ea)
        fn_name = get_name(get_func_attr(ea, FUNCATTR_START))
        is_call = insnlen > 0 and insn.itype in [NN_callni, NN_call, NN_callfi]
        # calls made from inside the GetProcAddress stubs are not inspected
        if is_call and g_stub_GetProcAddress not in fn_name:
            refresh_debugger_memory()
            self.get_arg_strings(ea)

        self.traces += 1
        if g_MAX_INSTRUCTIONS and self.traces >= g_MAX_INSTRUCTIONS:
            request_disable_step_trace()
            request_suspend_process()
            if run_requests():
                info('Requests suspending the process executed (g_MAX_INSTRUCTIONS)')
            else:
                error('Requests suspending the process failed (g_MAX_INSTRUCTIONS)')
        return 0  # log it

    def dbg_thread_start(self, pid, tid, ea):
        """Report a newly started thread."""
        # An earlier experiment that switched the trace over to the new
        # thread is disabled; only the notification is printed.
        info(f'[Thread {tid:#x}] {ea:#x}: New thread started')

    def dbg_thread_exit(self, pid, tid, ea, exit_code):
        """Report a thread exit."""
        info(f'[Thread {tid:#x}] {ea:#x}: Thread exited with {exit_code:#x}')

    def dbg_run_to(self, pid, tid=0, ea=0):
        """Report where the run stopped and print the collected comments."""
        if ea == self.target_ea:
            info(f'[Thread {tid:#x}] Reached to the target {self.get_fn_offset(ea)}')
        elif pid != 0:
            error(f'[Thread {tid:#x}] The suspended address {self.get_fn_offset(ea)} is different from the target {self.get_fn_offset(self.target_ea)}. Probably another breakpoint set?')
        else:
            error(f'[Thread {tid:#x}] The suspended address {self.get_fn_offset(ea)} is different from the target {self.get_fn_offset(self.target_ea)}. Probably suspended by users manually?')
        info(f"Traced {self.traces} instructions")
        refresh_debugger_memory()
        self.print_summary()

    def dbg_process_exit(self, pid, tid, ea, code):
        """Report a process exit before the target and print the collected comments."""
        error(f"[Thread {tid:#x}] Process exited with {code:#x} before reaching to the target")
        info(f"Traced {self.traces} instructions")
        self.print_summary()
        return 0
def main():
    """Step-trace from the current instruction to the end of its function.

    A ``TraceHook`` is installed, step tracing is enabled and the process is
    run to the last instruction of the function containing the current
    instruction pointer. The hook is removed afterwards, also when tracing
    raises.
    """
    info('start')
    if not is_debugger_on():
        error("Please run the process first!")
        return

    # get_ip_val() instead of the register name "EIP", so that 64-bit debug
    # sessions are handled as well.
    ip = get_ip_val()
    if ip is None:
        error("Failed to read the instruction pointer (is the process suspended?)")
        return

    end = prev_head(get_func_attr(ip, FUNCATTR_END))
    info(f"Tracing to the end of function {end:#x}")
    debugHook = TraceHook(end)
    debugHook.hook()
    try:
        enable_step_trace(1)  # Only the same thread works
        set_step_trace_options(ST_OPTIONS_MASK)  # all included
        run_to(end)
        while get_process_state() == DSTATE_RUN:
            wait_for_next_event(WFNE_ANY, 0)
    finally:
        # Never leave the hook installed, even if tracing failed.
        if not debugHook.unhook():
            error("Error uninstalling hooks!")
        else:
            info('Hooks uninstalled')
        del debugHook
    info('done')
if __name__ == '__main__':
main()
================================================
FILE: callstrings/ida_callstrings_flare_emu.py
================================================
'''
ida_callstrings_flare_emu.py - string deobfuscation using flare-emu
Takahiro Haruyama (@cci_forensics)
'''
import idaapi
#idaapi.require('logging') # <- This suppresses the flare-emu debug messages!
import logging, hexdump
#logging.basicConfig(level=logging.DEBUG, force=True)
idaapi.require('hexrays_utils', package='*')
from hexrays_utils import *
idaapi.require('flare_emu')
idaapi.require('flare_emu_hooks')
import flare_emu, flare_emu_hooks, unicorn
# Global options
g_DEBUG = False
g_DEBUG_FLARE_EMU = False
g_FLAG_ALL_PATHS = False # True: iterateAllPaths, False: emulateRange
g_MAX_SAME_STATE_VAR = 0x1000 # to detect infinite loop by CFF
g_MAX_INST_VISIT = 10000 # to detect infinite loop
#g_MAX_EMU_INSN = 1000000
g_MAX_STACK_BUF = 0x100
#g_ENC_OFFSET = 0x0
def info(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[*]`` marker."""
    print(f"\033[34m\033[1m[*]\033[0m {msg}")
def success(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[+]`` marker."""
    print(f"\033[32m\033[1m[+]\033[0m {msg}")
def error(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[!]`` marker."""
    print(f"\033[31m\033[1m[!]\033[0m {msg}")
def debug(msg):
    """Print *msg* with a ``[D]`` marker; silent unless ``g_DEBUG`` is truthy."""
    if not g_DEBUG:
        return
    print(f"\033[33m\033[1m[D]\033[0m {msg}")
def debug_bin(n, v):
    """When ``g_DEBUG`` is truthy, print the label *n* followed by a hex dump of *v*."""
    if not g_DEBUG:
        return
    debug(n)
    hexdump.hexdump(v)
class HexRaysEmu(HexRaysUtils):
    """HexRaysUtils variant whose register and memory accessors read from a flare-emu helper."""

    def __init__(self, eh):
        """Keep *eh* (the flare-emu ``EmuHelper``) for all later lookups."""
        HexRaysUtils.__init__(self)
        self.eh = eh

    def get_reg_value(self, reg_name):
        """Return the emulated value of *reg_name* (looked up in lower case)."""
        lowered = reg_name.lower()
        return self.eh.getRegVal(lowered)

    def get_ptr_value(self, ptr):
        """Return the pointer-sized value stored at *ptr* in emulated memory."""
        return self.eh.getEmuPtr(ptr)

    def get_string(self, ea, is_unicode=False):
        """Return the string at *ea*, decoded as UTF-16 when *is_unicode* is set."""
        if is_unicode:
            return self.eh.getEmuWideString(ea).decode('utf-16')
        return self.eh.getEmuString(ea).decode()

    def get_bytes(self, ea):
        """Return 0x20 bytes of emulated memory starting at *ea*."""
        return self.eh.getEmuBytes(ea, 0x20)
def call_hook(address, argv, funcName, userData):
    """Call hook: ask the ``hremu`` helper in *userData* to extract argument strings at *address*.

    Unicorn errors raised while reading the arguments are reported and swallowed.
    """
    debug(f'call_hook at {address:#x}')
    helper = userData["hremu"]
    try:
        helper.get_arg_strings(address)
    except unicorn.UcError as exc:
        error(f'{address:#x} ({helper.get_fn_offset(address)}): Unicorn emulation exception in get_arg_strings() ({exc})')
def mem_write_hook(unicornObject, accessType, memAccessAddress, memAccessSize, memValue, userData):
    """Memory hook: remember which instruction wrote to which address near the top of the stack.

    Only writes strictly above ``esp`` and below ``esp + g_MAX_STACK_BUF`` are
    recorded, as ``userData["enc_heads"][ip] = written address``.
    """
    if accessType != unicorn.UC_MEM_WRITE:
        return
    emu = userData["hremu"].eh
    stack_top = emu.getRegVal('esp')
    writer_ip = emu.getRegVal('ip')
    if stack_top < memAccessAddress < stack_top + g_MAX_STACK_BUF:
        userData["enc_heads"][writer_ip] = memAccessAddress
def is_high_entropy(v):
    """Heuristically tell whether a 32-bit constant looks like a random state value.

    Only the low 32 bits of *v* are inspected, so negative numbers and values
    wider than 32 bits (e.g. 64-bit register contents or sign-extended
    immediates) no longer raise ``OverflowError``.

    Returns ``False`` when any of the four bytes is zero (e.g. 0, 1,
    0x10000000) or when all four bytes are identical (e.g. 0x11111111,
    0xffffffff); ``True`` otherwise.
    """
    vbytes = (v & 0xFFFFFFFF).to_bytes(4, 'little')
    if 0 in vbytes:
        return False
    if len(set(vbytes)) == 1:
        return False
    return True
def inst_hook_cff(unicornObject, address, instructionSize, userData):
    """Instruction hook that breaks out of control-flow-flattening (CFF) dispatcher loops.

    Two instruction shapes are watched:

    * ``cmp reg, imm`` followed by ``jz``/``jnz`` where ``imm`` looks like a
      state constant (see ``is_high_entropy``);
    * ``cmovz reg, imm|reg`` where the current value of ``reg`` looks like a
      state constant.

    Whenever the state variable does not match the compared value, a counter
    keyed by ``(address, state_var)`` is incremented.  Once it reaches
    ``g_MAX_SAME_STATE_VAR`` the register is overwritten with the compared
    value, the address is added to ``userData["state_excluded"]`` and all
    counters are reset.

    *userData* must provide "EmuHelper", "state_var_cnt" (dict) and
    "state_excluded" (list); the two containers are mutated in place.
    """
    eh = userData["EmuHelper"]
    state_var_cnt = userData["state_var_cnt"]
    state_excluded = userData["state_excluded"]
    abort = False

    if print_insn_mnem(address) == 'cmp' and get_operand_type(address, 0) == o_reg and get_operand_type(address, 1) == o_imm and \
            is_high_entropy(get_operand_value(address, 1)) and print_insn_mnem(next_head(address)) in ['jz', 'jnz']:
        reg_name = print_operand(address, 0)
        state_var = eh.getRegVal(reg_name)
        cmp_var = get_operand_value(address, 1)
        if state_var != cmp_var:
            abort = True
    elif print_insn_mnem(address) in ['cmovz'] and get_operand_type(address, 0) == o_reg:
        reg_name = print_operand(address, 0)
        state_var = eh.getRegVal(reg_name)
        cmp_var = None
        if is_high_entropy(state_var):
            op1type = get_operand_type(address, 1)
            if op1type == o_imm:
                cmp_var = get_operand_value(address, 1)
            elif op1type == o_reg:
                op1_reg_name = print_operand(address, 1)
                cmp_var = eh.getRegVal(op1_reg_name)
            if cmp_var and state_var != cmp_var:
                abort = True

    if not abort or address in state_excluded:
        return

    uid = (address, state_var)
    state_var_cnt[uid] = state_var_cnt.get(uid, 0) + 1
    if state_var_cnt[uid] >= g_MAX_SAME_STATE_VAR:
        error(f'{address:#x}: CFF infinite loop detected. Update the state variable {state_var:#x} with the new one {cmp_var:#x}')
        debug([f'{ea:#x}: {var=:#x}, {cnt=}' for (ea, var), cnt in state_var_cnt.items()])
        debug(f'excluded: {[f"{e:#x}" for e in state_excluded]}')
        eh.uc.reg_write(eh.regs[reg_name], cmp_var)
        state_excluded.append(address)
        # Reset the counts of the external loops.  The dict is shared through
        # userData, so it must be cleared in place; rebinding the local name
        # (``state_var_cnt = {}``) would leave the shared counters untouched.
        state_var_cnt.clear()
def inst_hook(unicornObject, address, instructionSize, userData):
    """Instruction hook: count visits per address and stop emulation at ``g_MAX_INST_VISIT``."""
    eh = userData["EmuHelper"]
    visits = userData["inst_visit_cnt"]
    visits[address] = visits.get(address, 0) + 1
    if visits[address] < g_MAX_INST_VISIT:
        return
    error(f'{address:#x}: Infinite loop detected. Aborted.')
    eh.stopEmulation(userData)
def noop(*args):
    """Accept any positional arguments and do nothing."""
    return None
def main():
# Entry point. Two modes:
#   * a range is selected -> emulate only the selection and decode the single
#     string that the selected code builds on the stack;
#   * no selection        -> emulate one function (or every function) and let
#     call_hook extract the string arguments at each call.
# NOTE(review): g_CACHE and g_stub_GetProcAddress are not defined among this
# file's visible globals; presumably they come from `from hexrays_utils import *`
# -- confirm.
info('start')
#breakpoint()
# Create the flare-emu helper; verbose logging only when g_DEBUG_FLARE_EMU is set.
if g_DEBUG_FLARE_EMU:
eh = flare_emu.EmuHelper(verbose=10)
eh.logger.setLevel(logging.DEBUG)
else:
eh = flare_emu.EmuHelper()
hremu = HexRaysEmu(eh)
# selection[0] is used as the "has selection" flag, [1] and [2] as start and end.
selection = idaapi.read_range_selection(None)
if selection[0]:
info(f'Emulating the selection {selection[1]:#x} to {selection[2]:#x}')
# mem_write_hook fills enc_heads with {writing instruction address: written stack address}.
enc_heads = {}
userData = {
'hremu': hremu,
'enc_heads': enc_heads
}
eh.emulateSelection(memAccessHook=mem_write_hook, hookData=userData)
# Get the head of encoded string
stack_buf = eh.getEmuBytes(eh.getRegVal('esp'), g_MAX_STACK_BUF)
debug_bin('stack', stack_buf)
# The first byte in the range 65..122 is taken as the start of the encoded
# string; if no byte qualifies the offset ends up as 0.
for i in range(len(stack_buf)):
if 65 <= stack_buf[i] <= 122: # A to z
offset = i
break
else:
offset = 0
#offset = 0x48 # Sometimes you need to adjust the offset manually :-(
debug(f'detected offset = {offset:#x}')
# Decode the string after detecting the constant value
cfunc = get_ctree_root(selection[1], cache=g_CACHE)
cvf = cnt_val_finder_t()
cvf.apply_to_exprs(cfunc.body, None)
cnt_val = cvf.get_cnt_val()
if cnt_val:
# A non-zero second byte is treated as single-byte text; otherwise the
# buffer is read as a UTF-16LE string and re-encoded to bytes.
if stack_buf[offset + 1] != 0:
enc = stack_buf[offset:]
debug(f'enc {enc} is ascii')
else:
enc = eh.getEmuWideString(eh.getRegVal('esp') + offset).decode('utf-16-le')
enc = enc.encode()
debug(f'enc {enc} is unicode (utf-16-le)')
dec = hremu.decode(enc, cnt_val)
debug_bin('dec', dec)
# Extract the ascii strings (no null termination)
head = eh.getRegVal('esp') + offset
ascs = extract_ascii(dec)
if ascs:
# If exactly one instruction wrote to the head of the buffer, attach the
# decoded string there as a decompiler comment; otherwise only print it.
keys = [k for k, v in enc_heads.items() if v == head]
if len(keys) == 1:
success(f'{keys[0]:#x}: string decoded "{ascs[0]}"')
hremu.set_decomplier_cmt(cfunc, keys[0], ascs[0])
else:
success(f'string decoded "{ascs[0]}"')
else:
error(f'A constant value for decoding is not found')
else:
# No selection: ask whether to process only the function under the cursor
# (Yes), every function (No), or nothing (any other answer).
ans = ida_kernwin.ask_yn(0, 'only decode the selected function?')
if ans == ida_kernwin.ASKBTN_YES:
fvas = [get_func_attr(get_screen_ea(), FUNCATTR_START)]
elif ans == ida_kernwin.ASKBTN_NO:
fvas = idautils.Functions()
else:
info('canceled')
return
for fva in fvas:
if get_func_flags(fva) & (FUNC_LIB | FUNC_THUNK):
debug(f"{fva:#x}: skipping library or thunk function")
continue
fn_name = get_name(get_func_attr(fva, FUNCATTR_START))
if fn_name.find(g_stub_GetProcAddress) != -1:
debug(f"{fva:#x}: skipping GetProcAddress stub function")
continue
print('-' * 100)
info(f'{get_name(fva)} ({fva:#x})')
'''
state_var_cnt = {}
state_excluded = []
userData = {
'hremu': hremu,
'state_var_cnt': state_var_cnt,
'state_excluded': state_excluded,
}
eh.emulateRange(fva, callHook=call_hook, instructionHook=inst_hook_cff, hookData=userData, count=g_MAX_EMU_INSN)
'''
# Per-function visit counters consumed by inst_hook to detect infinite loops.
inst_visit_cnt = {}
userData = {
'hremu': hremu,
'inst_visit_cnt': inst_visit_cnt,
}
try:
# g_FLAG_ALL_PATHS selects iterateAllPaths (no instruction hook) over emulateRange.
if g_FLAG_ALL_PATHS:
info('The mode is iterateAllPaths')
eh.iterateAllPaths(fva, noop, hookData=userData, callHook=call_hook)
else:
info('The mode is emulateRange')
eh.emulateRange(fva, callHook=call_hook, instructionHook=inst_hook, hookData=userData)
except unicorn.unicorn.UcError as e:
error(f'{fva:#x}: unicorn error ({e})')
refresh_idaview_anyway()
eh.resetEmulatorHeapAndStack()
print('-' * 100)
hremu.print_summary()
info('done')
if __name__ == '__main__':
main()
================================================
FILE: callstrings/ida_callstrings_static.py
================================================
'''
ida_callstrings_static.py - string deobfuscation for Hodur
Takahiro Haruyama (@cci_forensics)
'''
import idaapi
idaapi.require('hexrays_utils', package='*')
from hexrays_utils import *
g_DEBUG = False
g_CACHE = True
g_memcpy_names = ['qmemcpy', 'wmemcpy', 'strcpy']
def info(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[*]`` marker."""
    print(f"\033[34m\033[1m[*]\033[0m {msg}")
def success(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[+]`` marker."""
    print(f"\033[32m\033[1m[+]\033[0m {msg}")
def error(msg):
    """Print *msg* prefixed with an ANSI-coloured ``[!]`` marker."""
    print(f"\033[31m\033[1m[!]\033[0m {msg}")
def debug(msg):
    """Print *msg* with a ``[D]`` marker; silent unless ``g_DEBUG`` is truthy."""
    if not g_DEBUG:
        return
    print(f"\033[33m\033[1m[D]\033[0m {msg}")
class static_decoder_t(ctree_visitor_t, HexRaysUtils):
    """Ctree visitor that decodes string literals copied by memcpy-like helpers.

    For every call to a helper listed in ``g_memcpy_names`` whose source
    argument is a string literal (optionally behind a cast), the literal is
    decoded with ``self.decode`` using the constant ``cst_val`` and the result
    is attached to the call as a decompiler comment.
    """

    def __init__(self, cst_val, cfunc):
        """Store the decoding constant *cst_val* and the decompiled function *cfunc*."""
        ctree_visitor_t.__init__(self, CV_PARENTS | CV_POST | CV_RESTART)
        HexRaysUtils.__init__(self)
        self.cst_val = cst_val
        self.cfunc = cfunc

    def visit_expr(self, expr):
        """Decode the source literal of a matching helper call; always returns 0."""
        is_target = expr.op == cot_call and expr.x.op == cot_helper and expr.x.helper in g_memcpy_names
        if not is_target:
            return 0

        info(f'{expr.ea:#x}: target helper function "{expr.x.helper}" is called')
        arg_src = expr.a.at(1)
        if arg_src.op == cot_str:
            literal = arg_src.string
        elif arg_src.op == cot_cast and arg_src.x.op == cot_str:
            literal = arg_src.x.string
        else:
            return 0

        # wmemcpy copies wide characters, so its literal is handled as UTF-16LE.
        enc = literal.encode('utf-16-le') if expr.x.helper == 'wmemcpy' else literal.encode()
        info(f'{expr.ea:#x}: src bytes = {enc}')

        try:
            dec = self.decode(enc, self.cst_val).decode()
        except UnicodeDecodeError:
            # A wrong constant can yield bytes that are not valid text; report
            # it as a failed decoding instead of aborting the whole traversal.
            dec = ''

        if dec:
            success(f'{expr.ea:#x}: string decoded "{dec}"')
            self.set_decomplier_cmt(self.cfunc, expr.ea, dec)
        else:
            error(f'{expr.ea:#x}: string decoding failed using a constant value ({self.cst_val:#x})')
        return 0
def main():
# Entry point: statically decode the string literals of one function (the one
# under the cursor) or of every function in the database.
# NOTE(review): g_stub_GetProcAddress is not defined among this file's visible
# globals; presumably it comes from `from hexrays_utils import *` -- confirm.
info('start')
# Yes -> only the function at the cursor, No -> all functions, anything else cancels.
ans = ida_kernwin.ask_yn(0, 'only decode the selected function?')
if ans == ida_kernwin.ASKBTN_YES:
fvas = [get_func_attr(get_screen_ea(), FUNCATTR_START)]
elif ans == ida_kernwin.ASKBTN_NO:
fvas = idautils.Functions()
else:
info('canceled')
return
for fva in fvas:
if get_func_flags(fva) & (FUNC_LIB | FUNC_THUNK):
debug(f"{fva:#x}: skipping library or thunk function")
continue
fn_name = get_name(get_func_attr(fva, FUNCATTR_START))
if fn_name.find(g_stub_GetProcAddress) != -1:
debug(f"{fva:#x}: skipping GetProcAddress stub function")
continue
print('-' * 100)
info(f'{get_name(fva)} ({fva:#x})')
# First pass over the ctree finds the decoding constant; the second pass
# (static_decoder_t) runs only when a constant was found.
cfunc = get_ctree_root(fva, cache=g_CACHE)
cvf = cnt_val_finder_t()
cvf.apply_to_exprs(cfunc.body, None)
cnt_val = cvf.get_cnt_val()
if cnt_val:
sd = static_decoder_t(cnt_val, cfunc)
sd.apply_to_exprs(cfunc.body, None)
else:
error(f'{fva:#x}: A constant value for decoding is not found')
refresh_idaview_anyway()
print('-' * 100)
info('done')
if __name__ == '__main__':
main()
================================================
FILE: eset_crackme/README.org
================================================
* IDA Pro loader/processor modules for ESET CrackMe driver VM
You can download the initial sample for the CrackMe challenge from [[https://join.eset.com/en/challenges/crack-me][here]].
before:
[[./img/eset_before.png]]
after:
[[./img/eset_after.png]]
** Reference
- https://quequero.org/2016/01/eset-crackme-challenge-2015-walkthrough/
- http://mshetta.blogspot.jp/2016/11/join-eset-crackme-2015-solution.html
================================================
FILE: eset_crackme/loaders/ida_loader_drv_vm.py
================================================
import idaapi
import ida_segment
from idc import *
from struct import *
DATA_SEG_START = 0x10000 # may be changed
def accept_file(li, filename):
    """Tell IDA whether this loader recognizes the input file.

    The first two bytes are read as a little-endian 16-bit signature and
    compared with the known VM program signatures.

    Returns a dict with the format name when the signature matches, otherwise
    0.  Files too short to hold a signature are rejected rather than raising.
    """
    header = li.read(2)
    if not header or len(header) < 2:
        return 0
    sig = unpack('<H', header)[0]
    if sig in (0x3713, 0x481c, 0x1337):
        return {'format': "ESET Crackme driver VM program"}
    return 0
def int16(b):
    """Decode exactly two bytes as an unsigned little-endian 16-bit integer."""
    (value,) = unpack('<H', b)
    return value
def int32(b):
    """Decode exactly four bytes as an unsigned little-endian 32-bit integer."""
    (value,) = unpack('<I', b)
    return value
def myAddSeg(startea, endea, base, use32, name, clas):
    """Create the segment [startea, endea) named *name* with class *clas*.

    A ``segment_t`` is filled in as well, but it is only consumed by the
    disabled ``add_segm_ex`` call; the segment is actually created through
    ``idaapi.add_segm``, which does not take *use32*.
    """
    seg = idaapi.segment_t()
    seg.start_ea = startea
    seg.end_ea = endea
    seg.sel = idaapi.setup_selector(base)
    seg.bitness = use32
    seg.align = idaapi.saRelPara
    seg.comb = idaapi.scPub
    # Disabled alternative:
    # idaapi.add_segm_ex(seg, name, clas, idaapi.ADDSEG_NOSREG|idaapi.ADDSEG_OR_DIE)
    idaapi.add_segm(base, startea, endea, name, clas)
def load_file(li, neflags, format):
    """Load an ESET crackme VM program into the database.

    Header layout as read here: 16-bit signature, then four 32-bit fields
    (program size, code offset, data offset, kernel-mode flag).  The code is
    mapped at address 0 and the data at ``DATA_SEG_START`` (flat memory
    space).  Returns 1.
    """
    li.seek(0)  # needed to read signature
    sig = int16(li.read(2))
    # program size, code segment offset, data segment offset, kernel-mode flag
    size, code_off, data_off, flag_kernel_mode = (int32(li.read(4)) for _ in range(4))
    if sig != 0x3713:  # for inline VM
        code_off = 0x12

    #set_processor_type('eset_vm', SETPROC_USER | SETPROC_LOADER)
    set_processor_type('eset_vm', SETPROC_LOADER)

    # Create segment & Populate.  Each file2base call starts at the current
    # file position, so the code must be loaded before the data.
    code_len = data_off - code_off
    myAddSeg(0, code_len, 0, 1, 'VM_CODE', "CODE")
    li.file2base(li.tell(), 0, code_len, 1)

    data_end = DATA_SEG_START + size - data_off
    myAddSeg(DATA_SEG_START, data_end, 0, 1, 'VM_DATA', "DATA")  # flat memory space
    # Segmented alternative (base should be in 16-byte paragraphs):
    #myAddSeg(DATA_SEG_START, data_end, DATA_SEG_START >> 4, 1, 'VM_DATA', "DATA")
    li.file2base(li.tell(), DATA_SEG_START, data_end, 1)
    # Another disabled layout mapped the segments at their file offsets
    # (code at [code_off, data_off), data at [data_off, size)).

    # initialize
    for attr in (INF_START_EA, INF_START_IP, INF_START_CS):
        set_inf_attr(attr, 0)
    #add_entry(0, ep, "start", 1)
    add_entry(0, 0, "start", 1)

    # should return 1 or terminate immediately
    return 1
================================================
FILE: eset_crackme/procs/ida_processor_drv_vm.py
================================================
import sys
import copy
import ida_idaapi
import ida_idp
import ida_ua
import ida_bytes
import ida_xref
import ida_offset
import ida_problems
import ida_lines
import ida_segment
from ida_idp import CF_USE1, CF_USE2, CF_CHG1, CF_CHG2, CF_STOP, CF_JUMP, CF_SHFT, CF_CALL
# enum definitions from VM engine idb
# enum_vm_size
SIZE_BYTE = 0
SIZE_WORD = 1
SIZE_DWORD = 2
# enum_vm_type
TYPE_REG_VAL = 0
TYPE_REG_PTR = 1
TYPE_IMM_VAL = 2
TYPE_DATA_OFF = 3
# enum_vm_cmp
CMP_EQUAL = 0
CMP_NOT_EQUAL = 1
CMP_LESS_THAN = 2
# enum_vm_arith
ARITH_XOR = 0
ARITH_ADD = 1
ARITH_SUB = 2
ARITH_SHL = 3
ARITH_SHR = 4
ARITH_ROL = 5
ARITH_ROR = 6
ARITH_MOD = 7
# ----------------------------------------------------------------------
class eset_drv_vm_processor_t(ida_idp.processor_t):
"""
Processor module classes must derive from ida_idp.processor_t
"""
# IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
id = 0x8fff
# Processor features
flag = ida_idp.PRN_HEX | ida_idp.PR_RNAMESOK
# Number of bits in a byte for code segments (usually 8)
# IDA supports values up to 32 bits
cnbits = 8
# Number of bits in a byte for non-code segments (usually 8)
# IDA supports values up to 32 bits
dnbits = 8
# short processor names
# Each name should be shorter than 9 characters
psnames = ['eset_vm']
# long processor names
# No restriction on name lengths.
plnames = ['ESET Crackme driver VM processor']
# size of a segment register in bytes
segreg_size = 0
# Array of instructions
# NOTE: the order of this table is significant.  init_instructions() assigns
# itype_<name> from each entry's index, and notify_ana() derives the itype
# arithmetically (itype_hlt + opcode, then adds the cmp / jmp / arithmetic
# sub-operation index), so the cmpeq..cmpb, jmp/cjmp and xor..mod groups must
# stay contiguous and in this order.
instruc = [
{'name': '', 'feature': 0}, # placeholder for "not an instruction"
{'name': 'hlt', 'feature': CF_STOP, 'cmt': "halt CPU"},
{'name': 'mov', 'feature': CF_USE1 | CF_USE2 | CF_CHG1, 'cmt': "move"},
{'name': 'ncall', 'feature': CF_USE1 | CF_CALL, 'cmt': "call native function"},
{'name': 'lcall', 'feature': CF_USE1 | CF_USE2 | CF_CALL, 'cmt': "call library function"},
{'name': 'push', 'feature': CF_USE1, 'cmt': "push to stack"},
{'name': 'pop', 'feature': CF_USE1 | CF_CHG1, 'cmt': "pop from stack"},
{'name': 'cmpeq', 'feature': CF_USE1 | CF_USE2, 'cmt': "compare #0 (equal)"},
{'name': 'cmpne', 'feature': CF_USE1 | CF_USE2, 'cmt': "compare #1 (not equal)"},
{'name': 'cmpb', 'feature': CF_USE1 | CF_USE2, 'cmt': "compare #2 (less than)"},
{'name': 'jmp', 'feature': CF_USE1 | CF_JUMP | CF_STOP, 'cmt': "jump #0 (unconditional)"},
{'name': 'cjmp', 'feature': CF_USE1 | CF_JUMP, 'cmt': "jump #1 (conditional)"},
{'name': 'call', 'feature': CF_USE1 | CF_CALL, 'cmt': "call VM function"},
{'name': 'ret', 'feature': 0, 'cmt': "return"},
{'name': 'xor', 'feature': CF_USE1 | CF_USE2 | CF_CHG1, 'cmt': "arithmetic operation #0 (xor)"},
{'name': 'add', 'feature': CF_USE1 | CF_USE2 | CF_CHG1, 'cmt': "arithmetic operation #1 (add)"},
{'name': 'sub', 'feature': CF_USE1 | CF_USE2 | CF_CHG1, 'cmt': "arithmetic operation #2 (sub)"},
{'name': 'shl', 'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT, 'cmt': "arithmetic operation #3 (shift left)"},
{'name': 'shr', 'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT, 'cmt': "arithmetic operation #4 (shift right)"},
{'name': 'rol', 'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT, 'cmt': "arithmetic operation #5 (rotation left)"},
{'name': 'ror', 'feature': CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT, 'cmt': "arithmetic operation #6 (rotation right)"},
{'name': 'mod', 'feature': CF_USE1 | CF_USE2 | CF_CHG1, 'cmt': "arithmetic operation #7 (modulo)"},
{'name': 'alloc', 'feature': CF_USE1, 'cmt': "allocate buffer"},
{'name': 'free', 'feature': CF_USE1, 'cmt': "free buffer"},
{'name': 'loadVM','feature': CF_USE1 | CF_USE2, 'cmt': "load another VM"},
{'name': 'nop', 'feature': 0, 'cmt': "nop"},
]
# icode of the first instruction
instruc_start = 0
# icode of the last instruction + 1
instruc_end = len(instruc) + 1
# Size of long double (tbyte) for this processor (meaningful only if ash.a_tbyte != NULL) (optional)
# tbyte_size = 0
#
# Number of digits in floating numbers after the decimal point.
# If an element of this array equals 0, then the corresponding
# floating point data is not used for the processor.
# This array is used to align numbers in the output.
# real_width[0] - number of digits for short floats (only PDP-11 has them)
# real_width[1] - number of digits for "float"
# real_width[2] - number of digits for "double"
# real_width[3] - number of digits for "long double"
# Example: IBM PC module has { 0,7,15,19 }
#
# (optional)
#real_width = (0, 7, 0, 0)
# only one assembler is supported
assembler = {
# flag (mostly for the format)
'flag' : ida_idp.ASH_HEXF3 | ida_idp.ASD_DECF0 | ida_idp.ASO_OCTF5 | ida_idp.ASB_BINF0 | ida_idp.AS_N2CHR,
# user defined flags (local only for IDP) (optional)
#'uflag' : 0,
# Assembler name (displayed in menus)
'name': "ESET Crackme driver VM assembler",
# array of automatically generated header lines they appear at the start of disassembled text (optional)
'header': [".esetvm"],
# array of unsupported instructions (array of insn.itype) (optional)
#'badworks': [],
# org directive
'origin': ".org",
# end directive
'end': ".end",
# comment string (see also cmnt2)
'cmnt': ";",
# ASCII string delimiter
'ascsep': "\"",
# ASCII char constant delimiter
'accsep': "'",
# ASCII special chars (they can't appear in character and ascii constants)
'esccodes': "\"'",
#
# Data representation (db,dw,...):
#
# ASCII string directive
'a_ascii': ".char",
# byte directive
'a_byte': "db",
# word directive
'a_word': "dw",
# remove if not allowed
'a_dword': "dd",
# remove if not allowed
# 'a_qword': "dq",
# float; 4bytes; remove if not allowed
#'a_float': ".float",
# uninitialized data directive (should include '%s' for the size of data)
'a_bss': ".space %s",
# 'equ' Used if AS_UNEQU is set (optional)
#'a_equ': ".equ",
# 'seg ' prefix (example: push seg seg001)
'a_seg': "seg",
# current IP (instruction pointer) symbol in assembler
'a_curip': "$",
# "public" name keyword. NULL-gen default, ""-do not generate
'a_public': ".def",
# "weak" name keyword. NULL-gen default, ""-do not generate
'a_weak': "",
# "extrn" name keyword
'a_extrn': ".ref",
# "comm" (communal variable)
'a_comdef': "",
# "align" keyword
'a_align': ".align",
# Left and right braces used in complex expressions
'lbrace': "(",
'rbrace': ")",
# % mod assembler time operation
'a_mod': "%",
# & bit and assembler time operation
'a_band': "&",
# | bit or assembler time operation
'a_bor': "|",
# ^ bit xor assembler time operation
'a_xor': "^",
# ~ bit not assembler time operation
'a_bnot': "~",
# << shift left assembler time operation
'a_shl': "<<",
# >> shift right assembler time operation
'a_shr': ">>",
# size of type (format string) (optional)
'a_sizeof_fmt': "size %s",
'flag2': 0,
# the include directive (format string) (optional)
'a_include_fmt': '.include "%s"',
} # Assembler
# ----------------------------------------------------------------------
# The following callbacks are optional
#
#def notify_newprc(self, nproc):
# """
# Before changing proccesor type
# nproc - processor number in the array of processor names
# return 1-ok,0-prohibit
# """
# return 1
#def notify_assemble(self, ea, cs, ip, use32, line):
# """
# Assemble an instruction
# (make sure that ida_idp.PR_ASSEMBLE flag is set in the processor flags)
# (display a warning if an error occurs)
# args:
# ea - linear address of instruction
# cs - cs of instruction
# ip - ip of instruction
# use32 - is 32bit segment?
# line - line to assemble
# returns the opcode string
# """
# pass
def notify_get_frame_retsize(self, func_ea):
    """
    Return the size of a function's return address in bytes (always 2 here).

    Without this callback the kernel would assume 4 bytes for a 32-bit
    function and 2 bytes otherwise.
    """
    retsize = 2
    return retsize
def notify_get_autocmt(self, insn):
    """
    Return the automatic comment for 'insn'.

    @return: the 'cmt' text of the matching ``instruc`` entry, or None when
             the entry has no comment
    """
    entry = self.instruc[insn.itype]
    return entry.get('cmt')
# ----------------------------------------------------------------------
def notify_is_sane_insn(self, insn, no_crefs):
    """
    Is the instruction in 'insn' sane for the current file type?

    no_crefs:
        1: nothing references the instruction; IDA is merely trying to turn
           unexplored bytes into code
        0: the instruction is created because of a code reference, a user
           request or another weighty reason
    returns: 1-ok, <=0-no (the instruction is unlikely to appear in the program)

    This implementation answers -1 unconditionally; the word-based check
    below is kept for reference only.
    """
    #w = ida_bytes.get_wide_word(insn.ea)
    #if w == 0 or w == 0xFFFF:
    #    return 0
    #return 1
    verdict = -1
    return verdict
# ----------------------------------------------------------------------
def handle_operand(self, insn, op, isRead):
    """
    Create the cross-references implied by operand 'op' of 'insn'.

    - immediate: for ncall/call the value is a call target (code xref); unless
      the user already defined the operand it is also turned into an offset.
      Offset operands additionally get offset data xrefs.
    - near: code xref (call for ncall/call, jump otherwise).
    - memory: the data item is created and a read or write data xref is
      added, depending on 'isRead'.
    """
    flags = ida_bytes.get_flags(insn.ea)
    is_offs = ida_bytes.is_off(flags, op.n)
    if isRead:
        dref_flag = ida_xref.dr_R
    else:
        dref_flag = ida_xref.dr_W
    def_arg = ida_bytes.is_defarg(flags, op.n)
    kind = op.type
    is_call = insn.itype in [self.itype_ncall, self.itype_call]

    # create code xrefs
    if kind == ida_ua.o_imm:
        if is_call:
            insn.add_cref(op.value, op.offb, ida_xref.fl_CN)
            if not def_arg:
                otype = ida_offset.get_default_reftype(insn.ea)
                ida_offset.op_offset(insn.ea, op.n, otype, ida_idaapi.BADADDR, insn.cs)
                is_offs = True
        if is_offs:
            insn.add_off_drefs(op, ida_xref.dr_O, 0)
    elif kind == ida_ua.o_near:
        xref_type = ida_xref.fl_CN if is_call else ida_xref.fl_JN
        insn.add_cref(op.addr, op.offb, xref_type)
    # create data xrefs
    elif kind == ida_ua.o_mem:
        # op.addr already includes the VM_DATA base (added in fill_mem).
        insn.create_op_data(op.addr, op.offb, op.dtype)
        insn.add_dref(op.addr, op.offb, dref_flag)
# ----------------------------------------------------------------------
# The following callbacks are mandatory
#
def notify_emu(self, insn):
    """
    Emulate the instruction in 'insn': create cross-references for its
    operands, record jumps as problems and add the flow to the following
    instruction where execution can fall through.

    Returns 1 (returning zero would make the kernel delete the instruction).
    """
    aux = self.get_auxpref(insn)
    feature = insn.get_canon_feature()

    # (feature bit, operand attribute, isRead) in the order they are processed
    operand_roles = (
        (CF_USE1, 'Op1', 1),
        (CF_CHG1, 'Op1', 0),
        (CF_USE2, 'Op2', 1),
        (CF_CHG2, 'Op2', 0),
    )
    for bit, op_attr, is_read in operand_roles:
        if feature & bit:
            self.handle_operand(insn, getattr(insn, op_attr), is_read)

    if feature & CF_JUMP:
        ida_problems.remember_problem(ida_problems.PR_JUMP, insn.ea)

    # add flow unless the instruction stops execution or is an unconditional jump
    stops = (feature & CF_STOP) != 0
    if not stops and insn.itype != self.itype_jmp:
        insn.add_cref(insn.ea + insn.size, 0, ida_xref.fl_F)

    return 1
# ----------------------------------------------------------------------
def notify_out_operand(self, ctx, op):
    """
    Generate the text representation of an instruction operand.

    This function must not change the database, flags or anything else;
    all such actions belong to the emu() function.
    Returns: True-ok, False-operand is hidden (unsupported operand type).
    """
    kind = op.type
    width = op.dtype

    if kind == ida_ua.o_reg:
        size_prefix = {ida_ua.dt_byte: 'byte ', ida_ua.dt_word: 'word '}.get(width)
        if size_prefix is not None:
            ctx.out_keyword(size_prefix)
        ctx.out_register(self.reg_names[op.reg])
        return True

    if kind == ida_ua.o_phrase:
        ptr_prefix = {
            ida_ua.dt_dword: 'dword ptr ',
            ida_ua.dt_byte: 'byte ptr ',
            ida_ua.dt_word: 'word ptr ',
        }.get(width)
        if ptr_prefix is not None:
            ctx.out_keyword(ptr_prefix)
        ctx.out_symbol('[')
        ctx.out_register(self.reg_names[op.reg])
        ctx.out_symbol(']')
        return True

    if kind == ida_ua.o_imm:
        ctx.out_symbol('#')
        ctx.out_value(op, ida_ua.OOFW_IMM | 0)  # unsigned
        return True

    if kind in [ida_ua.o_near, ida_ua.o_mem]:
        named = ctx.out_name_expr(op, op.addr, ida_idaapi.BADADDR)
        if not named:
            # No name at the address: print the raw number in the error colour.
            ctx.out_tagon(ida_lines.COLOR_ERROR)
            ctx.out_long(op.addr, 16)
            ctx.out_tagoff(ida_lines.COLOR_ERROR)
            ida_problems.remember_problem(ida_problems.PR_NONAME, ctx.insn.ea)
        return True

    return False
# ----------------------------------------------------------------------
def notify_out_insn(self, ctx):
    """
    Generate the text representation of the instruction in 'ctx.insn'.

    This function must not change the database, flags or anything else;
    all such actions belong to the emu() function.
    Returns: nothing
    """
    ctx.out_mnemonic()

    # output first operand
    # kernel will call outop()
    if ctx.insn.Op1.type != ida_ua.o_void:
        ctx.out_one_operand(0)

    # output the rest of operands separated by commas
    # (range, not xrange: the latter does not exist on Python 3)
    for i in range(1, 3):
        if ctx.insn[i].type == ida_ua.o_void:
            break
        ctx.out_symbol(',')
        ctx.out_char(' ')
        ctx.out_one_operand(i)

    ctx.set_gen_cmt()  # generate comment at the next call to MakeLine()
    ctx.flush_outbuf()
def fill_reg(self, op, dtype, regno):
    """Describe 'op' as register number 'regno' accessed with data type 'dtype'."""
    op.reg = regno
    op.dtype = dtype
    op.type = ida_ua.o_reg
def fill_phrase(self, op, dtype, regno):
    """Describe 'op' as a phrase operand on register 'regno' (printed as '[reg]') with data type 'dtype'."""
    op.phrase = regno
    op.dtype = dtype
    op.type = ida_ua.o_phrase
def fill_imm(self, op, dtype, val):
    """Describe 'op' as the immediate value 'val' with data type 'dtype'."""
    op.value = val
    op.dtype = dtype
    op.type = ida_ua.o_imm
def fill_near(self, op, dtype, addr):
    """Describe 'op' as a near code address 'addr' with data type 'dtype'."""
    op.addr = addr
    op.dtype = dtype
    op.type = ida_ua.o_near
def fill_mem(self, op, dtype, addr):
    """Describe 'op' as a memory operand; 'addr' is an offset into the 'VM_DATA' segment."""
    op.type = ida_ua.o_mem
    op.dtype = dtype
    # add data segment base addr
    data_seg = ida_segment.get_segm_by_name('VM_DATA')
    base = data_seg.start_ea
    op.addr = base + addr
def get_next_bytes(self, insn, dtype):
    """Fetch the next byte, word or dword of 'insn' according to 'dtype'; None for other types."""
    readers = {
        ida_ua.dt_byte: insn.get_next_byte,
        ida_ua.dt_word: insn.get_next_word,
        ida_ua.dt_dword: insn.get_next_dword,
    }
    reader = readers.get(dtype)
    if reader is None:
        return None
    return reader()
def set_operand(self, insn, op, type_, regno, dtype):
    """
    Fill 'op' according to the VM operand type 'type_'.

    Immediate and data-offset operands consume further bytes of 'insn'.
    Returns 0 on success, -1 when 'dtype' is larger than 2.
    """
    # check dtype
    if dtype > 2:
        return -1

    # IDA data type enum is matched with enum_vm_size of the idb
    if type_ == TYPE_REG_VAL:
        self.fill_reg(op, dtype, regno)
    elif type_ == TYPE_REG_PTR:
        self.fill_phrase(op, dtype, regno)
    elif type_ == TYPE_IMM_VAL:
        immediate = self.get_next_bytes(insn, dtype)
        self.fill_imm(op, dtype, immediate)
    elif type_ == TYPE_DATA_OFF:
        # the data offset itself is always stored as a dword
        data_offset = insn.get_next_dword()
        self.fill_mem(op, dtype, data_offset)

    return 0
# ----------------------------------------------------------------------
def notify_ana(self, insn):
"""
Decodes an instruction into 'insn'.
Returns: insn.size (=the size of the decoded instruction) or zero
"""
opc = insn.get_next_byte()
# Map the opcode byte to an index in `instruc`.  Opcodes 0x6 (cmp), 0x7 (jmp)
# and 0xa (arithmetic) each cover several table entries, so the constant added
# to itype_hlt grows by the number of extra entries skipped so far
# (+2 after cmp, +1 after jmp, +7 after the arithmetic group).
# cmp (0x6), jmp (0x7), arithmetic operation (0xa): multiple instructions
# 0xe - 0xff: nop
if opc > 0xd:
insn.itype = self.itype_nop
elif opc > 0xa:
insn.itype = self.itype_hlt + opc + 2 + 1 + 7
elif opc > 7:
insn.itype = self.itype_hlt + opc + 2 + 1
elif opc > 6:
insn.itype = self.itype_hlt + opc + 2
else:
insn.itype = self.itype_hlt + opc
# hlt, ret and nop have no operand bytes to decode.
if insn.itype not in [self.itype_hlt, self.itype_ret, self.itype_nop]:
if insn.itype in [self.itype_call, self.itype_jmp]:
if insn.itype == self.itype_jmp:
# jmp carries one extra byte: 0 keeps jmp, 1 turns it into cjmp.
cflag = insn.get_next_byte() # check conditional flag
if cflag > 1:
return 0 # invalid flag value
insn.itype += cflag
addr = insn.get_next_dword()
self.fill_near(insn.Op1, ida_ua.dt_dword, addr)
elif insn.itype == self.itype_pop:
regno = insn.get_next_byte() & 0xf
self.fill_reg(insn.Op1, ida_ua.dt_dword, regno)
elif insn.itype in [self.itype_push, self.itype_alloc, self.itype_free, self.itype_ncall]:
# One descriptor byte: bits 0-3 register, bits 4-5 operand type, bits 6-7 size
# (the size is forced to dword for ncall).
b1 = insn.get_next_byte()
dtype = ida_ua.dt_dword if insn.itype == self.itype_ncall else b1 >> 6
if self.set_operand(insn, insn.Op1, (b1 >> 4) & 3, b1 & 0xf, dtype):
return 0 # invalid dtype
elif insn.itype in [self.itype_lcall, self.itype_loadVM]:
# Two descriptor bytes: b1 holds both register numbers, b2 the operand
# types (bits 0-1 for Op1, bits 2-3 for Op2) and, for loadVM, the Op2 size.
b1 = insn.get_next_byte()
b2 = insn.get_next_byte()
if self.set_operand(insn, insn.Op1, b2 & 3, b1 & 0xf, ida_ua.dt_dword):
return 0 # invalid dtype
dtype = ida_ua.dt_dword if insn.itype == self.itype_lcall else (b2 >> 4) & 3
if self.set_operand(insn, insn.Op2, (b2 >> 2) & 3, b1 >> 4, dtype):
return 0 # invalid dtype
elif insn.itype == self.itype_mov:
# The source (Op2) is decoded first; the destination register is then used
# either directly or as a pointer, depending on bits 2-3 of b2.
b1 = insn.get_next_byte()
b2 = insn.get_next_byte()
dtype = (b2 >> 4) & 3
if self.set_operand(insn, insn.Op2, b2 & 3, b1 >> 4, dtype):
return 0 # invalid dtype
dst_regno = b1 & 0xf
if (b2 >> 2) & 3: # used as pointer
self.fill_phrase(insn.Op1, dtype, dst_regno)
#insn.Op2.specflag1 = 1
else:
self.fill_reg(insn.Op1, dtype, dst_regno)
elif insn.itype in [self.itype_cmpeq, self.itype_xor]:
b1 = insn.get_next_byte()
b2 = insn.get_next_byte()
self.fill_reg(insn.Op1, ida_ua.dt_dword, b1 & 0xf)
if self.set_operand(insn, insn.Op2, b2 & 3, b1 >> 4, (b2 >> 2) & 3):
return 0 # invalid dtype
# Bits 4-6 of b2 select the concrete operation inside the group:
# cmpeq/cmpne/cmpb (0-2 only) or xor..mod (0-7).
# update itype
itype_idx = (b2 >> 4) & 7
if insn.itype == self.itype_cmpeq and itype_idx > 2:
return 0 # invalid cmp operation
else:
insn.itype += itype_idx
# Return decoded instruction size or zero
return insn.size if insn.itype != self.itype_null else 0
# ----------------------------------------------------------------------
def init_instructions(self):
    """
    Create the itype_XXX constants from the instruction table.

    Every entry of ``self.instruc`` gets an attribute ``itype_<name>`` holding
    its index; the entry with an empty name becomes ``itype_null``.
    ``instruc_end`` is set as well.
    """
    for index, entry in enumerate(self.instruc):
        name = entry['name'] if entry['name'] != '' else 'null'
        setattr(self, 'itype_' + name, index)

    # icode of the last instruction + 1
    self.instruc_end = len(self.instruc) + 1
# ----------------------------------------------------------------------
def init_registers(self):
    """
    Define the register table and create the corresponding ireg_XXX constants.

    Sets self.reg_names, one ireg_<name> attribute per register (its index in
    the table) and the segment-register fields reg_first_sreg, reg_last_sreg,
    reg_code_sreg and reg_data_sreg.
    """
    # Registers definition
    self.reg_names = [
        # General purpose registers
        "r0",
        "r1",
        "r2",
        "r3",
        "r4",
        "r5",
        # SP
        "r6",
        # VM pointer
        "r7",
        # VM size
        "r8",
        # ntoskrnl_base
        "r9",
        # arg registers
        "r10",
        "r11",
        "r12",
        "r13",
        "r14",
        "r15",
        # Fake segment registers
        "CS",
        "DS",
    ]

    # Create the ireg_XXXX constants.  enumerate() replaces the former
    # xrange(len(...)) loop, which raised NameError on Python 3.
    for index, name in enumerate(self.reg_names):
        setattr(self, 'ireg_' + name, index)

    # Segment register information (use virtual CS and DS registers if your
    # processor doesn't have segment registers):
    self.reg_first_sreg = self.ireg_CS
    self.reg_last_sreg = self.ireg_DS

    # number of CS register
    self.reg_code_sreg = self.ireg_CS

    # number of DS register
    self.reg_data_sreg = self.ireg_DS
# ----------------------------------------------------------------------
def __init__(self):
    """Initialise the base processor_t, then build the itype_* and ireg_* constants."""
    ida_idp.processor_t.__init__(self)
    # the decoder refers to self.itype_* attributes, so the tables are built here
    self.init_instructions()
    self.init_registers()
# ----------------------------------------------------------------------
# Every processor module script must provide this function.
# It should return a new instance of a class derived from ida_idp.processor_t
def PROCESSOR_ENTRY():
    """Create and return the processor module instance."""
    processor = eset_drv_vm_processor_t()
    return processor
================================================
FILE: fn_fuzzy/README.org
================================================
#+OPTIONS: ^:{}
#+TITLE: fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage
* Motivation
See the [[https://conference.hitb.org/hitbsecconf2019ams/sessions/fn_fuzzy-fast-multiple-binary-diffing-triage-with-ida/][conference information]] or [[https://www.carbonblack.com/2019/05/09/fn_fuzzy-fast-multiple-binary-diffing-triage-with-ida/][blog]] post.
* how to use
- fn_fuzzy.py :: IDAPython script to export/compare fuzzy hashes of the sample
- cli_export.py :: python wrapper script to export fuzzy hashes of multiple samples
The typical usage is to run cli_export.py to make a database for large idbs then compare on IDA by executing fn_fuzzy.py.
[[./img/fn_fuzzy.png]]
[[./img/res_summary.png]]
[[./img/res_funcs.png]]
* supported IDB version
IDBs generated by IDA 6.9 or later are supported, because the scripts rely on the SHA256 API (ida_nalt.retrieve_input_file_sha256).
* required python packages
- mmh3
- [[https://github.com/williballenthin/python-idb][python-idb]]
================================================
FILE: fn_fuzzy/cli_export.py
================================================
# cli_export.py - batch export script for fn_fuzzy
# Takahiro Haruyama (@cci_forensics)
import argparse, subprocess, os, sqlite3, time, sys
import idb # python-idb
import logging
logging.basicConfig(level=logging.ERROR) # to suppress python-idb warning
# plz edit the following paths
g_ida_dir = r'C:\analysisw\tool\IDA'
g_db_path = r'C:\analysisw\tics\fn_fuzzy.sqlite'
g_fn_fuzzy_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'fn_fuzzy.py')
g_min_bytes = 0x10 # minimum number of extracted code bytes per function
g_analyzed_prefix = r'fn_' # analyzed function name prefix (regex)
class LocalError(Exception):
    """Base class of the errors raised by this script."""


class ProcExportError(LocalError):
    """Raised when the IDA export subprocess ends with an unexpected return code."""
def info(msg):
    """Print msg as an informational line ("[*]" prefix)."""
    line = "[*] {}".format(msg)
    print(line)


def success(msg):
    """Print msg as a success line ("[+]" prefix)."""
    line = "[+] {}".format(msg)
    print(line)


def error(msg):
    """Print msg as an error line ("[!]" prefix)."""
    line = "[!] {}".format(msg)
    print(line)
def init_db(cur):
    """
    Create the sample/function tables and their indexes in an empty database.

    Nothing is done when sqlite_master already lists at least one table.
    """
    cur.execute("SELECT * FROM sqlite_master WHERE type='table'")
    if cur.fetchone() is not None:
        return  # a schema is already present

    info('DB initialized')
    schema = (
        "CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)",
        # the sha256 column is declared UNIQUE, so no separate sha256 index is created
        "CREATE INDEX path_index ON sample(path)",
        "CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))",
        "CREATE INDEX f_ana_index ON function(f_ana)",
        "CREATE INDEX bsize_index ON function(bsize)",
    )
    for statement in schema:
        cur.execute(statement)
def existed(cur, sha256):
    """Return True when the sample table already holds a row for sha256."""
    cur.execute("SELECT * FROM sample WHERE sha256 = ?", (sha256,))
    row = cur.fetchone()
    return row is not None
def remove(cur, sha256):
    """Delete the sample row and every function row recorded for sha256."""
    params = (sha256,)
    statements = (
        "DELETE FROM sample WHERE sha256 = ?",
        "DELETE FROM function WHERE sha256 = ?",
    )
    for statement in statements:
        cur.execute(statement, params)
def export(f_debug, idb_path, outdb, min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre, f_remove):
    """
    Export (or remove) the fuzzy-hash records of one idb by running fn_fuzzy.py in IDA.

    f_debug        -- when false, IDA is started with -A
    idb_path       -- candidate file; anything that is not an .idb/.i64 with an
                      IDA1/IDA2 signature is ignored
    outdb          -- path of the SQLite database
    min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre
                   -- forwarded to fn_fuzzy.py through the -Ofn_fuzzy option
    f_remove       -- delete the records of this sample instead of exporting

    Returns 1 when IDA reports a successful export, and 0 when the file is
    not an idb, was skipped, or its records were removed.  Raises
    ProcExportError when IDA exits with any other return code.
    """
    # check the ext and signature
    ext = os.path.splitext(idb_path)[1]
    if ext not in ('.idb', '.i64'):
        return 0
    with open(idb_path, 'rb') as f:
        sig = f.read(4)
    if sig not in (b'IDA1', b'IDA2'):
        return 0

    # check the database record for the idb
    conn = sqlite3.connect(outdb)
    try:
        cur = conn.cursor()
        init_db(cur)
        with idb.from_file(idb_path) as db:
            api = idb.IDAPython(db)
            try:
                sha256 = api.ida_nalt.retrieve_input_file_sha256()
            except KeyError:
                error('{}: ida_nalt.retrieve_input_file_sha256() failed. The API is supported in 6.9 or later idb version. Check the API on IDA for validation.'.format(idb_path))
                return 0
        sha256 = sha256.lower()
        if f_remove:
            remove(cur, sha256)
            success('{}: the records successfully removed (SHA256={})'.format(idb_path, sha256))
            conn.commit()
            return 0
        if existed(cur, sha256) and not f_update:
            info('{}: The sample records are present in DB (SHA256={}). Skipped.'.format(idb_path, sha256))
            return 0
        conn.commit()
    finally:
        # Release the connection on every path (the early returns used to
        # leave it open) and before IDA is started with the same database path.
        conn.close()

    # The signature was read in binary mode, so it must be compared with a
    # bytes literal; comparing with the str 'IDA1' is always False on
    # Python 3 and selected ida64.exe for every database.
    ida = 'ida.exe' if sig == b'IDA1' else 'ida64.exe'
    ida_path = os.path.join(g_ida_dir, ida)
    cmd = [ida_path, '-S{}'.format(g_fn_fuzzy_path), '-Ofn_fuzzy:{}:{}:{}:{}:{}:{}'.format(min_, f_ex_libthunk, f_update, f_ana_exp, ana_pre, outdb), idb_path]
    if not f_debug:
        cmd.insert(1, '-A')

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    proc.communicate()  # wait for IDA to finish; its output is not used
    if proc.returncode == 0:
        success('{}: successfully exported'.format(idb_path))
        return 1
    elif proc.returncode == 2: # skipped
        return 0
    else: # maybe 1
        raise ProcExportError('{}: Something wrong with the IDAPython script (returncode={}). Use -d for debug'.format(idb_path, proc.returncode))
def list_file(d):
    """Yield the path of every regular file located directly inside directory d."""
    for name in os.listdir(d):
        candidate = os.path.join(d, name)
        if not os.path.isfile(candidate):
            continue
        yield candidate
def list_file_recursive(d):
    """Yield the path of every file found under directory d, descending into sub-directories."""
    for parent, _subdirs, names in os.walk(d):
        for name in names:
            full_path = os.path.join(parent, name)
            yield full_path
def main():
    """Parse the command line and export the target idb, or every idb found in the target folder."""
    info('start')

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('target', help="idb file or folder to export")
    parser.add_argument('--outdb', '-o', default=g_db_path, help="export DB path")
    parser.add_argument('--min_', '-m', type=int, default=g_min_bytes, help="minimum number of extracted code bytes per function")
    parser.add_argument('--exclude', '-e', action='store_true', help="exclude library/thunk functions")
    parser.add_argument('--update', '-u', action='store_true', help="update the DB records")
    parser.add_argument('--ana_exp', '-a', action='store_true', help="check analyzed functions")
    parser.add_argument('--ana_pre', '-p', default=g_analyzed_prefix, help="analyzed function name prefix (regex)")
    parser.add_argument('--recursively', '-r', action='store_true', help="export idbs recursively")
    parser.add_argument('--debug', '-d', action='store_true', help="display IDA dialog for debug")
    parser.add_argument('--remove', action='store_true', help="remove records from db")
    args = parser.parse_args()

    start = time.time()
    cnt = 0

    # a single file and a folder are handled by the same loop below
    if os.path.isfile(args.target):
        targets = [args.target]
    elif os.path.isdir(args.target):
        lister = list_file_recursive if args.recursively else list_file
        targets = lister(args.target)
    else:
        error('the target is not file/dir')
        return

    for target in targets:
        try:
            cnt += export(args.debug, target, args.outdb, args.min_, args.exclude, args.update, args.ana_exp, args.ana_pre, args.remove)
        except LocalError as e:
            # the first failure stops the whole run
            error('{} ({})'.format(str(e), type(e)))
            return

    elapsed = time.time() - start
    success('totally {} samples exported'.format(cnt))
    info('elapsed time = {} sec'.format(elapsed))
    info('done')


if __name__ == '__main__':
    main()
================================================
FILE: fn_fuzzy/dump_types.py
================================================
import os
def main():
    """
    Dump the type information of the current database to an .idc file, then exit IDA.

    The output file is written next to the idb, with the idb's extension
    replaced by '.idc'.
    """
    path = os.path.splitext(get_idb_path())[0] + '.idc'
    gen_file(OFILE_IDC, path, 0, 0, GENFLG_IDCTYPE)
    # qexit is the IDA 7.x name of the former Exit; the other calls in this
    # function already use 7.x names, and fn_fuzzy.py leaves IDA the same way.
    qexit(0)


if ( __name__ == "__main__" ):
    main()
================================================
FILE: fn_fuzzy/fn_fuzzy.py
================================================
# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage
# Takahiro Haruyama (@cci_forensics)
import os, ctypes, sqlite3, re, time, sys, subprocess
import cProfile
from collections import defaultdict
from pprint import PrettyPrinter
from io import StringIO
from tqdm import tqdm
from idc import *
import idautils, ida_nalt, ida_kernwin, idaapi, ida_expr
import mmh3
import yara_fn # modified version in the same folder
g_db_path = r'Z:\haru\analysis\tics\fn_fuzzy.sqlite' # plz edit your path
g_min_bytes = 0x10 # minimum number of extracted code bytes per function
g_analyzed_prefix = r'fn_|func_' # analyzed function name prefix (regex)
g_threshold = 50 # function similarity score threshold without CFG match
g_threshold_cfg = 10 # function similarity score threshold with CFG match
g_max_bytes_for_score = 0x100 # more code bytes are evaluated by only CFG match
g_bsize_ratio = 40 # function binary size correction ratio to compare (40 is enough)
# debug purpose to check one function matching
g_dbg_flag = False
g_dbg_fva = 0x180015978
g_dbg_fname = 'fn_blob_get_word_param_and_seek'
g_dbg_sha256 = ''
# initialization for ssdeep
SPAMSUM_LENGTH = 64
FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH + 20)
dirpath = os.path.dirname(__file__)
_lib_path = os.path.join(dirpath, 'fuzzy64.dll')
fuzzy_lib = ctypes.cdll.LoadLibrary(_lib_path)
g_dump_types_path = os.path.join(dirpath, 'dump_types.py')
class defaultdictRecurse(defaultdict):
    """defaultdict whose missing keys are filled with new instances of its own class."""

    def __init__(self):
        defaultdict.__init__(self, type(self))
class import_handler_t(ida_kernwin.action_handler_t):
    """
    Popup action that imports the name, prototype and a comment for the selected chooser rows.

    items    -- chooser rows; this handler reads column 0 (ssdeep score),
                1 (machoc match), 2 (primary function name), 4 (secondary
                function name) and 5 (secondary prototype)
    idb_path -- path of the secondary idb the rows come from
    title    -- title of the chooser to refresh afterwards
    """

    def __init__(self, items, idb_path, title):
        ida_kernwin.action_handler_t.__init__(self)
        self.items = items
        self.idb_path = idb_path
        self.title = title

    def import_types(self):
        """
        Import the type information of the secondary idb into the current database.

        When no .idc dump exists next to the secondary idb, IDA is started on
        that idb with dump_types.py to create it.  Returns False when that
        dump fails, True once the .idc script has been executed.
        """
        idc_path = os.path.splitext(self.idb_path)[0] + '.idc'

        # dump type information from the 2nd idb
        if not os.path.exists(idc_path):
            with open(self.idb_path, 'rb') as f:
                sig = f.read(4)
            # The signature is read in binary mode, so compare it with a bytes
            # literal; the str 'IDA1' never matches on Python 3.
            ida = 'ida.exe' if sig == b'IDA1' else 'ida64.exe'
            ida_path = os.path.join(idadir(), ida)
            cmd = [ida_path, '-S{}'.format(g_dump_types_path), self.idb_path]

            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            proc.communicate()  # wait for IDA to finish; its output is not used
            if proc.returncode == 0:
                success('{}: type information successfully dumped'.format(self.idb_path))
            else:
                error('{}: type information dumping failed'.format(self.idb_path))
                return False

        # import the type information
        ida_expr.exec_idc_script(None, str(idc_path), "main", None, 0)
        return True

    def _apply_prototype(self, ea, sptype):
        """Parse the declaration sptype and apply it at ea; return the result of apply_tinfo."""
        tinfo = idaapi.tinfo_t()
        idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)
        return idaapi.apply_tinfo(ea, tinfo, 0)

    def activate(self, ctx):
        """Rename, re-type and comment the primary function of every selected row."""
        for idx in ctx.chooser_selection:
            row = self.items[idx]

            # rename the function
            ea = get_name_ea_simple(row[2])
            sfname = str(row[4])
            idaapi.do_name_anyway(ea, sfname)
            success('{:#x}: renamed to {}'.format(ea, sfname))

            # set the function prototype ('None' is the text form of a missing prototype)
            sptype = str(row[5])
            if sptype != 'None':
                if self._apply_prototype(ea, sptype):
                    success('{:#x}: function prototype set to {}'.format(ea, sptype))
                else:
                    error('{:#x}: function prototype set FAILED (maybe you should import the types?)'.format(ea))
                    # retry once after importing the types of the secondary idb
                    if ask_yn(0, 'Do you import types from the secondary idb?') == 1:
                        if self.import_types():
                            if self._apply_prototype(ea, sptype):
                                success('{:#x}: function prototype set to {}'.format(ea, sptype))
                            else:
                                error('{:#x}: function prototype set FAILED again'.format(ea))

            # insert the comment
            score = row[0]
            mmatch = row[1]
            cmt = 'fn_fuzzy: ssdeep={}, machoc={}'.format(score, mmatch)
            set_func_cmt(ea, cmt, 1)
            #set_decomplier_cmt(ea, cmt) # not sure how to avoid orphan comment

        # update the Choose rows
        ida_kernwin.refresh_chooser(self.title)

    def update(self, ctx):
        """Keep the action always enabled."""
        # Alternative kept for reference: enable the action for chooser widgets only.
        #   return ida_kernwin.AST_ENABLE_FOR_WIDGET \
        #       if ida_kernwin.is_chooser_widget(ctx.widget_type) \
        #       else ida_kernwin.AST_DISABLE_FOR_WIDGET
        return idaapi.AST_ENABLE_ALWAYS
class FnCh(ida_kernwin.Choose):
    """
    Chooser listing the matched functions for one secondary sample.

    mfn maps a primary function address to the best match recorded for it
    (keys read here: score, cfg_match, pbsize, sfname, sptype).
    """

    def __init__(self, title, mfn, idb_path):
        self.mfn = mfn
        self.idb_path = idb_path
        self.title = title
        plain = ida_kernwin.Choose.CHCOL_PLAIN
        decimal = ida_kernwin.Choose.CHCOL_DEC
        columns = [
            ["ssdeep score", 10 | decimal],
            ["machoc matched", 10 | plain],
            ["primary function", 30 | plain],
            ["primary bsize", 10 | decimal],
            ["secondary analyzed function", 30 | plain],
            ["secondary prototype", 40 | plain],
        ]
        ida_kernwin.Choose.__init__(self, title, columns, flags=ida_kernwin.Choose.CH_MULTI)

    def OnInit(self):
        """Rebuild the rows, best score first, keeping only entries that have a secondary name."""
        ranked = sorted(list(self.mfn.items()), key=lambda pair: pair[1]['score'], reverse=True)
        rows = []
        for fva, match in ranked:
            if not match['sfname']:
                continue
            rows.append([
                '{}'.format(match['score']),
                '{}'.format(match['cfg_match']),
                str(get_name(fva)),
                '{}'.format(match['pbsize']),
                str(match['sfname']),
                '{}'.format(match['sptype']),
            ])
        self.items = rows
        return True

    def OnPopup(self, form, popup_handle):
        """Attach the import action to the popup menu of the chooser."""
        actname = "choose:actFnFuzzyImport"
        handler = import_handler_t(self.items, self.idb_path, self.title)
        desc = ida_kernwin.action_desc_t(actname, 'Import function name and prototype', handler)
        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)

    def OnGetSize(self):
        return len(self.items)

    def OnGetLine(self, n):
        return self.items[n]

    def OnSelectLine(self, n):
        """Jump to the primary function of the first selected row."""
        first = n[0] # due to CH_MULTI
        target = get_name_ea_simple(self.items[first][2])
        idc.Jump(target)

    def OnRefresh(self, n):
        """Rebuild the rows; no cursor position is reported back."""
        self.OnInit()
        return None

    def OnClose(self):
        print("closed ", self.title)
class SummaryCh(ida_kernwin.Choose):
    """
    Chooser summarising, per compared sample, how many similar functions were found.

    res maps a sample SHA256 to its comparison result (keys read here:
    'path', 'mcnt' with the 'total'/'analyzed' counters, and 'mfn').
    """

    def __init__(self, title, res):
        self.res = res
        ida_kernwin.Choose.__init__(
            self,
            title,
            [ ["SHA256", 20 | ida_kernwin.Choose.CHCOL_PLAIN],
              ["total similar functions", 20 | ida_kernwin.Choose.CHCOL_DEC],
              ["analyzed similar functions", 20 | ida_kernwin.Choose.CHCOL_DEC],
              ["IDB path", 80 | ida_kernwin.Choose.CHCOL_PATH] ])
        self.items = []

    def OnInit(self):
        """Rebuild the rows, samples with the most similar functions first."""
        # Start from an empty list so that a repeated OnInit does not append
        # the same rows again (FnCh.OnInit resets its rows the same way).
        self.items = []
        for sha256, v in sorted(list(self.res.items()), key=lambda x: x[1]['mcnt']['total'], reverse=True):
            if v['mcnt']['total'] > 0:
                self.items.append([str(sha256), '{}'.format(v['mcnt']['total']), '{}'.format(v['mcnt']['analyzed']), str(v['path'])])
        return True

    def OnGetSize(self):
        return len(self.items)

    def OnGetLine(self, n):
        return self.items[n]

    def OnSelectLine(self, n):
        """Open the per-function chooser for the selected sample."""
        sha256 = self.items[n][0]
        c = FnCh("similarities with {}(snip)".format(sha256[:8]), self.res[sha256]['mfn'], self.res[sha256]['path'])
        c.Show()

    def OnRefresh(self, n):
        return n

    def OnClose(self):
        print("closed ", self.title)
class FnFuzzyForm(ida_kernwin.Form):
    """Option dialog of fn_fuzzy: general options plus the Export and Compare specific ones."""
    def __init__(self):
        # The raw string is the form template; every {name} placeholder in it
        # refers to the control registered under the same name in the
        # dictionary passed as the second argument.
        ida_kernwin.Form.__init__(self,
r"""BUTTON YES* Run
BUTTON CANCEL Cancel
fn_fuzzy
{FormChangeCb}
General Options
<DB file path:{iDBSave}>
<minimum function code size:{iMinBytes}>
<exclude library/thunk functions:{cLibthunk}>
<enable debug messages:{cDebug}>{cGroup}>
<##Commands##Export:{rExport}>
<Compare:{rCompare}>{rGroup}>
Export Options
<update the DB records:{cUpdate}>
<store flags as analyzed functions:{cAnaExp}>{cEGroup}>
<analyzed function name prefix/suffix (regex):{iPrefix}>
Compare Options
<compare with only analyzed functions:{cAnaCmp}>
<compare with only IDBs in the specified folder:{cFolCmp}>{cCGroup}>
<the folder path:{iFolder}>
<function code size comparison criteria (0-100):{iRatio}>
<function similarity score threshold (0-100) without CFG match:{iSimilarity}>
<function similarity score threshold (0-100) with CFG match:{iSimilarityCFG}>
<function code size threshold evaluated by only CFG match:{iMaxBytesForScore}>
""",
        {
            'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange),
            'cGroup': ida_kernwin.Form.ChkGroupControl(("cLibthunk", "cDebug")),
            'iDBSave': ida_kernwin.Form.FileInput(save=True),
            'iMinBytes': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),
            'rGroup': ida_kernwin.Form.RadGroupControl(("rCompare", "rExport")),
            'cEGroup': ida_kernwin.Form.ChkGroupControl(("cUpdate", "cAnaExp")),
            'iPrefix': ida_kernwin.Form.StringInput(),
            'cCGroup': ida_kernwin.Form.ChkGroupControl(("cAnaCmp", "cFolCmp")),
            'iFolder': ida_kernwin.Form.DirInput(),
            'iRatio': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),
            'iSimilarity': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),
            'iSimilarityCFG': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_DEC),
            'iMaxBytesForScore': ida_kernwin.Form.NumericInput(tp=ida_kernwin.Form.FT_HEX),
        })
    def OnFormChange(self, fid):
        """
        Form-change callback: set the initial values and enable only the
        option fields that belong to the selected command.  Always returns 1.
        """
        # fid == -1: presumably the form-initialisation notification -- TODO confirm.
        # Sets the default check boxes, selects Compare and enables the
        # Compare-only fields.
        if fid == -1:
            self.SetControlValue(self.cLibthunk, True)
            self.SetControlValue(self.cAnaExp, True)
            self.SetControlValue(self.cAnaCmp, True)
            self.SetControlValue(self.rCompare, True)
            self.EnableField(self.cEGroup, False)
            self.EnableField(self.iPrefix, False)
            self.EnableField(self.cCGroup, True)
            self.EnableField(self.iSimilarity, True)
            self.EnableField(self.iSimilarityCFG, True)
            self.EnableField(self.iMaxBytesForScore, True)
            self.EnableField(self.iRatio, True)
        # Export selected: enable the export fields, disable the compare fields
        if fid == self.rExport.id:
            self.EnableField(self.cEGroup, True)
            self.EnableField(self.iPrefix, True)
            self.EnableField(self.cCGroup, False)
            self.EnableField(self.iSimilarity, False)
            self.EnableField(self.iSimilarityCFG, False)
            self.EnableField(self.iMaxBytesForScore, False)
            self.EnableField(self.iRatio, False)
        # Compare selected: the opposite
        elif fid == self.rCompare.id:
            self.EnableField(self.cEGroup, False)
            self.EnableField(self.iPrefix, False)
            self.EnableField(self.cCGroup, True)
            self.EnableField(self.iSimilarity, True)
            self.EnableField(self.iSimilarityCFG, True)
            self.EnableField(self.iMaxBytesForScore, True)
            self.EnableField(self.iRatio, True)
        return 1
class FnFuzzy(object):
    """
    Export the per-function fuzzy hashes of the current IDB to a SQLite
    database, or compare them with the samples already stored there.

    Two hashes are computed per function: an ssdeep hash over the extracted
    code bytes and a Machoc hash over the control-flow graph.
    """

    def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre, f_ana_cmp = False, f_fol_cmp = False, ana_fol='', threshold = None, threshold_cfg = None, max_bytes_for_score = None, ratio = 0):
        """
        Open db_path (creating the schema when it is empty), load a copy of it
        into memory for the SELECT queries and remember the options.

        When the SHA256 of the input file cannot be retrieved, an error is
        printed and self.sha256 is left as None.
        """
        self.f_debug = f_debug
        self.conn = sqlite3.connect(db_path)
        self.cur = self.conn.cursor()
        self.init_db()
        self.in_memory_db()
        self.min_bytes = min_bytes
        self.f_ex_libthunk = f_ex_libthunk
        # for export
        self.f_update = f_update
        self.f_ana_exp = f_ana_exp
        self.ana_pre = ana_pre
        if f_ana_exp:
            self.ana_pat = re.compile(self.ana_pre)
        # for compare
        self.f_ana_cmp = f_ana_cmp
        self.f_fol_cmp = f_fol_cmp
        self.ana_fol = ana_fol
        self.threshold = threshold
        self.threshold_cfg = threshold_cfg
        self.max_bytes_for_score = max_bytes_for_score
        self.ratio = float(ratio)

        self.idb_path = get_idb_path()
        self.sha256 = ida_nalt.retrieve_input_file_sha256()
        try:
            self.sha256 = self.sha256.hex()
            self.md5 = ida_nalt.retrieve_input_file_md5().lower()
        except AttributeError:
            message = 'ida_nalt.retrieve_input_file_sha256() returned None. Probably the IDB was generated by old IDA (<6.9). Check the version by ida_netnode.cvar.root_node.supstr(ida_nalt.RIDX_IDA_VERSION)'
            error(message)

    def debug(self, msg):
        """Print msg with a [D] prefix when debug messages are enabled."""
        if self.f_debug:
            print("[D] {}".format(msg))

    def init_db(self):
        """Create the sample/function tables and their indexes when the database has no table yet."""
        self.cur.execute("SELECT * FROM sqlite_master WHERE type='table'")
        if self.cur.fetchone() is None:
            info('DB initialized')
            self.cur.execute("CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)")
            #self.cur.execute("CREATE INDEX sha256_index ON sample(sha256)")
            self.cur.execute("CREATE INDEX path_index ON sample(path)")
            self.cur.execute("CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))")
            self.cur.execute("CREATE INDEX f_ana_index ON function(f_ana)")
            self.cur.execute("CREATE INDEX bsize_index ON function(bsize)")

    def in_memory_db(self): # for SELECT
        """Copy the on-disk database into an in-memory one (self.mconn / self.mcur)."""
        script = "".join("{}\n".format(line) for line in self.conn.iterdump())
        self.mconn = sqlite3.connect(":memory:")
        self.mconn.cursor().executescript(script)
        self.mconn.commit()
        self.mconn.row_factory = sqlite3.Row
        self.mcur = self.mconn.cursor()

    def calc_fn_machoc(self, fva, fname): # based on Machoc hash implementation (https://github.com/0x00ach/idadiff)
        """
        Compute the Machoc (CFG) hash of the function at fva.

        Returns (hash, number of blocks counted), or (None, None) when IDA has
        no function object at fva.
        """
        func = idaapi.get_func(fva)
        if func is None:
            self.debug('{}: ignored due to lack of function object'.format(fname))
            return None, None
        flow = idaapi.FlowChart(f=func)
        cur_hash_rev = ""
        addrIds = []
        cur_id = 1
        for c in range(0, flow.size):
            cur_basic = flow[c]
            cur_hash_rev += shex(cur_basic.start_ea)+":"
            addrIds.append((shex(cur_basic.start_ea), str(cur_id)))
            cur_id += 1
            addr = cur_basic.start_ea
            blockEnd = cur_basic.end_ea
            mnem = GetMnem(addr)
            while mnem != "":
                if mnem == "call": # should be separated into 2 blocks by call
                    cur_hash_rev += "c,"
                    addr = NextHead(addr, blockEnd)
                    mnem = GetMnem(addr)
                    if addr != BADADDR:
                        cur_hash_rev += shex(addr)+";"+shex(addr)+":"
                        addrIds.append((shex(addr), str(cur_id)))
                        cur_id += 1
                else:
                    addr = NextHead(addr, blockEnd)
                    mnem = GetMnem(addr)
            # successors of the block, sorted by address and comma separated
            refs = sorted(suc.start_ea for suc in cur_basic.succs())
            cur_hash_rev += ",".join(shex(ref) for ref in refs) + ";"

        # change addr to index
        for aid in addrIds:
            cur_hash_rev = cur_hash_rev.replace(aid[0], aid[1])

        # calculate machoc hash value
        self.debug('{}: CFG = {}'.format(fname, cur_hash_rev))
        return mmh3.hash(cur_hash_rev) & 0xFFFFFFFF, cur_id-1

    def calc_fn_ssdeep(self, fva, fname):
        """
        Compute the ssdeep hash over the code bytes extracted from the function at fva.

        Returns (hash string, number of hashed bytes), or (None, None) when
        fewer than self.min_bytes bytes were extracted.
        """
        d2h = b''
        for bb in yara_fn.get_basic_blocks(fva):
            rule = yara_fn.get_basic_block_rule(bb)
            if rule:
                chk = rule.cut_bytes_for_hash
                if len(chk) < yara_fn.MIN_BB_BYTE_COUNT:
                    continue
                d2h += chk.encode()
                #self.debug('chunk at {:#x}: {}'.format(bb.va, get_hex_pat(chk)))
        #self.debug('total func seq at {:#x}: {}'.format(fva, get_hex_pat(d2h)))
        if len(d2h) < self.min_bytes:
            self.debug('{}: ignored because of the number of extracted code bytes {}'.format(fname, len(d2h)))
            return None, None

        result_buffer = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
        file_buffer = ctypes.create_string_buffer(d2h)
        # create_string_buffer appends a NUL byte, hence the - 1
        fuzzy_lib.fuzzy_hash_buf(file_buffer, len(file_buffer) - 1, result_buffer)
        hash_value = result_buffer.value.decode("ascii")
        return hash_value, len(d2h)

    def existed(self):
        """Return True when the in-memory copy of the DB already has a sample row for this input file."""
        self.mcur.execute("SELECT sha256 FROM sample WHERE sha256 = ?", (self.sha256,))
        return self.mcur.fetchone() is not None

    def exclude_libthunk(self, fva, fname):
        """Return True when library/thunk exclusion is enabled and the function at fva is one of them."""
        if self.f_ex_libthunk:
            flags = get_func_attr(fva, FUNCATTR_FLAGS)
            if flags & FUNC_LIB:
                self.debug('{}: ignored because of library function'.format(fname))
                return True
            if flags & FUNC_THUNK:
                self.debug('{}: ignored because of thunk function'.format(fname))
                return True
        return False

    def export(self):
        """
        Store the hashes of every eligible function of the current IDB.

        Returns False (and stores nothing) when the sample is already recorded
        and updating was not requested, True otherwise.
        """
        if self.existed() and not self.f_update:
            info('{}: The sample records are present in DB. skipped.'.format(self.sha256))
            return False

        self.cur.execute("REPLACE INTO sample values(?, ?)", (self.sha256, self.idb_path))

        pnum = tnum = 0
        records = []
        for fva in idautils.Functions():
            fname = get_func_name(fva)
            tnum += 1
            if self.exclude_libthunk(fva, fname):
                continue
            fhd, bsize = self.calc_fn_ssdeep(fva, fname)
            fhm, cfgnum = self.calc_fn_machoc(fva, fname)
            if fhd and fhm:
                pnum += 1
                f_ana = bool(self.ana_pat.search(fname)) if self.f_ana_exp else False
                tinfo = idaapi.tinfo_t()
                idaapi.get_tinfo(fva, tinfo)
                ptype = idaapi.print_tinfo('', 0, 0, idaapi.PRTYPE_1LINE, tinfo, fname, '')
                ptype = ptype + ';' if ptype is not None else ptype
                # fva is 64-bit int causing OverflowError
                records.append((self.sha256, '{:#x}'.format(fva), fname, fhd, fhm, f_ana, bsize, ptype))
                self.debug('EXPORT {} at {:#x}: ssdeep={} (size={}), machoc={} (num of CFG={})'.format(fname, fva, fhd, bsize, fhm, cfgnum))

        self.cur.executemany("REPLACE INTO function values (?, ?, ?, ?, ?, ?, ?, ?)", records)
        success ('{} of {} functions exported'.format(pnum, tnum))
        return True

    def compare(self):
        """
        Compare every eligible function of the current IDB with the stored
        functions of similar size and show the result in a summary chooser.
        """
        res = defaultdictRecurse()
        if self.f_fol_cmp:
            self.mcur.execute("SELECT sha256,path FROM sample WHERE path LIKE ?", (self.ana_fol+'%',))
        else:
            self.mcur.execute("SELECT sha256,path FROM sample")
        frows = self.mcur.fetchall()
        num_of_samples = len(frows)
        for sha256, path in frows:
            res[sha256]['path'] = path
            res[sha256]['mcnt'].default_factory = lambda: 0

        sql = "SELECT function.sha256,fname,fhd,fhm,f_ana,ptype FROM function INNER JOIN sample on function.sha256 == sample.sha256 WHERE path LIKE ? AND " if self.f_fol_cmp else "SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE "
        sql += "f_ana == 1 AND bsize BETWEEN ? AND ?" if self.f_ana_cmp else "bsize BETWEEN ? AND ?"

        fns = list(idautils.Functions())
        for fva in tqdm(fns, desc='comparing functions'):
            fname = get_func_name(fva)
            if self.exclude_libthunk(fva, fname) or not num_of_samples:
                continue
            pfhd, pbsize = self.calc_fn_ssdeep(fva, fname)
            pfhm, pcfgnum = self.calc_fn_machoc(fva, fname)
            if pfhd and pfhm:
                pbuf = ctypes.create_string_buffer(pfhd.encode())
                self.debug('COMPARE {}: ssdeep={} (size={}), machoc={} (num of bb={})'.format(fname, pfhd, pbsize, pfhm, pcfgnum))
                # only stored functions whose size is within +-ratio% are fetched
                min_ = pbsize * (1 - (self.ratio / 100))
                max_ = pbsize * (1 + (self.ratio / 100))
                self.debug('min={}, max={}'.format(min_, max_))
                if self.f_fol_cmp:
                    self.mcur.execute(sql, (self.ana_fol+'%', min_, max_))
                else:
                    self.mcur.execute(sql, (min_, max_))
                frows = self.mcur.fetchall()
                self.debug('targeted {} records'.format(len(frows)))
                for sha256, sfname, sfhd, sfhm, sf_ana, sptype in frows:
                    if sha256 == self.sha256: # skip the self
                        continue
                    best = res[sha256]['mfn'][fva]
                    best.default_factory = lambda: 0
                    sbuf = ctypes.create_string_buffer(sfhd.encode())
                    score = fuzzy_lib.fuzzy_compare(pbuf, sbuf)
                    cfg_match = bool(pfhm == sfhm)
                    dbg_cond = g_dbg_flag and fva == g_dbg_fva and sfname == g_dbg_fname and sha256 == g_dbg_sha256
                    if dbg_cond:
                        print('{:#x}: compared with {} in {} score = {} machoc match = {}'.format(fva, sfname, sha256, score, cfg_match))
                    if (score >= self.threshold) or (score >= self.threshold_cfg and pfhm == sfhm) or (pbsize > self.max_bytes_for_score and pfhm == sfhm):
                        if dbg_cond:
                            print('{:#x}: counting {} in {} for total number'.format(fva, sfname, sha256))
                        res[sha256]['mcnt']['total'] += 1
                        if sf_ana:
                            res[sha256]['mcnt']['analyzed'] += 1
                            # keep the best scoring analyzed match per primary function
                            if score > best['score'] or (best['score'] == 0 and pbsize > self.max_bytes_for_score):
                                best['score'] = score
                                best['cfg_match'] = cfg_match
                                best['sfname'] = sfname
                                best['sptype'] = sptype
                                best['pbsize'] = pbsize
                                if dbg_cond:
                                    print('{:#x}: appended record = {} in {}'.format(fva, sfname, sha256))

        c = SummaryCh("fn_fuzzy summary", res)
        c.Show()
        success('totally {} samples compared'.format(num_of_samples))

    def close(self):
        """Commit pending writes and release the on-disk and the in-memory connections."""
        self.conn.commit()
        self.cur.close()
        # the connections themselves used to be left open
        self.conn.close()
        self.mcur.close()
        self.mconn.close()
def info(msg):
    """Print msg as an informational line ("[*]" prefix)."""
    line = "[*] {}".format(msg)
    print(line)


def success(msg):
    """Print msg as a success line ("[+]" prefix)."""
    line = "[+] {}".format(msg)
    print(line)


def error(msg):
    """Print msg as an error line ("[!]" prefix)."""
    line = "[!] {}".format(msg)
    print(line)
def get_hex_pat(buf):
    """
    Return buf as space-separated two-digit lowercase hex values.

    buf may be a bytes-like object or a text string; iterating bytes on
    Python 3 yields integers, on which ord() raises TypeError, so the two
    cases are converted separately.
    """
    if isinstance(buf, (bytes, bytearray)):
        values = bytearray(buf)
    else:
        values = [ord(x) for x in buf]
    return ' '.join(['{:02x}'.format(v) for v in values])
def shex(a):
    """Return hex(a) without the trailing "L" that long integers carry on Python 2."""
    text = hex(a)
    while text.endswith("L"):
        text = text[:-1]
    return text
def set_decomplier_cmt(ea, cmt):
    """
    Attach cmt as a user comment at ea in the decompiled function and save it.

    An error line is printed when idaapi.decompile() returns no function.
    """
    cfunc = idaapi.decompile(ea)
    if not cfunc:
        # this call used to be spelled ".formart", which raised AttributeError
        # instead of reporting the failure
        error("Decompile failed: {:#x}".format(ea))
        return

    tl = idaapi.treeloc_t()
    tl.ea = ea
    tl.itp = idaapi.ITP_SEMI
    cfunc.set_user_cmt(tl, cmt)
    cfunc.save_user_cmts()
def main():
    """
    Entry point of the script.

    With a "fn_fuzzy" plugin option (given by cli_export.py through -O) the
    current IDB is exported and IDA exits with code 0 (exported) or 2
    (skipped).  Otherwise the option dialog is shown and the selected command
    (Export or Compare) is run.
    """
    # function-scope import: this block is the only user of ast
    import ast

    info('start')

    plugin_options = idaapi.get_plugin_options("fn_fuzzy")
    if plugin_options: # CLI (export only)
        # not change the database to maintain the window setting
        process_config_line("ABANDON_DATABASE=YES")

        start = time.time()
        options = plugin_options.split(':')
        min_bytes = int(options[0])
        # The flags arrive as the text 'True'/'False'.  literal_eval accepts
        # Python literals only, whereas eval() would execute any expression
        # placed in the option string.
        f_ex_libthunk = ast.literal_eval(options[1])
        f_update = ast.literal_eval(options[2])
        f_ana_exp = ast.literal_eval(options[3])
        ana_pre = options[4]
        # the DB path may itself contain ':' (drive letter), so re-join the rest
        db_path = ':'.join(options[5:])

        ff = FnFuzzy(False, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre)
        res = ff.export()
        ff.close()
        elapsed = time.time() - start
        info('done (CLI)')
        if res: # return code 1 is reserved for error
            qexit(0)
        else:
            qexit(2) # already exported (skipped)
    else:
        f = FnFuzzyForm()
        f.Compile()
        f.iDBSave.value = g_db_path
        f.iMinBytes.value = g_min_bytes
        f.iPrefix.value = g_analyzed_prefix
        f.iFolder.value = os.path.dirname(get_idb_path())
        f.iSimilarity.value = g_threshold
        f.iSimilarityCFG.value = g_threshold_cfg
        f.iMaxBytesForScore.value = g_max_bytes_for_score
        f.iRatio.value = g_bsize_ratio
        r = f.Execute()
        if r == 1: # Run
            start = time.time()
            ff = FnFuzzy(f.cDebug.checked, f.iDBSave.value, f.iMinBytes.value, f.cLibthunk.checked, f.cUpdate.checked, f.cAnaExp.checked, f.iPrefix.value, f.cAnaCmp.checked, f.cFolCmp.checked, f.iFolder.value, f.iSimilarity.value, f.iSimilarityCFG.value, f.iMaxBytesForScore.value, f.iRatio.value)
            if f.rExport.selected:
                if ff.sha256 is None:
                    print('aborted')
                    # release the database connections before leaving
                    ff.close()
                    return
                ff.export()
                #cProfile.runctx('ff.export()', None, locals())
            else:
                ff.compare()
                #cProfile.runctx('ff.compare()', None, locals())
            ff.close()
            elapsed = time.time() - start
        else:
            print('canceled')
            return

    info('elapsed time = {} sec'.format(elapsed))
    info('done')


if __name__ == '__main__':
    main()
================================================
FILE: fn_fuzzy/fn_fuzzy_7x.py
================================================
# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage
# Takahiro Haruyama (@cci_forensics)
import os, ctypes, sqlite3, re, time, sys, subprocess
import cProfile
from collections import defaultdict
from pprint import PrettyPrinter
from io import StringIO
from tqdm import tqdm
from idc import *
import idautils, ida_nalt, ida_kernwin, idaapi, ida_expr, ida_typeinf
import mmh3
import yara_fn_7x # modified version in the same folder
g_db_path = r'C:\analysisw\tics\fn_fuzzy.sqlite' # plz edit your path
g_min_bytes = 0x10 # minimum number of extracted code bytes per function
g_analyzed_prefix = r'fn_|func_' # analyzed function name prefix (regex)
g_threshold = 50 # function similarity score threshold without CFG match
g_threshold_cfg = 10 # function similarity score threshold with CFG match
g_max_bytes_for_score = 0x100 # more code bytes are evaluated by only CFG match
g_bsize_ratio = 40 # function binary size correction ratio to compare (40 is enough)
# debug purpose to check one function matching
g_dbg_flag = False
g_dbg_fva = 0x180015978
g_dbg_fname = 'fn_blob_get_word_param_and_seek'
g_dbg_sha256 = ''
# initialization for ssdeep
SPAMSUM_LENGTH = 64
FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH + 20)
dirpath = os.path.dirname(__file__)
_lib_path = os.path.join(dirpath, 'fuzzy64.dll')
fuzzy_lib = ctypes.cdll.LoadLibrary(_lib_path)
g_dump_types_path = os.path.join(dirpath, 'dump_types.py')
class defaultdictRecurse(defaultdict):
    """defaultdict whose missing keys are filled with new instances of its own class."""

    def __init__(self):
        defaultdict.__init__(self, type(self))
class import_handler_t(ida_kernwin.action_handler_t):
    """
    Popup action that imports the name, prototype and a comment for the selected chooser rows.

    items    -- chooser rows; this handler reads column 0 (ssdeep score),
                1 (machoc match), 2 (primary function name), 4 (secondary
                function name) and 5 (secondary prototype)
    idb_path -- path of the secondary idb the rows come from
    title    -- title of the chooser to refresh afterwards
    """

    def __init__(self, items, idb_path, title):
        ida_kernwin.action_handler_t.__init__(self)
        self.items = items
        self.idb_path = idb_path
        self.title = title

    def import_types(self):
        """
        Import the type information of the secondary idb into the current database.

        When no .idc dump exists next to the secondary idb, IDA is started on
        that idb with dump_types.py to create it.  Returns False when that
        dump fails, True once the .idc script has been executed.
        """
        idc_path = os.path.splitext(self.idb_path)[0] + '.idc'

        # dump type information from the 2nd idb
        if not os.path.exists(idc_path):
            with open(self.idb_path, 'rb') as f:
                sig = f.read(4)
            # The signature is read in binary mode, so compare it with a bytes
            # literal; the str 'IDA1' never matches on Python 3.
            ida = 'ida.exe' if sig == b'IDA1' else 'ida64.exe'
            ida_path = os.path.join(idadir(), ida)
            cmd = [ida_path, '-S{}'.format(g_dump_types_path), self.idb_path]

            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            proc.communicate()  # wait for IDA to finish; its output is not used
            if proc.returncode == 0:
                success('{}: type information successfully dumped'.format(self.idb_path))
            else:
                error('{}: type information dumping failed'.format(self.idb_path))
                return False

        # import the type information
        ida_expr.exec_idc_script(None, str(idc_path), "main", None, 0)
        return True

    def _apply_prototype(self, ea, sptype):
        """Parse the declaration sptype and apply it at ea; return the result of apply_tinfo."""
        tinfo = idaapi.tinfo_t()
        idaapi.parse_decl2(idaapi.cvar.idati, sptype, tinfo, 0)
        return idaapi.apply_tinfo(ea, tinfo, 0)

    def activate(self, ctx):
        """Rename, re-type and comment the primary function of every selected row."""
        for idx in ctx.chooser_selection:
            row = self.items[idx]

            # rename the function
            ea = get_name_ea_simple(row[2])
            sfname = str(row[4])
            ida_name.force_name(ea, sfname)
            success('{:#x}: renamed to {}'.format(ea, sfname))

            # set the function prototype ('None' is the text form of a missing prototype)
            sptype = str(row[5])
            if sptype != 'None':
                if self._apply_prototype(ea, sptype):
                    success('{:#x}: function prototype set to {}'.format(ea, sptype))
                else:
                    error('{:#x}: function prototype set FAILED (maybe you should import the types?)'.format(ea))
                    # retry once after importing the types of the secondary idb
                    if ask_yn(0, 'Do you import types from the secondary idb?') == 1:
                        if self.import_types():
                            if self._apply_prototype(ea, sptype):
                                success('{:#x}: function prototype set to {}'.format(ea, sptype))
                            else:
                                error('{:#x}: function prototype set FAILED again'.format(ea))

            # insert the comment
            score = row[0]
            mmatch = row[1]
            cmt = 'fn_fuzzy: ssdeep={}, machoc={}'.format(score, mmatch)
            set_func_cmt(ea, cmt, 1)
            #set_decomplier_cmt(ea, cmt) # not sure how to avoid orphan comment

        # update the Choose rows
        ida_kernwin.refresh_chooser(self.title)

    def update(self, ctx):
        """Keep the action always enabled."""
        # Alternative kept for reference: enable the action for chooser widgets only.
        #   return ida_kernwin.AST_ENABLE_FOR_WIDGET \
        #       if ida_kernwin.is_chooser_widget(ctx.widget_type) \
        #       else ida_kernwin.AST_DISABLE_FOR_WIDGET
        return idaapi.AST_ENABLE_ALWAYS
class FnCh(ida_kernwin.Choose):
    """Multi-select chooser listing the function matches against one secondary sample.

    mfn maps a primary function address to a dict with the keys 'score',
    'cfg_match', 'pbsize', 'sfname' and 'sptype' (filled by FnFuzzy.compare).
    """

    def __init__(self, title, mfn, idb_path):
        self.mfn = mfn
        self.idb_path = idb_path
        self.title = title
        col_dec = ida_kernwin.Choose.CHCOL_DEC
        col_plain = ida_kernwin.Choose.CHCOL_PLAIN
        columns = [
            ["ssdeep score", 10 | col_dec],
            ["machoc matched", 10 | col_plain],
            ["primary function", 30 | col_plain],
            ["primary bsize", 10 | col_dec],
            ["secondary analyzed function", 30 | col_plain],
            ["secondary prototype", 40 | col_plain],
        ]
        ida_kernwin.Choose.__init__(self, title, columns, flags = ida_kernwin.Choose.CH_MULTI)

    def OnInit(self):
        """(Re)build the rows, best score first, skipping entries without a match."""
        ranked = sorted(self.mfn.items(), key=lambda pair: pair[1]['score'], reverse=True)
        rows = []
        for fva, match in ranked:
            if not match['sfname']:
                continue
            rows.append([
                '{}'.format(match['score']),
                '{}'.format(match['cfg_match']),
                str(get_name(fva)),
                '{}'.format(match['pbsize']),
                str(match['sfname']),
                '{}'.format(match['sptype']),
            ])
        self.items = rows
        return True

    def OnPopup(self, form, popup_handle):
        """Attach the import action to the context menu of this chooser."""
        handler = import_handler_t(self.items, self.idb_path, self.title)
        desc = ida_kernwin.action_desc_t("choose:actFnFuzzyImport", 'Import function name and prototype', handler)
        ida_kernwin.attach_dynamic_action_to_popup(form, popup_handle, desc)

    def OnGetSize(self):
        return len(self.items)

    def OnGetLine(self, n):
        return self.items[n]

    def OnSelectLine(self, n):
        """Jump to the primary function of the first selected row."""
        first = n[0] # due to CH_MULTI
        target = get_name_ea_simple(self.items[first][2])
        ida_kernwin.jumpto(target)

    def OnRefresh(self, n):
        """Rebuild the rows (the primary names may have changed after an import)."""
        self.OnInit()
        return None

    def OnClose(self):
        print("closed ", self.title)
class SummaryCh(ida_kernwin.Choose):
    """Chooser summarizing, per secondary sample, how many similar functions were found.

    res maps a sample sha256 to a dict with the keys 'path', 'mcnt'
    ('total'/'analyzed' counters) and 'mfn' (per-function matches), as built
    by FnFuzzy.compare.
    """

    def __init__(self, title, res):
        self.res = res
        ida_kernwin.Choose.__init__(
            self,
            title,
            [ ["SHA256", 20 | ida_kernwin.Choose.CHCOL_PLAIN],
              ["total similar functions", 20 | ida_kernwin.Choose.CHCOL_DEC],
              ["analyzed similar functions", 20 | ida_kernwin.Choose.CHCOL_DEC],
              ["IDB path", 80 | ida_kernwin.Choose.CHCOL_PATH] ])
        self.items = []

    def OnInit(self):
        """Build the rows (samples with at least one match), most matches first."""
        # start from an empty list so that a repeated initialization does not
        # append the same rows again (FnCh.OnInit resets its rows the same way)
        self.items = []
        for sha256, v in sorted(self.res.items(), key=lambda x: x[1]['mcnt']['total'], reverse=True):
            if v['mcnt']['total'] > 0:
                self.items.append([str(sha256), '{}'.format(v['mcnt']['total']), '{}'.format(v['mcnt']['analyzed']), str(v['path'])])
        return True

    def OnGetSize(self):
        return len(self.items)

    def OnGetLine(self, n):
        return self.items[n]

    def OnSelectLine(self, n):
        """Open the per-function chooser for the selected sample."""
        sha256 = self.items[n][0]
        c = FnCh("similarities with {}(snip)".format(sha256[:8]), self.res[sha256]['mfn'], self.res[sha256]['path'])
        c.Show()

    def OnRefresh(self, n):
        return n

    def OnClose(self):
        print("closed ", self.title)
class FnFuzzyForm(ida_kernwin.Form):
    """Option dialog of fn_fuzzy (general, export and compare settings)."""

    def __init__(self):
        form_cls = ida_kernwin.Form
        template = r"""BUTTON YES* Run
BUTTON CANCEL Cancel
fn_fuzzy
{FormChangeCb}
General Options
<DB file path:{iDBSave}>
<minimum function code size:{iMinBytes}>
<exclude library/thunk functions:{cLibthunk}>
<enable debug messages:{cDebug}>{cGroup}>
<##Commands##Export:{rExport}>
<Compare:{rCompare}>{rGroup}>
Export Options
<update the DB records:{cUpdate}>
<store flags as analyzed functions:{cAnaExp}>{cEGroup}>
<analyzed function name prefix/suffix (regex):{iPrefix}>
Compare Options
<compare with only analyzed functions:{cAnaCmp}>
<compare with only IDBs in the specified folder:{cFolCmp}>{cCGroup}>
<the folder path:{iFolder}>
<function code size comparison criteria (0-100):{iRatio}>
<function similarity score threshold (0-100) without CFG match:{iSimilarity}>
<function similarity score threshold (0-100) with CFG match:{iSimilarityCFG}>
<function code size threshold evaluated by only CFG match:{iMaxBytesForScore}>
"""
        controls = {
            'FormChangeCb': form_cls.FormChangeCb(self.OnFormChange),
            'cGroup': form_cls.ChkGroupControl(("cLibthunk", "cDebug")),
            'iDBSave': form_cls.FileInput(save=True),
            'iMinBytes': form_cls.NumericInput(tp=form_cls.FT_HEX),
            'rGroup': form_cls.RadGroupControl(("rCompare", "rExport")),
            'cEGroup': form_cls.ChkGroupControl(("cUpdate", "cAnaExp")),
            'iPrefix': form_cls.StringInput(),
            'cCGroup': form_cls.ChkGroupControl(("cAnaCmp", "cFolCmp")),
            'iFolder': form_cls.DirInput(),
            'iRatio': form_cls.NumericInput(tp=form_cls.FT_DEC),
            'iSimilarity': form_cls.NumericInput(tp=form_cls.FT_DEC),
            'iSimilarityCFG': form_cls.NumericInput(tp=form_cls.FT_DEC),
            'iMaxBytesForScore': form_cls.NumericInput(tp=form_cls.FT_HEX),
        }
        form_cls.__init__(self, template, controls)

    def _enable_for_mode(self, exporting):
        """Enable the export fields and disable the compare fields, or vice versa."""
        for field in (self.cEGroup, self.iPrefix):
            self.EnableField(field, exporting)
        for field in (self.cCGroup, self.iSimilarity, self.iSimilarityCFG,
                      self.iMaxBytesForScore, self.iRatio):
            self.EnableField(field, not exporting)

    def OnFormChange(self, fid):
        """Set the defaults on initialization and toggle fields with the command radio."""
        if fid == -1:
            # form initialization: default check boxes and the compare command
            for ctrl in (self.cLibthunk, self.cAnaExp, self.cAnaCmp, self.rCompare):
                self.SetControlValue(ctrl, True)
            self._enable_for_mode(False)
        if fid == self.rExport.id:
            self._enable_for_mode(True)
        elif fid == self.rCompare.id:
            self._enable_for_mode(False)
        return 1
class FnFuzzy(object):
    """Export fuzzy hashes of the functions in the current IDB and compare them.

    Every function gets an ssdeep hash of its code bytes (calc_fn_ssdeep) and a
    Machoc hash of its control flow graph (calc_fn_machoc).  export() stores
    them in the SQLite file `db_path`; compare() matches the functions of the
    current IDB against the stored records, using an in-memory copy of the
    database for the SELECT queries.
    """

    def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre, f_ana_cmp = False, f_fol_cmp = False, ana_fol='', threshold = None, threshold_cfg = None, max_bytes_for_score = None, ratio = 0):
        self.f_debug = f_debug
        self.conn = sqlite3.connect(db_path)
        self.cur = self.conn.cursor()
        self.init_db()
        self.in_memory_db()
        self.min_bytes = min_bytes
        self.f_ex_libthunk = f_ex_libthunk
        # for export
        self.f_update = f_update
        self.f_ana_exp = f_ana_exp
        self.ana_pre = ana_pre
        if f_ana_exp:
            self.ana_pat = re.compile(self.ana_pre)
        # for compare
        self.f_ana_cmp = f_ana_cmp
        self.f_fol_cmp = f_fol_cmp
        self.ana_fol = ana_fol
        self.threshold = threshold
        self.threshold_cfg = threshold_cfg
        self.max_bytes_for_score = max_bytes_for_score
        self.ratio = float(ratio)
        self.idb_path = get_idb_path()
        self.md5 = None # stays None when the hashes cannot be retrieved
        self.sha256 = ida_nalt.retrieve_input_file_sha256()
        try:
            self.sha256 = self.sha256.hex()
            self.md5 = ida_nalt.retrieve_input_file_md5().lower()
        except AttributeError:
            # self.sha256 is left as None in this case; main() checks it before exporting
            message = 'ida_nalt.retrieve_input_file_sha256() returned None. Probably the IDB was generated by old IDA (<6.9). Check the version by ida_netnode.cvar.root_node.supstr(ida_nalt.RIDX_IDA_VERSION)'
            error(message)

    def debug(self, msg):
        """Print `msg` only when debug messages are enabled."""
        if self.f_debug:
            print("[D] {}".format(msg))

    def init_db(self):
        """Create the tables and indexes when the database has no table yet."""
        self.cur.execute("SELECT * FROM sqlite_master WHERE type='table'")
        if self.cur.fetchone() is None:
            info('DB initialized')
            self.cur.execute("CREATE TABLE IF NOT EXISTS sample(sha256 UNIQUE, path)")
            self.cur.execute("CREATE INDEX path_index ON sample(path)")
            self.cur.execute("CREATE TABLE IF NOT EXISTS function(sha256, fva, fname, fhd, fhm, f_ana, bsize, ptype, UNIQUE(sha256, fva))")
            self.cur.execute("CREATE INDEX f_ana_index ON function(f_ana)")
            self.cur.execute("CREATE INDEX bsize_index ON function(bsize)")

    def in_memory_db(self): # for SELECT
        """Copy the database into an in-memory connection (self.mconn / self.mcur)."""
        dump = "\n".join(self.conn.iterdump())
        self.mconn = sqlite3.connect(":memory:")
        self.mconn.cursor().executescript(dump)
        self.mconn.commit()
        # set before creating the cursor so that the rows are sqlite3.Row objects
        self.mconn.row_factory = sqlite3.Row
        self.mcur = self.mconn.cursor()

    def calc_fn_machoc(self, fva, fname): # based on Machoc hash implementation (https://github.com/0x00ach/idadiff)
        """Return (machoc hash, number of CFG nodes) or (None, None) without a function.

        The CFG is serialized as "block:[c,next;next:]successors;" per basic
        block (blocks are additionally split after each call), the addresses
        are replaced with sequential ids and the string is hashed with murmur3.
        """
        func = idaapi.get_func(fva)
        if func is None:
            self.debug('{}: ignored due to lack of function object'.format(fname))
            return None, None
        flow = idaapi.FlowChart(f=func)
        parts = []    # pieces of the serialized CFG
        addr_ids = [] # (hex address, id) pairs in the order of discovery
        cur_id = 1
        for c in range(flow.size):
            cur_basic = flow[c]
            parts.append(shex(cur_basic.start_ea) + ":")
            addr_ids.append((shex(cur_basic.start_ea), str(cur_id)))
            cur_id += 1
            addr = cur_basic.start_ea
            block_end = cur_basic.end_ea
            # print_insn_mnem() returns "" past the last instruction of the block
            mnem = print_insn_mnem(addr)
            while mnem != "":
                is_call = (mnem == "call")
                if is_call: # should be separated into 2 blocks by call
                    parts.append("c,")
                addr = next_head(addr, block_end)
                mnem = print_insn_mnem(addr)
                if is_call and addr != BADADDR:
                    parts.append(shex(addr) + ";" + shex(addr) + ":")
                    addr_ids.append((shex(addr), str(cur_id)))
                    cur_id += 1
            refs = sorted(suc.start_ea for suc in cur_basic.succs())
            parts.append(",".join(shex(ref) for ref in refs) + ";")
        cur_hash_rev = "".join(parts)

        # change addr to index (plain textual replacement, in discovery order;
        # kept unchanged so that the hashes stay comparable with stored records)
        for hex_addr, node_id in addr_ids:
            cur_hash_rev = cur_hash_rev.replace(hex_addr, node_id)

        # calculate machoc hash value
        self.debug('{}: CFG = {}'.format(fname, cur_hash_rev))
        return mmh3.hash(cur_hash_rev) & 0xFFFFFFFF, cur_id-1

    def calc_fn_ssdeep(self, fva, fname):
        """Return (ssdeep hash, number of hashed bytes) or (None, None) when skipped.

        Only the unmasked bytes of the basic blocks are hashed; basic blocks
        with fewer than yara_fn_7x.MIN_BB_BYTE_COUNT of them and functions with
        fewer than self.min_bytes in total are ignored.
        """
        chunks = []
        for bb in yara_fn_7x.get_basic_blocks(fva):
            rule = yara_fn_7x.get_basic_block_rule(bb)
            if rule:
                chk = rule.cut_bytes_for_hash
                if len(chk) < yara_fn_7x.MIN_BB_BYTE_COUNT:
                    continue
                # NOTE(review): chk is a str built with chr(byte) and encode()
                # defaults to UTF-8, so values >= 0x80 become two bytes. Kept
                # as-is because changing it would alter the hashes and sizes
                # of already exported records.
                chunks.append(chk.encode())
        d2h = b''.join(chunks)

        if len(d2h) < self.min_bytes:
            self.debug('{}: ignored because of the number of extracted code bytes {}'.format(fname, len(d2h)))
            return None, None

        result_buffer = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
        file_buffer = ctypes.create_string_buffer(d2h)
        # create_string_buffer() appends a NUL byte, hence the "- 1"
        status = fuzzy_lib.fuzzy_hash_buf(file_buffer, len(file_buffer) - 1, result_buffer)
        if status != 0:
            # ssdeep reports errors with a non-zero return value
            self.debug('{}: ignored because fuzzy_hash_buf failed ({})'.format(fname, status))
            return None, None
        hash_value = result_buffer.value.decode("ascii")
        return hash_value, len(d2h)

    def existed(self):
        """Return True when the current sample is already recorded in the database."""
        self.mcur.execute("SELECT sha256 FROM sample WHERE sha256 = ?", (self.sha256,))
        return self.mcur.fetchone() is not None

    def exclude_libthunk(self, fva, fname):
        """Return True when the function is a library/thunk function to be skipped."""
        if self.f_ex_libthunk:
            flags = get_func_attr(fva, FUNCATTR_FLAGS)
            if flags & FUNC_LIB:
                self.debug('{}: ignored because of library function'.format(fname))
                return True
            if flags & FUNC_THUNK:
                self.debug('{}: ignored because of thunk function'.format(fname))
                return True
        return False

    def export(self):
        """Store the hashes of all functions; return False when the sample is skipped.

        The records are only committed by close().
        """
        if self.existed() and not self.f_update:
            info('{}: The sample records are present in DB. skipped.'.format(self.sha256))
            return False
        self.cur.execute("REPLACE INTO sample values(?, ?)", (self.sha256, self.idb_path))

        pnum = tnum = 0 # exported / total number of functions
        records = []
        for fva in idautils.Functions():
            fname = get_func_name(fva)
            tnum += 1
            if self.exclude_libthunk(fva, fname):
                continue
            fhd, bsize = self.calc_fn_ssdeep(fva, fname)
            fhm, cfgnum = self.calc_fn_machoc(fva, fname)
            if fhd and fhm:
                pnum += 1
                f_ana = bool(self.ana_pat.search(fname)) if self.f_ana_exp else False
                ptype = ida_typeinf.idc_get_type(fva)
                ptype = ptype + ';' if ptype is not None else ptype
                # fva is 64-bit int causing OverflowError
                records.append((self.sha256, '{:#x}'.format(fva), fname, fhd, fhm, f_ana, bsize, ptype))
                self.debug('EXPORT {} at {:#x}: ssdeep={} (size={}), machoc={} (num of CFG={})'.format(fname, fva, fhd, bsize, fhm, cfgnum))

        self.cur.executemany("REPLACE INTO function values (?, ?, ?, ?, ?, ?, ?, ?)", records)
        success ('{} of {} functions exported'.format(pnum, tnum))
        return True

    def compare(self):
        """Match every function against the stored records and show the summary chooser.

        A stored function is counted as similar when the ssdeep score reaches
        self.threshold, or when the machoc hashes are equal and either the
        score reaches self.threshold_cfg or the function has more than
        self.max_bytes_for_score code bytes.
        """
        res = defaultdictRecurse()
        if self.f_fol_cmp:
            self.mcur.execute("SELECT sha256,path FROM sample WHERE path LIKE ?", (self.ana_fol+'%',))
        else:
            self.mcur.execute("SELECT sha256,path FROM sample")
        frows = self.mcur.fetchall()
        num_of_samples = len(frows)
        for sha256, path in frows:
            res[sha256]['path'] = path
            res[sha256]['mcnt'].default_factory = lambda: 0 # counters start at 0

        sql = "SELECT function.sha256,fname,fhd,fhm,f_ana,ptype FROM function INNER JOIN sample on function.sha256 == sample.sha256 WHERE path LIKE ? AND " if self.f_fol_cmp else "SELECT sha256,fname,fhd,fhm,f_ana,ptype FROM function WHERE "
        sql += "f_ana == 1 AND bsize BETWEEN ? AND ?" if self.f_ana_cmp else "bsize BETWEEN ? AND ?"

        fns = list(idautils.Functions())
        for fva in tqdm(fns, desc='comparing functions'):
            fname = get_func_name(fva)
            if self.exclude_libthunk(fva, fname) or not num_of_samples:
                continue
            pfhd, pbsize = self.calc_fn_ssdeep(fva, fname)
            pfhm, pcfgnum = self.calc_fn_machoc(fva, fname)
            if not (pfhd and pfhm):
                continue

            pbuf = ctypes.create_string_buffer(pfhd.encode())
            self.debug('COMPARE {}: ssdeep={} (size={}), machoc={} (num of bb={})'.format(fname, pfhd, pbsize, pfhm, pcfgnum))
            # only records whose code size is within +-ratio% are candidates
            min_ = pbsize * (1 - (self.ratio / 100))
            max_ = pbsize * (1 + (self.ratio / 100))
            self.debug('min={}, max={}'.format(min_, max_))
            if self.f_fol_cmp:
                self.mcur.execute(sql, (self.ana_fol+'%', min_, max_))
            else:
                self.mcur.execute(sql, (min_, max_))
            frows = self.mcur.fetchall()
            self.debug('targeted {} records'.format(len(frows)))

            for sha256, sfname, sfhd, sfhm, sf_ana, sptype in frows:
                if sha256 == self.sha256: # skip the self
                    continue
                best = res[sha256]['mfn'][fva] # best match so far for this function
                best.default_factory = lambda: 0
                sbuf = ctypes.create_string_buffer(sfhd.encode())
                score = fuzzy_lib.fuzzy_compare(pbuf, sbuf)
                cfg_match = bool(pfhm == sfhm)

                dbg_cond = g_dbg_flag and fva == g_dbg_fva and sfname == g_dbg_fname and sha256 == g_dbg_sha256
                if dbg_cond:
                    print('{:#x}: compared with {} in {} score = {} machoc match = {}'.format(fva, sfname, sha256, score, cfg_match))

                if (score >= self.threshold) or (score >= self.threshold_cfg and cfg_match) or (pbsize > self.max_bytes_for_score and cfg_match):
                    if dbg_cond:
                        print('{:#x}: counting {} in {} for total number'.format(fva, sfname, sha256))
                    res[sha256]['mcnt']['total'] += 1
                    if sf_ana:
                        res[sha256]['mcnt']['analyzed'] += 1
                        # keep the analyzed match with the highest score
                        if score > best['score'] or (best['score'] == 0 and pbsize > self.max_bytes_for_score):
                            best['score'] = score
                            best['cfg_match'] = cfg_match
                            best['sfname'] = sfname
                            best['sptype'] = sptype
                            best['pbsize'] = pbsize
                            if dbg_cond:
                                print('{:#x}: appended record = {} in {}'.format(fva, sfname, sha256))

        c = SummaryCh("fn_fuzzy summary", res)
        c.Show()
        success('totally {} samples compared'.format(num_of_samples))

    def close(self):
        """Commit pending changes and release both database connections."""
        self.conn.commit()
        self.cur.close()
        self.conn.close()
        self.mcur.close()
        self.mconn.close()
def info(msg):
    """Print an informational message prefixed with "[*]"."""
    print("[*]", msg)
def success(msg):
    """Print a success message prefixed with "[+]"."""
    print("[+]", msg)
def error(msg):
    """Print an error message prefixed with "[!]"."""
    print("[!]", msg)
def get_hex_pat(buf):
    """Return `buf` as space-separated two-digit lowercase hex values.

    `buf` may be a str (each character is converted with ord()) or a
    bytes-like object (iterating bytes yields ints in Python 3, on which
    ord() would raise TypeError).
    """
    if isinstance(buf, str):
        values = (ord(ch) for ch in buf)
    else:
        values = iter(bytearray(buf))
    return ' '.join('{:02x}'.format(value) for value in values)
def shex(a):
    """Return hex(a) without a trailing "L" (the Python 2 long suffix)."""
    text = hex(a)
    return text.rstrip("L")
def set_decomplier_cmt(ea, cmt):
    """Attach `cmt` as a user comment at `ea` in the decompiled function and save it.

    An error message is printed when idaapi.decompile() returns nothing.
    """
    cfunc = idaapi.decompile(ea)
    if not cfunc:
        # was ".formart", which raised AttributeError instead of reporting the failure
        error("Decompile failed: {:#x}".format(ea))
        return
    tl = idaapi.treeloc_t()
    tl.ea = ea
    tl.itp = idaapi.ITP_SEMI
    cfunc.set_user_cmt(tl, cmt)
    cfunc.save_user_cmts()
def main():
    """Entry point: export from the command line, or export/compare via the form.

    When the plugin options "fn_fuzzy" are set (CLI), they are parsed as
    "min_bytes:f_ex_libthunk:f_update:f_ana_exp:ana_pre:db_path", the
    functions are exported and IDA exits with code 0 (exported) or 2
    (skipped because the sample is already in the DB).
    Otherwise the option form is shown and the selected command is run.
    """
    info('start')

    plugin_options = idaapi.get_plugin_options("fn_fuzzy")
    if plugin_options: # CLI (export only)
        # not change the database to maintain the window setting
        process_config_line("ABANDON_DATABASE=YES")

        start = time.time()
        options = plugin_options.split(':')
        min_bytes = int(options[0])
        # NOTE(review): eval() executes whatever is passed in the plugin
        # options; only use this with trusted command lines.
        f_ex_libthunk = eval(options[1])
        f_update = eval(options[2])
        f_ana_exp = eval(options[3])
        ana_pre = options[4]
        db_path = ':'.join(options[5:]) # the path itself may contain ':'
        ff = FnFuzzy(False, db_path, min_bytes, f_ex_libthunk, f_update, f_ana_exp, ana_pre)
        res = ff.export()
        ff.close()
        elapsed = time.time() - start
        info('done (CLI)')
        if res: # return code 1 is reserved for error
            qexit(0)
        else:
            qexit(2) # already exported (skipped)
    else:
        f = FnFuzzyForm()
        f.Compile()
        f.iDBSave.value = g_db_path
        f.iMinBytes.value = g_min_bytes
        f.iPrefix.value = g_analyzed_prefix
        f.iFolder.value = os.path.dirname(get_idb_path())
        f.iSimilarity.value = g_threshold
        f.iSimilarityCFG.value = g_threshold_cfg
        f.iMaxBytesForScore.value = g_max_bytes_for_score
        f.iRatio.value = g_bsize_ratio
        r = f.Execute()
        if r != 1: # not "Run"
            print('canceled')
            return

        start = time.time()
        ff = FnFuzzy(f.cDebug.checked, f.iDBSave.value, f.iMinBytes.value, f.cLibthunk.checked, f.cUpdate.checked, f.cAnaExp.checked, f.iPrefix.value, f.cAnaCmp.checked, f.cFolCmp.checked, f.iFolder.value, f.iSimilarity.value, f.iSimilarityCFG.value, f.iMaxBytesForScore.value, f.iRatio.value)
        if f.rExport.selected:
            if ff.sha256 is None:
                # release the DB connections before giving up
                ff.close()
                print('aborted')
                return
            ff.export()
        else:
            ff.compare()
        ff.close()
        elapsed = time.time() - start

    info('elapsed time = {} sec'.format(elapsed))
    info('done')


if __name__ == '__main__':
    main()
================================================
FILE: fn_fuzzy/yara_fn.py
================================================
'''
IDAPython script that generates a YARA rule to match against the
basic blocks of the current function. It masks out relocation bytes
and ignores jump instructions (given that we're already trying to
match compiler-specific bytes, this is of arguable benefit).
If python-yara is installed, the IDAPython script also validates that
the generated rule matches at least one segment in the current file.
author: Willi Ballenthin <william.ballenthin@fireeye.com>
'''
# 2018/8/6 Takahiro Haruyama modified to calculate fixup (relocation) size correctly
# and exclude direct memory reference data and other ignorable variable code
import logging
from collections import namedtuple
from idc import *
import idaapi
import idautils
import ida_ua, ida_kernwin
# module-level logger (used by test_yara_rule)
logger = logging.getLogger(__name__)
# start address and size in bytes of one basic block (see get_basic_blocks)
BasicBlock = namedtuple('BasicBlock', ['va', 'size'])
# each rule must have at least this many non-masked bytes
MIN_BB_BYTE_COUNT = 4
def get_basic_blocks(fva):
    '''
    list the basic blocks of the function at `fva` as `BasicBlock` tuples.
    an empty list is returned when there is no function at `fva`.
    '''
    func = idaapi.get_func(fva)
    if func is None:
        return []
    return [BasicBlock(va=node.start_ea, size=node.end_ea - node.start_ea)
            for node in idaapi.FlowChart(func)]
def get_function(va):
    '''
    look up the function containing `va` and return its start address.
    '''
    containing = idaapi.get_func(va)
    return containing.start_ea
# result of get_basic_block_rule(): rule name, raw bytes, formatted bytes
# ('??' for masked ones) and the non-masked bytes joined into a string
Rule = namedtuple('Rule', ['name', 'bytes', 'masked_bytes', 'cut_bytes_for_hash'])
def is_jump(va):
    '''
    tell whether the instruction at `va` looks like a jump,
    i.e. whether its mnemonic starts with 'j'.
    '''
    mnemonic = print_insn_mnem(va)
    return mnemonic.startswith('j')
def get_fixup_va_and_size(va):
    '''
    return the address and the size of the first fixup located after `va`.
    '''
    # this file never imports ida_fixup itself; import it here instead of
    # relying on `from idc import *` to provide the name
    import ida_fixup
    fva = idaapi.get_next_fixup_ea(va)
    ftype = get_fixup_target_type(fva)
    fsize = ida_fixup.calc_fixup_size(ftype)
    return fva, fsize
def get_basic_block_rule(bb):
    '''
    create and format a YARA rule for a single basic block.
    The following bytes are ignored:
    - relocation bytes
    - the last jump instruction
    - direct memory references / immediate values and other ignorable data

    returns a `Rule` (name, bytes, masked_bytes, cut_bytes_for_hash), or None
    when the bytes of an instruction cannot be read.
    '''
    # fetch the instruction start addresses
    block_end = bb.va + bb.size
    insns = []
    va = bb.va
    while va < block_end:
        insns.append(va)
        va = next_head(va)

    # drop the last instruction if its a jump
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    # operand types whose bytes are masked (the same for every instruction)
    masked_types = (o_mem, o_imm, o_displ, o_near, o_far)

    _bytes = []
    # `masked_bytes` is the list of formatted bytes,
    # not yet join'd for performance.
    masked_bytes = []
    # non-masked bytes as single characters, joined once at the end
    kept_chars = []

    for va in insns:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, va)
        op1 = insn.Op1
        op2 = insn.Op2

        fixup_byte_addrs = set()
        if idaapi.contains_fixups(va, size): # not work for x64 binaries? (e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)
            logging.debug('ea = {:#x}, fixups'.format(va))
            # fetch the fixup locations and sizes within this one instruction.
            insn_end = va + size
            fixups = []
            fva, fsize = get_fixup_va_and_size(va)
            fixups.append((fva, fsize))
            # a fixup whose size is not positive would never advance the
            # scan, so stop there instead of looping forever
            while fsize > 0 and fva + fsize < insn_end:
                # "- 1" to detect consecutive fixups
                fva, fsize = get_fixup_va_and_size(fva + fsize - 1)
                fixups.append((fva, fsize))
            logging.debug('fixups: {}'.format(fixups))
            # compute the addresses of each component byte.
            for fixup_va, fixup_size in fixups:
                fixup_byte_addrs.update(range(fixup_va, fixup_va + fixup_size))

        # fetch and format each byte of the instruction,
        # possibly masking it into an unknown byte if its a fixup or several operand types like direct mem ref.
        bytes_ = get_bytes(va, size)
        if bytes_ is None:
            return None
        for i, byte in enumerate(bytes_):
            _bytes.append(byte)
            byte_addr = i + va
            if byte_addr in fixup_byte_addrs:
                logging.debug('{:#x}: fixup byte (masked)'.format(byte_addr))
                masked_bytes.append('??')
            elif op1.type in masked_types and i >= op1.offb and (i < op2.offb or op2.offb == 0):
                logging.debug('{:#x}: Op1 masked byte'.format(byte_addr))
                masked_bytes.append('??')
            elif op2.type in masked_types and i >= op2.offb:
                logging.debug('{:#x}: Op2 masked byte'.format(byte_addr))
                masked_bytes.append('??')
            else:
                masked_bytes.append('%02X' % (byte)) # for Python3
                kept_chars.append(chr(byte))

    return Rule('$0x%x' % (bb.va), _bytes, masked_bytes, ''.join(kept_chars))
def format_rules(fva, rules):
    '''
    given the address of a function, and the byte signatures for basic blocks in
    the function, format a complete YARA rule that matches all of the
    basic block signatures.

    returns the rule text, or None when `rules` is empty.
    '''
    # get_func_name is the IDA 7.x name of the former GetFunctionName
    # (the rest of this file already uses the 7.x API)
    name = get_func_name(fva)
    if not rules:
        logging.info('no rules for {}'.format(name))
        return None

    # some characters aren't valid for YARA rule names
    BAD_CHARS = '@ /\\!@#$%^&*()[]{};:\'",./<>?'
    safe_name = ''.join(c for c in name if c not in BAD_CHARS)

    md5_hex = idautils.GetInputFileMD5().hex()
    ret = []
    ret.append('rule a_{hash:s}_{name:s} {{'.format(
        hash=md5_hex,
        name=safe_name))
    ret.append(' meta:')
    ret.append(' sample_md5 = "{md5:s}"'.format(md5=md5_hex))
    ret.append(' function_address = "0x{fva:x}"'.format(fva=fva))
    ret.append(' function_name = "{name:s}"'.format(name=name))
    ret.append(' strings:')
    for rule in rules:
        # trailing wildcards (and spaces) are removed from the pattern
        formatted_rule = ' '.join(rule.masked_bytes).rstrip('?? ')
        ret.append(' {name:s} = {{ {hex:s} }}'.format(
            name=rule.name,
            hex=formatted_rule))
    ret.append(' condition:')
    ret.append(' all of them')
    ret.append('}')
    return '\n'.join(ret)
def create_yara_rule_for_function(fva):
    '''
    build the YARA rule text for the function at `fva` from the signatures
    of its basic blocks (see format_rules for the result).
    '''
    selected = []
    for block in get_basic_blocks(fva):
        candidate = get_basic_block_rule(block)
        if not candidate:
            continue
        # keep only signatures with at least MIN_BB_BYTE_COUNT
        # non-masked bytes to avoid many very small matches.
        concrete = sum(1 for b in candidate.masked_bytes if b != '??')
        if concrete >= MIN_BB_BYTE_COUNT:
            selected.append(candidate)
    return format_rules(fva, selected)
def get_segment_buffer(segstart):
    '''
    fetch the bytes of the section that starts at the given address.
    if the entire section cannot be accessed, try smaller regions until it works.
    returns None when nothing could be read.
    '''
    segend = idaapi.getseg(segstart).end_ea
    buf = None
    segsize = segend - segstart
    while buf is None and segsize > 0:
        # get_bytes is the IDA 7.x name of the former GetManyBytes
        # (get_basic_block_rule in this file already uses it)
        buf = get_bytes(segstart, segsize)
        if buf is None:
            segsize -= 0x1000
    return buf
# one segment of the input file as yielded by get_segments()
Segment = namedtuple('Segment', ['start', 'size', 'name', 'buf'])
def get_segments():
    '''
    fetch the segments in the current executable.
    yields one `Segment` (start, size, name, buf) per segment;
    `buf` is None when the segment bytes could not be read.
    '''
    for segstart in idautils.Segments():
        segend = idaapi.getseg(segstart).end_ea
        segsize = segend - segstart
        # get_segm_name is the IDA 7.x name of the former SegName
        segname = str(get_segm_name(segstart)).rstrip('\x00')
        segbuf = get_segment_buffer(segstart)
        # the second field of Segment is the size (the end address was passed before)
        yield Segment(segstart, segsize, segname, segbuf)
class TestDidntRunError(Exception):
    '''raised by test_yara_rule when the rule could not be tested at all.'''
def test_yara_rule(rule):
    '''
    compile `rule` and match it against the bytes of every segment in the
    current executable.
    raise TestDidntRunError if the YARA library cannot be imported.
    return True on the first segment with at least one match, False otherwise.
    '''
    try:
        import yara
    except ImportError:
        logger.warning("can't test rule: failed to import python-yara")
        raise TestDidntRunError('python-yara not available')

    compiled = yara.compile(source=rule)
    for segment in get_segments():
        # segments whose bytes could not be read are skipped
        if segment.buf is None:
            continue
        hits = compiled.match(data=segment.buf)
        if len(hits) > 0:
            logger.info('generated rule matches section: {:s}'.format(segment.name))
            return True
    return False
def main():
    '''
    print the YARA rule of the function under the cursor, or of every
    function in the database, depending on the answer to the dialog.
    '''
    print('Start')
    ans = ida_kernwin.ask_yn(0, 'define only selected function?')
    if ans == ida_kernwin.ASKBTN_CANCEL:
        # Cancel is -1 and therefore truthy: it used to be handled like Yes
        print('Canceled')
        return

    if ans == ida_kernwin.ASKBTN_YES:
        # get_screen_ea is the IDA 7.x name of the former ScreenEA
        va = get_screen_ea()
        try:
            fva = get_function(va)
        except AttributeError:
            # idaapi.get_func() returned None: the cursor is outside of any function
            print('no function at {:#x}'.format(va))
            return
        print(('-' * 80))
        rule = create_yara_rule_for_function(fva)
        if rule:
            print(rule)
            # validation of the generated rule is disabled:
            # if test_yara_rule(rule):
            #     logging.info('success: validated the generated rule')
            # else:
            #     logging.error('error: failed to validate generated rule')
    else:
        for fva in idautils.Functions():
            print(('-' * 80))
            rule = create_yara_rule_for_function(fva)
            if rule:
                print(rule)
    print('Done')


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)
    #logging.basicConfig(level=logging.DEBUG)
    #logging.getLogger().setLevel(logging.DEBUG)
    main()
================================================
FILE: fn_fuzzy/yara_fn_7x.py
================================================
'''
IDAPython script that generates a YARA rule to match against the
basic blocks of the current function. It masks out relocation bytes
and ignores jump instructions (given that we're already trying to
match compiler-specific bytes, this is of arguable benefit).
If python-yara is installed, the IDAPython script also validates that
the generated rule matches at least one segment in the current file.
author: Willi Ballenthin <william.ballenthin@fireeye.com>
'''
# 2018/8/6 Takahiro Haruyama modified to calculate fixup (relocation) size correctly
# and exclude direct memory reference data and other ignorable variable code
import logging
from collections import namedtuple
from idc import *
import idaapi
import idautils
import ida_ua, ida_kernwin
# module-level logger (used by test_yara_rule)
logger = logging.getLogger(__name__)
# start address and size in bytes of one basic block (see get_basic_blocks)
BasicBlock = namedtuple('BasicBlock', ['va', 'size'])
# each rule must have at least this many non-masked bytes
MIN_BB_BYTE_COUNT = 4
def get_basic_blocks(fva):
    '''
    list the basic blocks of the function at `fva` as `BasicBlock` tuples.
    an empty list is returned when there is no function at `fva`.
    '''
    func = idaapi.get_func(fva)
    if func is None:
        return []
    return [BasicBlock(va=node.start_ea, size=node.end_ea - node.start_ea)
            for node in idaapi.FlowChart(func)]
def get_function(va):
    '''
    look up the function containing `va` and return its start address.
    '''
    containing = idaapi.get_func(va)
    return containing.start_ea
# result of get_basic_block_rule(): rule name, raw bytes, formatted bytes
# ('??' for masked ones) and the non-masked bytes joined into a string
Rule = namedtuple('Rule', ['name', 'bytes', 'masked_bytes', 'cut_bytes_for_hash'])
def is_jump(va):
    '''
    tell whether the instruction at `va` looks like a jump,
    i.e. whether its mnemonic starts with 'j'.
    '''
    mnemonic = print_insn_mnem(va)
    return mnemonic.startswith('j')
def get_fixup_va_and_size(va):
    '''
    return the address and the size of the first fixup located after `va`.
    '''
    # this file never imports ida_fixup itself; import it here instead of
    # relying on `from idc import *` to provide the name
    import ida_fixup
    fva = idaapi.get_next_fixup_ea(va)
    ftype = get_fixup_target_type(fva)
    fsize = ida_fixup.calc_fixup_size(ftype)
    return fva, fsize
def get_basic_block_rule(bb):
    '''
    create and format a YARA rule for a single basic block.
    The following bytes are ignored:
    - relocation bytes
    - the last jump instruction
    - direct memory references / immediate values and other ignorable data

    returns a `Rule` (name, bytes, masked_bytes, cut_bytes_for_hash), or None
    when the bytes of an instruction cannot be read.
    '''
    # fetch the instruction start addresses
    block_end = bb.va + bb.size
    insns = []
    va = bb.va
    while va < block_end:
        insns.append(va)
        va = next_head(va)

    # drop the last instruction if its a jump
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    # operand types whose bytes are masked (the same for every instruction)
    masked_types = (o_mem, o_imm, o_displ, o_near, o_far)

    _bytes = []
    # `masked_bytes` is the list of formatted bytes,
    # not yet join'd for performance.
    masked_bytes = []
    # non-masked bytes as single characters, joined once at the end
    kept_chars = []

    for va in insns:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, va)
        op1 = insn.Op1
        op2 = insn.Op2

        fixup_byte_addrs = set()
        if idaapi.contains_fixups(va, size): # not work for x64 binaries? (e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)
            logging.debug('ea = {:#x}, fixups'.format(va))
            # fetch the fixup locations and sizes within this one instruction.
            insn_end = va + size
            fixups = []
            fva, fsize = get_fixup_va_and_size(va)
            fixups.append((fva, fsize))
            # a fixup whose size is not positive would never advance the
            # scan, so stop there instead of looping forever
            while fsize > 0 and fva + fsize < insn_end:
                # "- 1" to detect consecutive fixups
                fva, fsize = get_fixup_va_and_size(fva + fsize - 1)
                fixups.append((fva, fsize))
            logging.debug('fixups: {}'.format(fixups))
            # compute the addresses of each component byte.
            for fixup_va, fixup_size in fixups:
                fixup_byte_addrs.update(range(fixup_va, fixup_va + fixup_size))

        # fetch and format each byte of the instruction,
        # possibly masking it into an unknown byte if its a fixup or several operand types like direct mem ref.
        bytes_ = get_bytes(va, size)
        if bytes_ is None:
            return None
        for i, byte in enumerate(bytes_):
            _bytes.append(byte)
            byte_addr = i + va
            if byte_addr in fixup_byte_addrs:
                logging.debug('{:#x}: fixup byte (masked)'.format(byte_addr))
                masked_bytes.append('??')
            elif op1.type in masked_types and i >= op1.offb and (i < op2.offb or op2.offb == 0):
                logging.debug('{:#x}: Op1 masked byte'.format(byte_addr))
                masked_bytes.append('??')
            elif op2.type in masked_types and i >= op2.offb:
                logging.debug('{:#x}: Op2 masked byte'.format(byte_addr))
                masked_bytes.append('??')
            else:
                masked_bytes.append('%02X' % (byte)) # for Python3
                kept_chars.append(chr(byte))

    return Rule('$0x%x' % (bb.va), _bytes, masked_bytes, ''.join(kept_chars))
def format_rules(fva, rules):
    '''
    given the address of a function, and the byte signatures for basic blocks in
    the function, format a complete YARA rule that matches all of the
    basic block signatures.

    returns the rule text, or None when `rules` is empty.
    '''
    # this file only does `from idc import *` (the name `idc` itself is not
    # imported here), so the function is called unqualified like the other
    # idc functions in this file
    name = get_func_name(fva)
    if not rules:
        logging.info('no rules for {}'.format(name))
        return None

    # some characters aren't valid for YARA rule names
    BAD_CHARS = '@ /\\!@#$%^&*()[]{};:\'",./<>?'
    safe_name = ''.join(c for c in name if c not in BAD_CHARS)

    md5_hex = idautils.GetInputFileMD5().hex()
    ret = []
    ret.append('rule a_{hash:s}_{name:s} {{'.format(
        hash=md5_hex,
        name=safe_name))
    ret.append(' meta:')
    ret.append(' sample_md5 = "{md5:s}"'.format(md5=md5_hex))
    ret.append(' function_address = "0x{fva:x}"'.format(fva=fva))
    ret.append(' function_name = "{name:s}"'.format(name=name))
    ret.append(' strings:')
    for rule in rules:
        # trailing wildcards (and spaces) are removed from the pattern
        formatted_rule = ' '.join(rule.masked_bytes).rstrip('?? ')
        ret.append(' {name:s} = {{ {hex:s} }}'.format(
            name=rule.name,
            hex=formatted_rule))
    ret.append(' condition:')
    ret.append(' all of them')
    ret.append('}')
    return '\n'.join(ret)
def create_yara_rule_for_function(fva):
    '''
    build the YARA rule text for the function at `fva`.

    every basic block of the function is turned into a byte signature; blocks
    that yield no signature, or whose signature has fewer than
    MIN_BB_BYTE_COUNT concrete (non-'??') bytes, are left out so the rule does
    not fill up with tiny, match-everything patterns.

    returns whatever format_rules() produces for the surviving signatures.
    '''
    kept = []
    for block in get_basic_blocks(fva):
        candidate = get_basic_block_rule(block)
        if not candidate:
            continue
        concrete = sum(1 for token in candidate.masked_bytes if token != '??')
        if concrete >= MIN_BB_BYTE_COUNT:
            kept.append(candidate)
    return format_rules(fva, kept)
def get_segment_buffer(segstart):
    '''
    read the bytes of the segment that begins at `segstart`.

    the whole segment is requested first; each time the read comes back as
    None the request is shortened by 0x1000 bytes and retried.

    returns the first buffer obtained, or None once the request size has
    dropped to zero or below without a successful read.
    '''
    remaining = idaapi.getseg(segstart).end_ea - segstart
    while remaining > 0:
        data = idc.get_bytes(segstart, remaining)
        if data is not None:
            return data
        remaining -= 0x1000
    return None
# one segment of the loaded executable: start address, size in bytes, name,
# and the bytes read from it.
Segment = namedtuple('Segment', ['start', 'size', 'name', 'buf'])


def get_segments():
    '''
    fetch the segments in the current executable.

    yields one Segment per address produced by idautils.Segments(). `buf` is
    the result of get_segment_buffer() for that address, so it may be None or
    shorter than `size` when the full segment could not be read.
    '''
    for segstart in idautils.Segments():
        segend = idaapi.getseg(segstart).end_ea
        segsize = segend - segstart
        segname = str(idc.get_segm_name(segstart)).rstrip('\x00')
        segbuf = get_segment_buffer(segstart)
        # the second field is `size`: pass the byte count, not the end address.
        yield Segment(segstart, segsize, segname, segbuf)
class TestDidntRunError(Exception):
    '''
    raised when a generated rule could not be tested at all (as opposed to
    having been tested and found not to match).
    '''
def test_yara_rule(rule):
    '''
    compile `rule` and run it over the bytes of each segment in the current
    executable.

    raises TestDidntRunError when the python-yara module cannot be imported.
    returns True as soon as one segment matches, False when none does.
    '''
    try:
        import yara
    except ImportError:
        logger.warning("can't test rule: failed to import python-yara")
        raise TestDidntRunError('python-yara not available')

    compiled = yara.compile(source=rule)
    for seg in get_segments():
        # segments without a buffer cannot be scanned; skip them
        if seg.buf is None:
            continue
        hits = compiled.match(data=seg.buf)
        if len(hits) > 0:
            logger.info('generated rule matches section: {:s}'.format(seg.name))
            return True
    return False
def main():
    '''
    ask whether to process only the function under the cursor or every
    function in the database, then print a YARA rule for each processed
    function that yields one. cancelling the dialog aborts without generating
    anything.
    '''
    print('Start')
    ans = ida_kernwin.ask_yn(0, 'define only selected function?')
    if ans == ida_kernwin.ASKBTN_CANCEL:
        # Cancel is reported as a negative (truthy) value; without this guard
        # a cancelled dialog would be handled exactly like "Yes".
        print('Canceled')
        return

    if ans:
        targets = [get_function(ida_kernwin.get_screen_ea())]
    else:
        targets = idautils.Functions()

    for fva in targets:
        print(('-' * 80))
        rule = create_yara_rule_for_function(fva)
        if rule:
            print(rule)
            # NOTE: validating the printed rule with test_yara_rule() is
            # currently disabled.
    print('Done')
if __name__ == '__main__':
    # basicConfig() leaves a root logger that already has handlers untouched,
    # so the level is also set on the root logger directly (presumably because
    # the host application may have configured logging already -- TODO confirm).
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)
    # use the two DEBUG lines below instead to see the logging.debug() output
    # (e.g. the per-byte masking messages):
    #logging.basicConfig(level=logging.DEBUG)
    #logging.getLogger().setLevel(logging.DEBUG)
    main()
================================================
FILE
gitextract_b2x18g5t/
├── .gitignore
├── ADVobfuscator/
│ ├── README.org
│ └── idapy3_ADVobfuscator_deob.py
├── LICENSE
├── README.org
├── bindiff/
│ ├── README.org
│ ├── bindiff.py
│ ├── bindiff_export.idc
│ ├── save_func_names.py
│ └── save_func_names_7x.py
├── callstrings/
│ ├── README.org
│ ├── hexrays_utils.py
│ ├── ida_callstrings_dbg.py
│ ├── ida_callstrings_flare_emu.py
│ └── ida_callstrings_static.py
├── eset_crackme/
│ ├── README.org
│ ├── loaders/
│ │ └── ida_loader_drv_vm.py
│ └── procs/
│ └── ida_processor_drv_vm.py
├── fn_fuzzy/
│ ├── README.org
│ ├── cli_export.py
│ ├── dump_types.py
│ ├── fn_fuzzy.py
│ ├── fn_fuzzy_7x.py
│ ├── yara_fn.py
│ └── yara_fn_7x.py
└── stackstring_static/
├── README.org
└── stackstring_static.py
SYMBOL INDEX (283 symbols across 17 files)
FILE: ADVobfuscator/idapy3_ADVobfuscator_deob.py
function info (line 31) | def info(msg):
function success (line 34) | def success(msg):
function error (line 37) | def error(msg):
function set_decomplier_cmt (line 40) | def set_decomplier_cmt(ea, cmt):
function add_bookmark (line 54) | def add_bookmark(ea, comment):
function get_emu_range (line 69) | def get_emu_range(ea):
function call_hook (line 81) | def call_hook(address, argv, funcName, userData):
function inst_hook (line 89) | def inst_hook(uc, address, size, userData):
function emulate (line 113) | def emulate(pname, eh, dec_fn, size, key):
function main (line 173) | def main():
FILE: bindiff/bindiff.py
class LocalError (line 37) | class LocalError(Exception): pass
class ProcExportError (line 38) | class ProcExportError(LocalError): pass
class ProcDiffError (line 39) | class ProcDiffError(LocalError): pass
class LoadFuncNamesError (line 40) | class LoadFuncNamesError(LocalError): pass
class FileNotFoundError (line 41) | class FileNotFoundError(LocalError): pass
class ChildProcessError (line 42) | class ChildProcessError(LocalError): pass
class BinDiff (line 44) | class BinDiff(object):
method __init__ (line 46) | def __init__ (self, primary, out_dir, ws_th, fs_th, ins_th, bb_th, siz...
method _dprint (line 89) | def _dprint(self, msg):
method _get_machine_type (line 95) | def _get_machine_type(self, path):
method _files_not_found (line 129) | def _files_not_found(self):
method _get_db_path_noext (line 136) | def _get_db_path_noext(self, target):
method _get_idb_path (line 140) | def _get_idb_path(self, target, arch):
method _get_ida_path (line 149) | def _get_ida_path(self, arch):
method _load_func_names_pyidb (line 155) | def _load_func_names_pyidb(self, idb_path): # exlcude library/thunk fu...
method _load_func_names_default (line 175) | def _load_func_names_default(self, func_regex, path, ida_path):
method _make_BinExport (line 194) | def _make_BinExport(self, target, ida_path):
method _get_BinDiff_path (line 213) | def _get_BinDiff_path(self, secondary):
method _make_BinDiff (line 218) | def _make_BinDiff(self, secondary):
method is_skipped (line 239) | def is_skipped(self, secondary):
method check_similarity (line 271) | def check_similarity(self, secondary, q=None):
method check_similarities (line 331) | def check_similarities(self, secondary_dir, recursively):
method increment_count (line 352) | def increment_count(self):
method get_result (line 355) | def get_result(self):
function main (line 359) | def main():
FILE: bindiff/save_func_names.py
function get_pfuncs (line 7) | def get_pfuncs(ea, track_th):
function main (line 15) | def main():
FILE: bindiff/save_func_names_7x.py
function get_pfuncs (line 8) | def get_pfuncs(ea, track_th):
function main (line 16) | def main():
FILE: callstrings/hexrays_utils.py
function info (line 24) | def info(msg):
function success (line 27) | def success(msg):
function error (line 30) | def error(msg):
function debug (line 33) | def debug(msg):
function extract_ascii (line 38) | def extract_ascii(data):
function extract_unicode (line 42) | def extract_unicode(data):
function get_ctree_root (line 46) | def get_ctree_root(ea, cache=True):
class cnt_val_finder_t (line 60) | class cnt_val_finder_t(ctree_visitor_t):
method __init__ (line 62) | def __init__(self):
method visit_expr (line 68) | def visit_expr(self, expr):
method get_cnt_val (line 102) | def get_cnt_val(self):
class asg_parent_finder_t (line 107) | class asg_parent_finder_t(ctree_visitor_t):
method __init__ (line 109) | def __init__(self, call_ea):
method visit_expr (line 115) | def visit_expr(self, expr):
class my_lvar_modifier_t (line 127) | class my_lvar_modifier_t(user_lvar_modifier_t):
method __init__ (line 129) | def __init__(self, target_name, new_name=None, new_decl=None, new_tif=...
method modify_lvars (line 137) | def modify_lvars(self, lvars):
class HexRaysUtils (line 169) | class HexRaysUtils():
method __init__ (line 171) | def __init__(self):
method get_reg_value (line 177) | def get_reg_value(self, reg_name):
method get_ptr_value (line 181) | def get_ptr_value(self, ptr):
method get_string (line 185) | def get_string(self, ea, is_unicode=False):
method get_bytes (line 188) | def get_bytes(self, ea):
method get_fn_offset (line 191) | def get_fn_offset(self, ea):
method set_decomplier_cmt (line 204) | def set_decomplier_cmt(self, cfunc, ea, cmt):
method GetTypeSignature (line 222) | def GetTypeSignature(self, apiName):
method force_rename_lvar (line 244) | def force_rename_lvar(self, ea, var, new_name):
method get_arg_strings (line 263) | def get_arg_strings(self, address):
method print_summary (line 376) | def print_summary(self):
method decode (line 383) | def decode(self, enc, cst_val):
FILE: callstrings/ida_callstrings_dbg.py
function info (line 15) | def info(msg):
function success (line 18) | def success(msg):
function error (line 21) | def error(msg):
function debug (line 24) | def debug(msg):
class TraceHook (line 29) | class TraceHook(DBG_Hooks, HexRaysUtils):
method __init__ (line 31) | def __init__(self, target_ea):
method get_reg_value (line 40) | def get_reg_value(self, reg_name):
method get_ptr_value (line 44) | def get_ptr_value(self, ptr):
method get_string (line 51) | def get_string(self, ea, is_unicode=False):
method dbg_trace (line 57) | def dbg_trace(self, tid, ea):
method dbg_thread_start (line 86) | def dbg_thread_start(self, pid, tid, ea):
method dbg_thread_exit (line 121) | def dbg_thread_exit(self, pid, tid, ea, exit_code):
method dbg_run_to (line 125) | def dbg_run_to(self, pid, tid=0, ea=0):
method dbg_process_exit (line 138) | def dbg_process_exit(self, pid, tid, ea, code):
function main (line 152) | def main():
FILE: callstrings/ida_callstrings_flare_emu.py
function info (line 28) | def info(msg):
function success (line 31) | def success(msg):
function error (line 34) | def error(msg):
function debug (line 37) | def debug(msg):
function debug_bin (line 41) | def debug_bin(n, v):
class HexRaysEmu (line 47) | class HexRaysEmu(HexRaysUtils):
method __init__ (line 49) | def __init__(self, eh):
method get_reg_value (line 54) | def get_reg_value(self, reg_name):
method get_ptr_value (line 58) | def get_ptr_value(self, ptr):
method get_string (line 62) | def get_string(self, ea, is_unicode=False):
method get_bytes (line 66) | def get_bytes(self, ea):
function call_hook (line 71) | def call_hook(address, argv, funcName, userData):
function mem_write_hook (line 83) | def mem_write_hook(unicornObject, accessType, memAccessAddress, memAcces...
function is_high_entropy (line 94) | def is_high_entropy(v):
function inst_hook_cff (line 112) | def inst_hook_cff(unicornObject, address, instructionSize, userData):
function inst_hook (line 164) | def inst_hook(unicornObject, address, instructionSize, userData):
function noop (line 174) | def noop(*args):
function main (line 178) | def main():
FILE: callstrings/ida_callstrings_static.py
function info (line 14) | def info(msg):
function success (line 17) | def success(msg):
function error (line 20) | def error(msg):
function debug (line 23) | def debug(msg):
class static_decoder_t (line 28) | class static_decoder_t(ctree_visitor_t, HexRaysUtils):
method __init__ (line 30) | def __init__(self, cst_val, cfunc):
method visit_expr (line 38) | def visit_expr(self, expr):
function main (line 64) | def main():
FILE: eset_crackme/loaders/ida_loader_drv_vm.py
function accept_file (line 8) | def accept_file(li, filename):
function int16 (line 15) | def int16(b):
function int32 (line 18) | def int32(b):
function myAddSeg (line 21) | def myAddSeg(startea, endea, base, use32, name, clas):
function load_file (line 32) | def load_file(li, neflags, format):
FILE: eset_crackme/procs/ida_processor_drv_vm.py
class eset_drv_vm_processor_t (line 41) | class eset_drv_vm_processor_t(ida_idp.processor_t):
method notify_get_frame_retsize (line 270) | def notify_get_frame_retsize(self, func_ea):
method notify_get_autocmt (line 279) | def notify_get_autocmt(self, insn):
method notify_is_sane_insn (line 288) | def notify_is_sane_insn(self, insn, no_crefs):
method handle_operand (line 310) | def handle_operand(self, insn, op, isRead):
method notify_emu (line 353) | def notify_emu(self, insn):
method notify_out_operand (line 385) | def notify_out_operand(self, ctx, op):
method notify_out_insn (line 435) | def notify_out_insn(self, ctx):
method fill_reg (line 462) | def fill_reg(self, op, dtype, regno):
method fill_phrase (line 468) | def fill_phrase(self, op, dtype, regno):
method fill_imm (line 474) | def fill_imm(self, op, dtype, val):
method fill_near (line 480) | def fill_near(self, op, dtype, addr):
method fill_mem (line 486) | def fill_mem(self, op, dtype, addr):
method get_next_bytes (line 495) | def get_next_bytes(self, insn, dtype):
method set_operand (line 503) | def set_operand(self, insn, op, type_, regno, dtype):
method notify_ana (line 522) | def notify_ana(self, insn):
method init_instructions (line 595) | def init_instructions(self):
method init_registers (line 609) | def init_registers(self):
method __init__ (line 659) | def __init__(self):
function PROCESSOR_ENTRY (line 667) | def PROCESSOR_ENTRY():
FILE: fn_fuzzy/cli_export.py
class LocalError (line 17) | class LocalError(Exception): pass
class ProcExportError (line 18) | class ProcExportError(LocalError): pass
function info (line 20) | def info(msg):
function success (line 23) | def success(msg):
function error (line 26) | def error(msg):
function init_db (line 29) | def init_db(cur):
function existed (line 40) | def existed(cur, sha256):
function remove (line 47) | def remove(cur, sha256):
function export (line 51) | def export(f_debug, idb_path, outdb, min_, f_ex_libthunk, f_update, f_an...
function list_file (line 103) | def list_file(d):
function list_file_recursive (line 108) | def list_file_recursive(d):
function main (line 113) | def main():
FILE: fn_fuzzy/dump_types.py
function main (line 3) | def main():
FILE: fn_fuzzy/fn_fuzzy.py
class defaultdictRecurse (line 40) | class defaultdictRecurse(defaultdict):
method __init__ (line 41) | def __init__(self):
class import_handler_t (line 44) | class import_handler_t(ida_kernwin.action_handler_t):
method __init__ (line 45) | def __init__(self, items, idb_path, title):
method import_types (line 51) | def import_types(self):
method activate (line 74) | def activate(self, ctx):
method update (line 112) | def update(self, ctx):
class FnCh (line 120) | class FnCh(ida_kernwin.Choose):
method __init__ (line 121) | def __init__(self, title, mfn, idb_path):
method OnInit (line 138) | def OnInit(self):
method OnPopup (line 145) | def OnPopup(self, form, popup_handle):
method OnGetSize (line 150) | def OnGetSize(self):
method OnGetLine (line 153) | def OnGetLine(self, n):
method OnSelectLine (line 156) | def OnSelectLine(self, n):
method OnRefresh (line 160) | def OnRefresh(self, n):
method OnClose (line 167) | def OnClose(self):
class SummaryCh (line 170) | class SummaryCh(ida_kernwin.Choose):
method __init__ (line 171) | def __init__(self, title, res):
method OnInit (line 182) | def OnInit(self):
method OnGetSize (line 188) | def OnGetSize(self):
method OnGetLine (line 191) | def OnGetLine(self, n):
method OnSelectLine (line 194) | def OnSelectLine(self, n):
method OnRefresh (line 199) | def OnRefresh(self, n):
method OnClose (line 202) | def OnClose(self):
class FnFuzzyForm (line 205) | class FnFuzzyForm(ida_kernwin.Form):
method __init__ (line 206) | def __init__(self):
method OnFormChange (line 252) | def OnFormChange(self, fid):
class FnFuzzy (line 284) | class FnFuzzy(object):
method __init__ (line 285) | def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_updat...
method debug (line 319) | def debug(self, msg):
method init_db (line 323) | def init_db(self):
method in_memory_db (line 334) | def in_memory_db(self): # for SELECT
method calc_fn_machoc (line 345) | def calc_fn_machoc(self, fva, fname): # based on Machoc hash implement...
method calc_fn_ssdeep (line 394) | def calc_fn_ssdeep(self, fva, fname):
method existed (line 416) | def existed(self):
method exclude_libthunk (line 423) | def exclude_libthunk(self, fva, fname):
method export (line 434) | def export(self):
method compare (line 465) | def compare(self):
method close (line 530) | def close(self):
function info (line 534) | def info(msg):
function success (line 537) | def success(msg):
function error (line 540) | def error(msg):
function get_hex_pat (line 543) | def get_hex_pat(buf):
function shex (line 547) | def shex(a):
function set_decomplier_cmt (line 550) | def set_decomplier_cmt(ea, cmt):
function main (line 561) | def main():
FILE: fn_fuzzy/fn_fuzzy_7x.py
class defaultdictRecurse (line 40) | class defaultdictRecurse(defaultdict):
method __init__ (line 41) | def __init__(self):
class import_handler_t (line 44) | class import_handler_t(ida_kernwin.action_handler_t):
method __init__ (line 45) | def __init__(self, items, idb_path, title):
method import_types (line 51) | def import_types(self):
method activate (line 74) | def activate(self, ctx):
method update (line 112) | def update(self, ctx):
class FnCh (line 120) | class FnCh(ida_kernwin.Choose):
method __init__ (line 121) | def __init__(self, title, mfn, idb_path):
method OnInit (line 138) | def OnInit(self):
method OnPopup (line 145) | def OnPopup(self, form, popup_handle):
method OnGetSize (line 150) | def OnGetSize(self):
method OnGetLine (line 153) | def OnGetLine(self, n):
method OnSelectLine (line 156) | def OnSelectLine(self, n):
method OnRefresh (line 160) | def OnRefresh(self, n):
method OnClose (line 167) | def OnClose(self):
class SummaryCh (line 170) | class SummaryCh(ida_kernwin.Choose):
method __init__ (line 171) | def __init__(self, title, res):
method OnInit (line 182) | def OnInit(self):
method OnGetSize (line 188) | def OnGetSize(self):
method OnGetLine (line 191) | def OnGetLine(self, n):
method OnSelectLine (line 194) | def OnSelectLine(self, n):
method OnRefresh (line 199) | def OnRefresh(self, n):
method OnClose (line 202) | def OnClose(self):
class FnFuzzyForm (line 205) | class FnFuzzyForm(ida_kernwin.Form):
method __init__ (line 206) | def __init__(self):
method OnFormChange (line 252) | def OnFormChange(self, fid):
class FnFuzzy (line 284) | class FnFuzzy(object):
method __init__ (line 285) | def __init__(self, f_debug, db_path, min_bytes, f_ex_libthunk, f_updat...
method debug (line 319) | def debug(self, msg):
method init_db (line 323) | def init_db(self):
method in_memory_db (line 334) | def in_memory_db(self): # for SELECT
method calc_fn_machoc (line 345) | def calc_fn_machoc(self, fva, fname): # based on Machoc hash implement...
method calc_fn_ssdeep (line 394) | def calc_fn_ssdeep(self, fva, fname):
method existed (line 416) | def existed(self):
method exclude_libthunk (line 423) | def exclude_libthunk(self, fva, fname):
method export (line 434) | def export(self):
method compare (line 468) | def compare(self):
method close (line 533) | def close(self):
function info (line 537) | def info(msg):
function success (line 540) | def success(msg):
function error (line 543) | def error(msg):
function get_hex_pat (line 546) | def get_hex_pat(buf):
function shex (line 550) | def shex(a):
function set_decomplier_cmt (line 553) | def set_decomplier_cmt(ea, cmt):
function main (line 564) | def main():
FILE: fn_fuzzy/yara_fn.py
function get_basic_blocks (line 31) | def get_basic_blocks(fva):
function get_function (line 47) | def get_function(va):
function is_jump (line 57) | def is_jump(va):
function get_fixup_va_and_size (line 63) | def get_fixup_va_and_size(va):
function get_basic_block_rule (line 69) | def get_basic_block_rule(bb):
function format_rules (line 144) | def format_rules(fva, rules):
function create_yara_rule_for_function (line 182) | def create_yara_rule_for_function(fva):
function get_segment_buffer (line 204) | def get_segment_buffer(segstart):
function get_segments (line 222) | def get_segments():
class TestDidntRunError (line 234) | class TestDidntRunError(Exception):
function test_yara_rule (line 238) | def test_yara_rule(rule):
function main (line 261) | def main():
FILE: fn_fuzzy/yara_fn_7x.py
function get_basic_blocks (line 31) | def get_basic_blocks(fva):
function get_function (line 47) | def get_function(va):
function is_jump (line 57) | def is_jump(va):
function get_fixup_va_and_size (line 63) | def get_fixup_va_and_size(va):
function get_basic_block_rule (line 69) | def get_basic_block_rule(bb):
function format_rules (line 144) | def format_rules(fva, rules):
function create_yara_rule_for_function (line 182) | def create_yara_rule_for_function(fva):
function get_segment_buffer (line 204) | def get_segment_buffer(segstart):
function get_segments (line 222) | def get_segments():
class TestDidntRunError (line 234) | class TestDidntRunError(Exception):
function test_yara_rule (line 238) | def test_yara_rule(rule):
function main (line 261) | def main():
FILE: stackstring_static/stackstring_static.py
function extract_unicode (line 14) | def extract_unicode(data):
function extract_ascii (line 18) | def extract_ascii(data):
class StackString (line 22) | class StackString(object):
method __init__ (line 24) | def __init__ (self, start, end, debug, do_xor, static_xor_key):
method rename_vars (line 35) | def rename_vars(self):
method store_bytes_to_reg (line 48) | def store_bytes_to_reg(self, r, b):
method store_reg_to_reg (line 63) | def store_reg_to_reg(self, dst, src):
method parse_and_get_var_hex (line 70) | def parse_and_get_var_hex(self, vstr):
method store_byte_to_var (line 88) | def store_byte_to_var(self, v, b):
method store_bytes_to_vars (line 102) | def store_bytes_to_vars(self, v, bs):
method store_key_to_name (line 112) | def store_key_to_name(self, v, b):
method int_to_bytes_list (line 118) | def int_to_bytes_list(self, v):
method store_byte_to_stack (line 136) | def store_byte_to_stack(self, b):
method dprint (line 140) | def dprint(self, s):
method traverse (line 144) | def traverse(self):
class SSSForm (line 370) | class SSSForm(ida_kernwin.Form):
method __init__ (line 371) | def __init__(self):
method OnFormChange (line 389) | def OnFormChange(self, fid):
function main (line 401) | def main():
Condensed preview — 27 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (225K chars).
[
{
"path": ".gitignore",
"chars": 1307,
"preview": "# Byte-compiled / optimized / DLL files\r\n__pycache__/\r\n*.py[cod]\r\n*$py.class\r\n\r\n# C extensions\r\n*.so\r\n\r\n# Distribution /"
},
{
"path": "ADVobfuscator/README.org",
"chars": 1106,
"preview": "* IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample\n\nThe script requires [[https://git"
},
{
"path": "ADVobfuscator/idapy3_ADVobfuscator_deob.py",
"chars": 8016,
"preview": "# idapy3_ADVobfuscator_deob.py - IDAPython script deobfuscating ADVobfuscator strings, applied to a TrickBoot sample\n# T"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.org",
"chars": 817,
"preview": "#+OPTIONS: ^:{}\n* ida_haru\n\nScripts/plugins for IDA Pro\n\nNote: Old scripts don't work for IDA 8.x, but I leave them just"
},
{
"path": "bindiff/README.org",
"chars": 7599,
"preview": "#+OPTIONS: ^:{}\n\n#+TITLE: BinDiff wrapper script for multiple binary diffing\n\n* Purpose\n\nmultiple binary diffing up to 1"
},
{
"path": "bindiff/bindiff.py",
"chars": 20500,
"preview": "# bindiff.py - BinDiff wrapper script for multiple binary diffing\n# Takahiro Haruyama (@cci_forensics)\n\nimport argparse,"
},
{
"path": "bindiff/bindiff_export.idc",
"chars": 436,
"preview": "#include <idc.idc>\nstatic main()\n{\n ChangeConfig(\"ABANDON_DATABASE=YES\");\n Batch(0);\n Wait();\n //RunPlugin(\""
},
{
"path": "bindiff/save_func_names.py",
"chars": 1578,
"preview": "import os, pickle, re\n\ng_track_parent_th = 2 # parent function tracking level threshold\ng_parent_func_exclude_list = ['_"
},
{
"path": "bindiff/save_func_names_7x.py",
"chars": 1720,
"preview": "import os, pickle, re\r\nfrom idautils import *\r\n\r\ng_track_parent_th = 2 # parent function tracking level threshold\r\ng_par"
},
{
"path": "callstrings/README.org",
"chars": 837,
"preview": "#+OPTIONS: ^:{}\n* callstrings - deobfuscating Hodur's global string encryption\n\n- Recover strings using various methods "
},
{
"path": "callstrings/hexrays_utils.py",
"chars": 14896,
"preview": "'''\nhexrays_utils.py - common classes/functions using Hex-Rays decompiler APIs\nTakahiro Haruyama (@cci_forensics)\n'''\n\n#"
},
{
"path": "callstrings/ida_callstrings_dbg.py",
"chars": 5682,
"preview": "'''\nida_callstrings_dbg.py - string deobfuscation using IDA debug hook class\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimp"
},
{
"path": "callstrings/ida_callstrings_flare_emu.py",
"chars": 9998,
"preview": "'''\nida_callstrings_flare_emu.py - string deobfuscation using flare-emu\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimport i"
},
{
"path": "callstrings/ida_callstrings_static.py",
"chars": 3443,
"preview": "'''\nida_callstrings_static.py - string deobfuscation for Hodur\nTakahiro Haruyama (@cci_forensics)\n'''\n\nimport idaapi\nida"
},
{
"path": "eset_crackme/README.org",
"chars": 420,
"preview": "* IDA Pro loader/processor modules for ESET CrackMe driver VM\n\nYou can download the initial sample for the CrackMe chall"
},
{
"path": "eset_crackme/loaders/ida_loader_drv_vm.py",
"chars": 2282,
"preview": "import idaapi\nimport ida_segment\nfrom idc import *\nfrom struct import *\n\nDATA_SEG_START = 0x10000 # may be changed\n\ndef "
},
{
"path": "eset_crackme/procs/ida_processor_drv_vm.py",
"chars": 22984,
"preview": "import sys\nimport copy\n\nimport ida_idaapi\nimport ida_idp\nimport ida_ua\nimport ida_bytes\nimport ida_xref\nimport ida_offse"
},
{
"path": "fn_fuzzy/README.org",
"chars": 932,
"preview": "#+OPTIONS: ^:{}\n\n#+TITLE: fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\n\n* Motivation\n\nSee the "
},
{
"path": "fn_fuzzy/cli_export.py",
"chars": 6684,
"preview": "# cli_export.py - batch export script for fn_fuzzy\n# Takahiro Haruyama (@cci_forensics)\n\nimport argparse, subprocess, os"
},
{
"path": "fn_fuzzy/dump_types.py",
"chars": 186,
"preview": "import os\n\ndef main():\n path = os.path.splitext(get_idb_path())[0] + '.idc'\n gen_file(OFILE_IDC, path, 0, 0, GENFL"
},
{
"path": "fn_fuzzy/fn_fuzzy.py",
"chars": 26839,
"preview": "# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\n# Takahiro Haruyama (@cci_forensics)\n\nimport os"
},
{
"path": "fn_fuzzy/fn_fuzzy_7x.py",
"chars": 27683,
"preview": "# fn_fuzzy.py - IDAPython script for fast multiple binary diffing triage\r\n# Takahiro Haruyama (@cci_forensics)\r\n\r\nimport"
},
{
"path": "fn_fuzzy/yara_fn.py",
"chars": 9513,
"preview": "'''\nIDAPython script that generates a YARA rule to match against the\nbasic blocks of the current function. It masks out "
},
{
"path": "fn_fuzzy/yara_fn_7x.py",
"chars": 9833,
"preview": "'''\r\nIDAPython script that generates a YARA rule to match against the\r\nbasic blocks of the current function. It masks ou"
},
{
"path": "stackstring_static/README.org",
"chars": 607,
"preview": "* stackstring_static.py - IDAPython script statically-recovering strings constructed in stack\n\nThe motivation is the sam"
},
{
"path": "stackstring_static/stackstring_static.py",
"chars": 18471,
"preview": "# stackstring_static.py - IDAPython script statically-recovering strings constructed in stack\n# Takahiro Haruyama (@cci_"
}
]
About this extraction
This page contains the full source code of the TakahiroHaruyama/ida_haru GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 27 files (210.7 KB), approximately 55.4k tokens, and a symbol index with 283 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.