Repository: nlitsme/idbutil Branch: master Commit: 52603ccb954e Files: 23 Total size: 133.2 KB Directory structure: gitextract_5u3ptauz/ ├── .gitmodules ├── CMakeLists.txt ├── Findidbutil.cmake ├── IDB-FORMAT.md ├── Jenkinsfile ├── LICENSE ├── Makefile ├── Makefile.linux ├── README.md ├── cmake_find/ │ ├── Findcpputils.cmake │ ├── Finddoctest.cmake │ ├── Findidasdk.cmake │ ├── Findlibgmp.cmake │ ├── boilerplate.cmake │ └── dumpvars.cmake ├── getcontrib.sh ├── include/ │ └── idblib/ │ └── idb3.h ├── tests/ │ ├── CMakeLists.txt │ ├── test-idb3.cpp │ ├── unittestframework.h │ └── unittests.cpp └── tools/ ├── CMakeLists.txt └── idbtool.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitmodules ================================================ ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.10) project(idbutil) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_find") include(boilerplate) find_package(idasdk REQUIRED) find_package(cpputils REQUIRED) find_package(libgmp) add_library(idblib INTERFACE) target_include_directories(idblib INTERFACE include) if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME OR BUILD_TOOLS) add_subdirectory(tools) endif() include(CTest) if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME AND BUILD_TESTING OR BUILD_ALL_TESTS) add_subdirectory(tests) endif() ================================================ FILE: Findidbutil.cmake ================================================ if (TARGET idbutil) return() endif() find_path(IDBUTIL_DIR NAMES include/idblib/idb3.h PATHS ${CMAKE_SOURCE_DIR}/symlinks/idbutil) if(IDBUTIL_DIR STREQUAL "IDBUTIL_DIR-NOTFOUND") include(FetchContent) FetchContent_Populate(idbutil GIT_REPOSITORY https://github.com/nlitsme/idbutil) set(IDBUTIL_DIR ${idbutil_SOURCE_DIR}) else() set(idbutil_BINARY_DIR 
${CMAKE_BINARY_DIR}/idbutil-build) endif() add_subdirectory(${IDBUTIL_DIR} ${idbutil_BINARY_DIR}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(idbutil REQUIRED_VARS IDBUTIL_DIR) ================================================ FILE: IDB-FORMAT.md ================================================ IDApro databases ================== An IDApro database consists of one large file which contains several sections. At the start of the `idb` or `i64` file there is a list of file offsets pointing to these sections. Sections can optionally be stored compressed. When a database is opened by IDA the sections are extracted from the main data file and stored in separate files. When you only need to read from the database, and don't want to change anything, the splitting into `id0`, `id1`, `nam` and `til` files is not necessary. IDApro does this anyway, since it expects the user to make changes to the database. Very old IDApro versions ( v1.6 and v2.0 ) store the sections separately, such that there could only be one database per directory. | index | extension | contents | | :---- | :-------- | :------------------------- | | 0 | id0 | A btree key/value database | | 1 | id1 | Flags for each byte | | 2 | nam | A list of named offsets | | 3 | seg | Unknown | | 4 | til | Type information | | 5 | id2 | Unknown | Older ida versions don't have the id2 file. Newer ida versions don't have the seg file. Newer ida versions use 64 bit file offsets, so IDA can support files larger than 4GB. There is no difference in the IDB header between 32 bit ( with `.idb` extension ) and 64 bit ( with `.i64` extension ) databases. ## ID0 section. The ID0 section contains a b-tree database. This is a single large key-value database, like leveldb. There are three main groups of key types: * Bookkeeping, so IDApro can quickly decide what the next free nodeid is. These keys all start with a '$' (dollar) sign. 
* `$ MAX LINK` * `$ MAX NODE` * `$ NET DESC` * Nodes, keys starting with a '.' (dot). * followed by an address, or internal nodeid. * 32 bit databases use 32 bit addresses, 64 bit databases use 64 bit addresses here. * internal nodeid's have the upper 8 bits set to one, so `0xFF000000` for a 32 bit database, or `0xFF00000000000000` for a 64 bit database. * a tag, `A` for altvals, `S` for supvals, etc. See netnode.hpp in the idasdk. * optionally followed by an index or hashkey value, depending on the tag. * both the address and index value are encoded in bigendian byte order. * Name index, keys starting with an `N`, followed by a name. The value being a 32 or 64 bit offset. * names up to 511 are encoded as plain strings. Longer names start with a NUL byte, followed by a blob index pointing to a blob at special nodeid `0xFF000000(00000000)`. * the maximum name length is 32 * 1024 characters. * Very old ida versions had keys starting with lowercase 'n', and '-' (minus). * The maximum key size is 512 bytes, including dots, 'N', etc. The range of internal nodeid's is the reason you cannot have code or data in your disassembly at addresses starting with `0xFF000000(00000000)`. IDA will allow you to create such segments manually. Doing so will usually result in corrupted databases. There are two types of names: * Internal, pointing to internal nodeid's. Examples: `$ structs`, `Root Node`. Most have a space in them. * Labels, pointing to addresses in the disassembly. The maximum value size is 1024 bytes. Several types of values: * Integers, encoded in little endian byte order. * Strings are sometimes NUL terminated, sometimes not. * In several cases structured information is stored in a _packed_ format, see below. ### packed values Packed values are used among others for structure and segment definitions. In packed data: * Values in the range 0x00-0x7f are stored in a single byte. * Values in the range 0x80-0x3fff are stored ORRED with 0x8000. 
* Values in the range 0x4000-0x1fffffff are stored ORRED with 0xC0000000. * Larger 32 bit values are stored prefixed with a 0xFF byte. * 64 bit values are stored as two consecutive numbers. * All values are stored in big-endian byte order. ### The B-tree format The file is organised in 8kbyte pages, where the first page contains a header with pagesize, pointer to a list of free pages, pointer to the root of the page tree, the number of records, and number of pages. There are two types of pages, leaf pages, which don't contain pointers to other pages, but only key-value records. And index pages, with a _preceding_ pointer, and where all key-value records contain a pointer to a page where all keys in the pointed-to page have values greater than the key containing the page pointer. This makes it very efficient to lookup records by key. The page tree looks like this. Between brackets are key values, the pointer marked with a `*` (STAR) is the _preceding_ pointer. Values are not shown. *-------->[00] *------>[02]---+ [01] root ->[08]---+ [05]-+ | [17]-+ | | +--->[03] | | | [04] | | | | | +----->[06] | | [07] | | | | *-------->[09] | +->[11]---+ [10] | [14]-+ | | | +--->[12] | | [13] | | | +----->[15] | [16] | | *-------->[18] +--->[20]---+ [19] [23]-+ | | +--->[21] | [22] | +----->[24] [25] Each page has a small header, with a pointer to a preceding page, and a record count. For Leaf pages the _preceding_ pointer is zero. Following the header there is an index containing offsets to the actual records in the page, and a pointer to the next level index or leaf page. The records are stored as _keylength_, keydata, _datalength_, data. All records in the level below an index are guaranteed to have a key greater than the key in the index. In leaf pages consecutive entries will often have keys which are very similar. The index stores an offset into the key from which the keys differ, only the part that differs is stored. 
| key | binary representation | compressed key | :--------------------------------- | :---------------------- | :------------------ | ('.', 0xFF000002, 'N') | 2eff0000024e | (0, 2eff0000024e) | ('.', 0xFF000002, 'S', 0x00000001) | 2eff0000025300000001 | (5, 5300000001) | ('.', 0xFF000002, 'S', 0x00000002) | 2eff0000025300000002 | (9, 02) ## The ID1 section The ID1 section contains the flag values as returned by the idc `GetFlags` function. It starts with a list of file regions, followed by flags for each byte. ## Netnodes The highlevel view of the `ID0` database is that of netnodes, as partially documented in the idasdk. The most important nodes are: * `Root Node` * lists: `$ structs`, `$ enums`, `$ scripts` * the values in a list are stored in the altnodes of the list node. * the values are one more than the actual nodeid pointed to: a list pointing to struct id's 0xff000bf6, 0xff000c01 would contain : 0xff000bf7, 0xff000c02 * `$ funcs` * `$ fileregions`, `$ segs`, '$ srareas' * '$ entry points' ### structs The main struct node: | node | contents | :--- | :---- | (id, 'N') | the struct name | (id, 'M', 0) | packed member info, nodeids for members. The struct member nodes: | node | contents | :--- | :---- | (id, 'N') | the member name | (id, 'M', 0) | packed member info | (id, 'A', 3) | enum id | (id, 'A', 11) | struct id | (id, 'A', 16) | string type | (id, 'S', 0) | member comment | (id, 'S', 1) | repeatable member comment | (id, 'S', 9) | offset spec | (id, 'S', 0x3000) | typeinfo ### history The `$ curlocs` list contains several location histories: For example, the `$ IDA View-A` netnode contains the following keys: * `A 0` - highest history supval item * `A 1` - number of history items * `A 2` - object type: `idaplace_t` * `S ` - packed history item: itemlinenr, ea\_t, int, int, colnr, rownr ### normal addresses In the SDK, in the file `nalt.hpp` there are many more items defined. These are some of the regularly used ones. 
| key | value | description | :-- | :---- | :---------- | (addr, 'D', fromaddr) | reftype | data xref from | (addr, 'd', toaddr) | reftype | data xref to | (addr, 'X', fromaddr) | reftype | code xref from | (addr, 'x', toaddr) | reftype | code xref to | (addr, 'N') | string | global label | (addr, 'A', 1) | jumptableid+1 | jumptable target | (addr, 'A', 2) | nodeid+1 | hexrays info | (addr, 'A', 3) | structid+1 | data type | (addr, 'A', 8) | dword | additional flags | (addr, 'A', 0xB) | enumid+1 | first operand enum type | (addr, 'A', 0x10) | dword | string type | (addr, 'A', 0x11) | dword | align type | (addr, 'S', 0) | string | comment | (addr, 'S', 1) | string | repeatable comment | (addr, 'S', 4) | data | constant pool reference | (addr, 'S', 5) | data | array | (addr, 'S', 8) | data | jumptable info | (addr, 'S', 9) | packed | first operand offset spec | (addr, 'S', 0xA) | packed | second operand offset spec | (addr, 'S', 0x1B) | data | ? | (addr, 'S', 1000+linenr) | string | anterior comment | (addr, 'S', 0x1000) | packed | SP change point | (addr, 'S', 0x3000) | data | function prototype | (addr, 'S', 0x3001) | data | argument list | (addr, 'S', 0x4000+n) | packed blob | register renaming | (addr, 'S', 0x5000) | packed blob | function's local labels | (addr, 'S', 0x6000) | data | register args | (addr, 'S', 0x7000) | packed | function tails | (addr, 'S', 0x7000) | dword | tail backreference | ================================================ FILE: Jenkinsfile ================================================ pipeline { agent { label "windows" } stages { stage("clean") { steps { sh '''git clean -dfx''' } } stage("windows-build") { steps { sh '''#!/bin/bash set -e . 
/c/local/msvcenv.sh export BOOST_ROOT=c:/local/boost_1_74_0 export IDASDK=c:/local/idasdk_pro82 make vc ''' } } } } ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Willem Hengeveld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ CMAKEARGS+=$(if $(D),-DCMAKE_BUILD_TYPE=Debug,-DCMAKE_BUILD_TYPE=Release) CMAKEARGS+=$(if $(COV),-DOPT_COV=1) CMAKEARGS+=$(if $(PROF),-DOPT_PROF=1) CMAKEARGS+=$(if $(LIBCXX),-DOPT_LIBCXX=1) CMAKE=cmake JOBSFLAG=$(filter -j%,$(MAKEFLAGS)) cmake: $(CMAKE) -B build . $(CMAKEARGS) $(MAKE) -C build $(JOBSFLAG) $(if $(V),VERBOSE=1) vc: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/Common7/IDE/CommonExtensions/Microsoft/CMake/CMake/bin/cmake.exe" -G"Visual Studio 16 2019" -B build . 
$(CMAKEARGS) "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/MSBuild/Current/Bin/amd64/MSBuild.exe" build/*.sln -t:Rebuild ctest: TEST=1 ctest: cmake cd build && ctest --verbose clean: $(RM) -r build CMakeFiles CMakeCache.txt CMakeOutput.log ================================================ FILE: Makefile.linux ================================================ cmake: cmake -B build . $(if $(D),-DCMAKE_BUILD_TYPE=Debug,-DCMAKE_BUILD_TYPE=Release) $(CMAKEARGS) $(MAKE) -C build $(if $(V),VERBOSE=1) # either specify `idasdk` and `idabin` in this config file, # or pass them as variables on the make commandline. -include ../idacfg.mk space=$(empty) $(empty) escapespaces=$(subst $(space),\ ,$1) fileexists=$(wildcard $(call escapespaces,$1)) ifeq ($(call fileexists,$(idasdk)/include/ida.hpp),) $(error The `idasdk` variable does not point to a directory containing an include directory with the ida headers.) endif ifneq ($(wildcard $(SystemRoot)/explorer.exe $(SYSTEMROOT)/explorer.exe),) OSTYPE=windows CFLAGS+=-D__NT__=1 IDA64LIB=$(idasdk)/lib/x64_win_vc_64/ida.lib IDA32LIB=$(idasdk)/lib/x64_win_vc_32/ida.lib L=.dll O=.obj DLLFLAGS=/dll endif ifneq ($(wildcard /System/Library/Extensions),) OSTYPE=darwin CFLAGS+=-D__MAC__=1 IDA64LIB=$(idasdk)/lib/x64_mac_clang_64/libida64.dylib IDA32LIB=$(idasdk)/lib/x64_mac_clang_32/libida.dylib L=.dylib O=.o DLLFLAGS=-dynamiclib endif ifneq ($(wildcard /sbin/modprobe),) OSTYPE=linux CFLAGS+=-D__LINUX__=1 IDA64LIB=$(idasdk)/lib/x64_linux_gcc_64/libida64.so IDA32LIB=$(idasdk)/lib/x64_linux_gcc_32/libida.so L=.so O=.o DLLFLAGS=--shared endif APPS=idbtool unittests all: $(APPS) clean: $(RM) $(APPS) $(wildcard *.o *.obj) $(RM) -r build CMakeFiles CMakeCache.txt CMakeOutput.log unittests: $(notdir $(subst .cpp,$(O),$(wildcard tests/*.cpp))) $(CXX) $(LDFLAGS) -o $@ $^ idbtool: idbtool$(O) ldflags_idbtool=-lz -L/usr/local/lib -lgmp CFLAGS+=-fPIC $(if $(D),-O0,-O3) -g -Wall -I /usr/local/include -I submodules/cpputils -I 
$(idasdk)/include/ -I . CFLAGS+=-DUSE_STANDARD_FILE_FUNCTIONS CFLAGS+=-DUSE_DANGEROUS_FUNCTIONS ifneq ($(OSTYPE),windows) CFLAGS+=-DHAVE_LIBGMP endif # .. todo: on windows doctest build fails with DOCTEST_CHECK_THROWS - ident not found. CFLAGS+=-DUSE_DOCTEST #CFLAGS+=-DUSE_CATCH CFLAGS+=-DNOMINMAX -DWIN32_LEAN_AND_MEAN LDFLAGS+=-g -Wall ifeq ($(OSTYPE),windows) CFLAGS+=-std:c++17 else CFLAGS+=-std=c++17 endif %$(O): tests/%.cpp $(CXX) $(CFLAGS) -c $^ -o $@ %$(O): %.cpp $(CXX) -c $^ -o $@ $(cflags_$(basename $(notdir $@))) $(CFLAGS) %: %$(O) $(CXX) $^ -o $@ $(ldflags_$(basename $(notdir $@))) $(LDFLAGS) install: cp idbtool ~/bin/ pull: git submodule update --recursive --remote ================================================ FILE: README.md ================================================ IDBTOOL ======= A tool for extracting information from IDA databases. `idbtool` knows how to handle databases from all IDA versions since v2.0, both `i64` and `idb` files. You can also use `idbtool` to recover information from unclosed databases. `idbtool` works without change with IDA v7.0. Much faster than loading a file in IDA -------------------------------------- With idbtool you can search thousands of .idb files in seconds. More precisely: on my laptop it takes: * 1.5 seconds to extract 143 idc scripts from 119 idb and i64 files. * 3.8 seconds to print idb info for 441 files. * 5.6 seconds to extract 281 enums containing 4726 members from 35 files. * 67.8 seconds to extract 5942 structs containing 33672 members from 265 files. Loading an approximately 5 Gbyte idb file in IDA, takes about 45 minutes. While idb3.h takes basically no time at all, no more than a few milliseconds. Download ======== Two versions of this tool exist: One written in python * https://github.com/nlitsme/pyidbutil One written in C++ * https://github.com/nlitsme/idbutil Both repositories contain a library which can be used for reading `.idb` or `.i64` files. 
An IDA Pro plugin making use of `idb3.h` can be found here: * https://github.com/nlitsme/idbimport This is a plugin making it easy to copy scripts, structs or enums from recent ida databases. Usage ===== Usage: idbtool [options] [database file(s)] [-- address-list] * `-n` or `--names` will list all named values in the database. * `-s` or `--scripts` will list all scripts stored in the database. * `-u` or `--structs` will list all structs stored in the database. * `-e` or `--enums` will list all enums stored in the database. * `-i` or `--info` will print some general info about the database. * `-a` list all names, including ..todo.. * `-d` dump btree page tree contents. * `--inc`, `--dec` list all records in ascending / descending order. * `-q` or `--query` search specific records in the database. * `-m` or `--limit` limit the number of results returned by `-q`. All addresses after `--` will be printed as `symbol+offset`. Query ----- Queries need to be specified last on the command line. Example: idbtool [database file(s)] --query "Root Node;V" Will list the source binary for all the databases specified on the command line. A query is a string with the following format: * [==,<=,>=,<,>] - optional relation, default: == * a base node key: * a DOT followed by the numeric value of the nodeid. * a HASH followed by the numeric value of the system-nodeid. * a QUESTION followed by the name of the node. -> a 'N'ame node * the name of the node. -> the name is resolved, results in a '.'Dot node * an optional tag ( A for Alt, S for Supval, etc ) * an optional index value Example queries: * `Root Node;V` -> prints record containing the source binary name * `?Root Node` -> prints the Name record pointing to the root * `>Root Node` -> prints the first 10 records starting with the root node id. * `<Root Node` -> prints the 10 records starting with the records before the root node. * `.0xff000001;N` -> prints the root node name entry. * `#1;N` -> prints the root node name entry. 
List the highest node and following record in the database in two different ways, the first: starting at the first record below `ffc00000`, and listing the next. The second: starting at the first record after `ffc00000`, and listing the previous: * `--query "<#0xc00000" --limit 2 --inc -v` * `--query ">#0xc00000" --limit 2 --dec -v` Note that this should be the nodeid in the `$ MAX NODE` record. List the last two records: * `--limit 2 --dec -v` List the first two records, the `$ MAX LINK` and `$ MAX NODE` records: * `--limit 2 --inc -v` A full database dump -------------------- Several methods exist for printing all records in the database. This may be useful if you want to investigate more of IDA's internals, but can also be useful in recovering data from corrupted databases. * `--inc`, `--dec` can be used to enumerate all b-tree records in either forward, or backward direction. * `--id0` walks the page tree, instead of the b-tree, printing the contents of each page LIBRARY ======= The header file `idb3.h` contains a library for reading from IDA Pro databases. ## IDBFile Class for accessing sections of an `.idb` or `.i64` file. Constructor Parameters: * `std::shared_ptr` ( typedefed to `stream_ptr` ) Methods: * `stream_ptr getsection(int)` ## ID0File, ID1File, NAMFile Constructor Parameters: * `IDBFile& idb` * `stream_ptr` Constant * `INDEX` - the argument for `idb.getsection` ## ID0File Methods * `Cursor find(relation_t, nodeid, ...)` * `...` can be: * tag, index * tag, hash * tag * `Cursor find(relation_t, std::string key)` * `std::string blob(nodeid, tag, ...)` * `uint64_t node(std::string name)` * `bool is64bit()` * `true` for `.i64` files. * `uint64_t nodebase()` * return `0xFF000000(00000000)` for 32/64 bit databases. * `void enumlist(uint64_t nodeid, char tag, CB cb)` * call `cb` for each value in the list. 
Convenience Methods * `std::string getdata(ARGS...args)` * `std::string getstr(ARGS...args)` * `uint64_t getuint(ARGS...args)` * `uint64_t getuint(BtreeBase::Cursor& c)` * `std::string getname(uint64_t node)` ## ID1File Methods * `uint32_t GetFlags(uint64_t ea)` ## NAMFile Methods * `uint64_t findname(uint64_t ea)` ## Cursor Methods * `void next()` * move cursor to the next btree record * `void prev()` * move cursor to the previous btree record * `bool eof()` * did we reach the start/end of the btree? * `std::string getkey()` * return the key pointed to by the cursor * `std::string getval()` * return the value pointed to by the cursor TODO ==== * add option to list all comments stored in the database * support compressed sections * add option to list flags for a list of addresses. Author ====== Willem Hengeveld ================================================ FILE: cmake_find/Findcpputils.cmake ================================================ if (TARGET cpputils) return() endif() # NOTE: you can avoid downloading cpputils, by symlinking to a downloaded version here: find_path(CPPUTILS_DIR NAMES include/cpputils/string-lineenum.h PATHS ${CMAKE_SOURCE_DIR}/symlinks/cpputils) if(CPPUTILS_DIR STREQUAL "CPPUTILS_DIR-NOTFOUND") include(FetchContent) FetchContent_Populate(cpputils GIT_REPOSITORY https://github.com/nlitsme/cpputils) set(CPPUTILS_DIR ${cpputils_SOURCE_DIR}) else() set(cpputils_BINARY_DIR ${CMAKE_BINARY_DIR}/cpputils-build) endif() add_subdirectory(${CPPUTILS_DIR} ${cpputils_BINARY_DIR}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(cpputils REQUIRED_VARS CPPUTILS_DIR) ================================================ FILE: cmake_find/Finddoctest.cmake ================================================ list(APPEND CMAKE_MODULE_PATH "/usr/lib/cmake/doctest/") if (TARGET doctest) return() endif() if (TARGET doctest::doctest) return() endif() file(GLOB DOCTEST_DOCTEST_DIRS /usr/include /usr/local/include /usr/local/opt/doctest/include) 
find_path(DOCTEST_DOCTEST_DIR NAMES doctest/doctest.h PATHS ${DOCTEST_DOCTEST_DIRS}) if (DOCTEST_DOCTEST_DIR STREQUAL "DOCTEST_DOCTEST_DIR-NOTFOUND") include(FetchContent) FetchContent_Populate( doctest GIT_REPOSITORY https://github.com/doctest/doctest.git ) list(APPEND CMAKE_MODULE_PATH "${doctest_SOURCE_DIR}/scripts/cmake/") set(DOCTEST_DOCTEST_DIR ${doctest_SOURCE_DIR}) else() set(doctest_BINARY_DIR ${CMAKE_BINARY_DIR}/doctest-build) endif() add_library(doctest INTERFACE) target_include_directories(doctest INTERFACE ${DOCTEST_DOCTEST_DIR}) add_library(doctest::doctest ALIAS doctest) ================================================ FILE: cmake_find/Findidasdk.cmake ================================================ if (TARGET idasdk) return() endif() # note: this depends partially on my local install find_path(IDASDK_PATH NAMES include/netnode.hpp PATHS $ENV{IDASDK} $ENV{HOME}/src/idasdk_pro82 $ENV{HOME}/src/idasdk_pro80 c:/local/idasdk_pro82 c:/local/idasdk77) if (IDASDK_PATH STREQUAL "IDASDK_PATH-NOTFOUND") message(FATAL_ERROR "IDASDK not found on ${CMAKE_SYSTEM_NAME}.") endif() if(WIN32) # note that for windows both libs have the same name. find_library(IDALIB32 ida ${IDASDK_PATH}/lib/x64_win_vc_32 ${IDASDK_PATH}/lib/x64_win_vc_32_pro) find_library(IDALIB64 ida ${IDASDK_PATH}/lib/x64_win_vc_64 ${IDASDK_PATH}/lib/x64_win_vc_64_pro) elseif(LINUX) find_library(IDALIB32 ida ${IDASDK_PATH}/lib/x64_linux_gcc_32 ${IDASDK_PATH}/lib/x64_linux_gcc_32_pro) find_library(IDALIB64 ida64 ${IDASDK_PATH}/lib/x64_linux_gcc_64 ${IDASDK_PATH}/lib/x64_linux_gcc_64_pro) elseif(DARWIN) # now this depends on the host, better would be to set # CMAKE_OSX_ARCHITECTURES to arm64 for the arm build. 
if (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL x86_64) find_library(IDALIB32 ida ${IDASDK_PATH}/lib/x64_mac_clang_32 ${IDASDK_PATH}/lib/x64_mac_clang_32_pro) find_library(IDALIB64 ida64 ${IDASDK_PATH}/lib/x64_mac_clang_64 ${IDASDK_PATH}/lib/x64_mac_clang_64_pro) else() find_library(IDALIB32 ida ${IDASDK_PATH}/lib/arm64_mac_clang_32 ${IDASDK_PATH}/lib/arm64_mac_clang_32_pro) find_library(IDALIB64 ida64 ${IDASDK_PATH}/lib/arm64_mac_clang_64 ${IDASDK_PATH}/lib/arm64_mac_clang_64_pro) endif() endif() if (IDALIB64 STREQUAL "IDALIB64-NOTFOUND") message(FATAL_ERROR "could not find libida64") endif() if (IDALIB32 STREQUAL "IDALIB32-NOTFOUND") message(FATAL_ERROR "could not find libida") endif() message(STATUS "found ida headers at: ${IDASDK_PATH}/include") message(STATUS "found ida32 lib at: ${IDALIB32}") message(STATUS "found ida64 lib at: ${IDALIB64}") add_library(idasdk INTERFACE) target_include_directories(idasdk INTERFACE ${IDASDK_PATH}/include) target_compile_definitions(idasdk INTERFACE MAXSTR=1024) # since ida v7 all builds are 64 bit target_compile_definitions(idasdk INTERFACE __X64__) if (LINUX) target_compile_definitions(idasdk INTERFACE __LINUX__=1) elseif (DARWIN) target_compile_definitions(idasdk INTERFACE __MAC__=1) elseif (WIN32) target_compile_definitions(idasdk INTERFACE __NT__=1) endif() # this prevents idasdk:fpro.h to redefine all stdio stuff to 'dont_use_XXX' target_compile_definitions(idasdk INTERFACE USE_STANDARD_FILE_FUNCTIONS) # this prevents idasdk:pro.h to redefine all string functions to 'dont_use_XXX' target_compile_definitions(idasdk INTERFACE USE_DANGEROUS_FUNCTIONS) # disallow obsolete sdk functions. 
target_compile_definitions(idasdk INTERFACE NO_OBSOLETE_FUNCS) target_compile_definitions(idasdk INTERFACE __DEFINE_ROOT_NODE__) target_compile_definitions(idasdk INTERFACE __DEFINE_INF__) target_compile_definitions(idasdk INTERFACE __DEFINE_PH__) # __EA64__=1 - for ida64 -> handled by choosing idasdk / idasdk64 # * this chooses between sizeof(ea_t) == 4 or 8 # __IDP__ for processor modules -> also needs win32: -export:LPH # __PLUGIN__ for plugins add_library(idasdk32 INTERFACE) target_link_libraries(idasdk32 INTERFACE idasdk ${IDALIB32}) add_library(idasdk64 INTERFACE) target_link_libraries(idasdk64 INTERFACE idasdk ${IDALIB64}) target_compile_definitions(idasdk64 INTERFACE __EA64__=1) ================================================ FILE: cmake_find/Findlibgmp.cmake ================================================ if (${libgmp_FOUND}) return() endif() if (TARGET libgmp) return() endif() find_path(GMPINC_DIR NAMES gmp.h gmpxx.h PATHS /usr/include /usr/local/include) find_library(GMPLIB_DIR NAMES gmp libgmp PATHS /usr/lib /usr/local/lib) if(NOT GMPINC_DIR STREQUAL "GMPINC_DIR-NOTFOUND" AND NOT GMPLIB_DIR STREQUAL "GMPLIB_DIR-NOTFOUND") add_library(libgmp INTERFACE) target_link_libraries(libgmp INTERFACE ${GMPLIB_DIR}) target_include_directories(libgmp INTERFACE ${GMPINC_DIR}) target_compile_definitions(libgmp INTERFACE HAVE_LIBGMP) endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(libgmp REQUIRED_VARS GMPINC_DIR GMPLIB_DIR) ================================================ FILE: cmake_find/boilerplate.cmake ================================================ option(OPT_STL_DEBUGGING "Build with STL debugging" OFF) option(OPT_PROF "Build for profiling" OFF) option(OPT_COV "Build for code coverage" OFF) option(OPT_LIBCXX "Build with libcxx" OFF) option(OPT_MODULES "use c++20 modules" OFF) option(OPT_ANALYZE "add -fanalyzer" OFF) option(OPT_SYMBOLS "With symbols" OFF) option(OPT_SANITIZE "With -fsanitize" OFF) option(OPT_TSAN "With thread 
sanitizer" OFF) option(OPT_ASAN "With address sanitizer" OFF) option(OPT_CLANG_TIDY "With clang-tidy checks" OFF) option(OPT_COMPILE_COMMANDS "Generate compile_commands.json" OFF) option(OPT_INSTALL_HEADERS "Export header files for INSTALL target" OFF) option(OPT_DISABLE_CMAKE_SANITY_CHECK "Disable CMake call sanity checks (ex: OpenWrt)" OFF) option(OPT_DISABLE_DEVEL_INSTALL "Disable all development install targets (ex: Win NSIS installer)" OFF) if (${CMAKE_SYSTEM_NAME} MATCHES "Linux") set(LINUX TRUE) elseif (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(DARWIN TRUE) if (${CMAKE_OSX_SYSROOT} MATCHES "/iPhoneOS.platform") set(IPHONE TRUE) elseif (${CMAKE_OSX_SYSROOT} MATCHES "/iPhoneSimulator.platform") set(IPHONESIM TRUE) elseif (${CMAKE_OSX_SYSROOT} MATCHES "/MacOSX.platform") set(MACOS TRUE) else() message(FATAL_ERROR "Unsupported apple platform") endif() elseif (${CMAKE_SYSTEM_NAME} MATCHES "iOS") set(DARWIN TRUE) if (${CMAKE_OSX_SYSROOT} MATCHES "/iPhoneOS.platform") set(IPHONE TRUE) elseif (${CMAKE_OSX_SYSROOT} MATCHES "/iPhoneSimulator.platform") set(IPHONESIM TRUE) endif() elseif (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") set(FREEBSD TRUE) endif() if (NOT OPT_DISABLE_CMAKE_SANITY_CHECK) # checking if we are called in the correct way: # with a -B argument. and without a cache file in the source directory. if (CMAKE_CACHEFILE_DIR STREQUAL "${CMAKE_SOURCE_DIR}") message(FATAL_ERROR "\nUnexpected CMakeCache.txt file in the source directory. 
Please remove it.") return() endif() if (EXISTS ${CMAKE_BINARY_DIR}/CMakeLists.txt) message(FATAL_ERROR "\nRun cmake with an explicit -B buildpath") return() endif() endif() if (OPT_ANALYZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # see https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Static-Analyzer-Options.html#index-analyzer set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fanalyzer") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # https://clang.llvm.org/docs/UsersManual.html set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --analyze") endif() endif() if (OPT_ASAN AND OPT_TSAN) message(FATAL_ERROR "Only one sanitizer can be active at a time") elseif (OPT_ASAN) # https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Instrumentation-Options.html#index-fsanitize_003daddress set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=dataflow") elseif(OPT_TSAN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") endif() if (OPT_CLANG_TIDY) # clang-tidy supports a range of different checks. 
For a list of all available # checks, check the clang-tidy website: # https://clang.llvm.org/extra/clang-tidy/checks/list.html # To enable only certain checks, we disable all of them first and then select # - clang-analyzer-* => Clang Static Analyzer # - bugprone-* => bug-prone code constructs (except bugprone-easily-swappable-parameters, bugprone-suspicious-include) # - cert-* => CERT Secure Coding Guidelines # - concurrency-* => General concurrency checks # - performance-* => General performance checks # - portability-* => General portability checks set(CLANG_TIDY_CHECKS "clang-analyzer-*,bugprone-*,-bugprone-easily-swappable-parameters,-bugprone-suspicious-include,cert-*,concurrency-*,performance-*,portability-*") set(CMAKE_CXX_CLANG_TIDY "clang-tidy;--extra-arg-before=-std=c++${CMAKE_CXX_STANDARD};-checks=-*,${CLANG_TIDY_CHECKS}") endif() if (OPT_LIBCXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() if (OPT_STL_DEBUGGING) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_DEBUG") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_LIBCPP_DEBUG_LEVEL=1") endif() if (OPT_PROF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg ") endif() if (OPT_SYMBOLS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ") endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-narrowing") endif() if (OPT_COV) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") message(STATUS "gcc code coverage") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftest-coverage -fprofile-arcs ") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -ftest-coverage -fprofile-arcs ") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "llvm code coverage") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping -fdebug-info-for-profiling") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mllvm -inline-threshold=100000") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") else() message(STATUS 
"don't know how to add code coverage for ${CMAKE_CXX_COMPILER_ID}") endif() endif() if(OPT_STATIC) set(LIBSTYLE STATIC) set(CMAKE_POSITION_INDEPENDENT_CODE True) else() set(LIBSTYLE SHARED) endif() if (OPT_COMPILE_COMMANDS) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) endif() if (OPT_DISABLE_DEVEL_INSTALL) set(MAY_EXCLUDE_FROM_ALL EXCLUDE_FROM_ALL) endif() # Project wide warning/error settings if(MSVC) # /W0 suppresses all warnings # /W1 displays level 1 (severe) warnings (default in command line) # /W2 displays level 1 and level 2 (significant) warnings. # /W3 displays level 1, level 2, and level 3 (production quality) warnings (default in IDE) # /W4 displays level 1, level 2, and level 3 warnings, and all level 4 (informational) warnings that aren't off by default add_compile_options(/W1) else() # Exclude the following ones for now: # -Wunused-parameter: we have delegate classes with stub methods (with unused parameters) # -Wempty-body: occurs in release builds as there are if-cases which only contain a logmsg expression # -Wunused-variable, -Wunused-value: occurs in release builds for parameters of a logmsg expression add_compile_options(-Wall -Wextra -Wno-unused-parameter -Wno-empty-body -Wno-unused-value -Wno-unused-variable) endif() if(MSVC) # /MP = multithreaded build set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") # /utf-8 = utf8 source and execution set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8") # NOBITMAP - avoid error in mmreg.h # NOMINMAX - remove 'max()' macro from global namespace # NOGDI - ... add_definitions(-DNOMINMAX -DNOGDI -DNOBITMAP -DWIN32_LEAN_AND_MEAN) add_definitions(-DWIN32) add_definitions(-D__STDC_WANT_SECURE_LIB__=1) # Executables need to resolve path to dlls (RPATH is not available on Windows). This could be done # either by using PATH env. 
variable or keeping dlls alongside with executables set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/bin) endif() if (OPT_MODULES) if (CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CXX_FLAGS -fmodules-ts) else() set(CMAKE_CXX_FLAGS -fmodules -fbuiltin-module-map) endif() endif() ================================================ FILE: cmake_find/dumpvars.cmake ================================================ # include this file to get a full dump of all cmake variables to stdout. function(dump_cmake_variables) get_cmake_property(_variableNames VARIABLES) list (SORT _variableNames) foreach (_variableName ${_variableNames}) if (ARGV0) unset(MATCHED) string(REGEX MATCH ${ARGV0} MATCHED ${_variableName}) if (NOT MATCHED) continue() endif() endif() message(STATUS "${_variableName}=${${_variableName}}") endforeach() endfunction() list(APPEND InterestingProps AUTOGEN_ORIGIN_DEPENDS AUTOMOC_COMPILER_PREDEFINES AUTOMOC_MACRO_NAMES AUTOMOC_PATH_PREFIX BINARY_DIR BUILD_WITH_INSTALL_RPATH EXCLUDE_FROM_ALL EXPORT_COMPILE_COMMANDS FOLDER HEADER_SETS IMPORTED IMPORTED_GLOBAL INCLUDE_DIRECTORIES INSTALL_RPATH INSTALL_RPATH_USE_LINK_PATH INTERFACE_HEADER_SETS INTERFACE_INCLUDE_DIRECTORIES INTERFACE_LINK_LIBRARIES ISPC_HEADER_SUFFIX LINK_LIBRARIES PCH_INSTANTIATE_TEMPLATES PCH_WARN_INVALID POSITION_INDEPENDENT_CODE RULE_LAUNCH_CUSTOM SKIP_BUILD_RPATH SOURCES SOURCE_DIR UNITY_BUILD_BATCH_SIZE UNITY_BUILD_MODE) include(CMakePrintHelpers) function(dump_targets) get_directory_property(_tlist BUILDSYSTEM_TARGETS) message(STATUS "bs: ${_tlist}") foreach (t ${_tlist}) message(STATUS "---- ${t}") get_target_property(_slist ${t} SOURCES) foreach (s ${_slist}) message(STATUS " ${s}") endforeach() cmake_print_properties(TARGETS ${t} PROPERTIES ${InterestingProps}) endforeach() endfunction() ================================================ FILE: getcontrib.sh ================================================ mkdir -p contrib 
curl -s -o contrib/doctest.h https://raw.githubusercontent.com/onqtam/doctest/master/doctest/doctest.h curl -s -o contrib/catch.hpp https://raw.githubusercontent.com/catchorg/Catch2/master/single_include/catch2/catch.hpp ================================================ FILE: include/idblib/idb3.h ================================================ /* * idb3.h is a library for accessing IDApro databases. * * Author: Willem Hengeveld * * * Toplevel class: IDBFile, use getsection to get a stream to the desired section, * Then create an ID0File, ID1File, NAMFile for that section. * */ #include #include #include #include #include #include #include #include #include #define dbgprint(...) // a sharedptr, so i can pass an istream around without // worrying about who owns it. typedef std::shared_ptr stream_ptr; // create vector from `n` invocations of `f` template std::vector getvec(int n, FN f) { std::vector v; while (n--) v.push_back(f()); return v; } //////////////////////////////////////////////////////////////////////// // Sometimes i need to pass backinserter iterators as pair // These functions make that possible. // ANY - backinserter == INT_MAX -> always enough space after a backinserter template int operator-(T lhs, typename std::back_insert_iterator rhs) { return INT_MAX; } // backinserter += INT -> does nothing template typename std::back_insert_iterator& operator+=(typename std::back_insert_iterator& rhs, int n) { return rhs; } // streamhelper: get little/big endian integers of various sizes from a stream // There are functions for 8, 16, 32, 64 bit little/big endian unsigned integers. 
// And a function for reading a database dependent word (64bit for .i64, 32bit for .idb) template class streamhelper { ISPTR _is; int _wordsize; // the wordsize of the current database public: streamhelper(ISPTR is, int wordsize) : _is(is), _wordsize(wordsize) { _is->exceptions(std::istream::failbit | std::istream::badbit); } uint8_t get8() { auto c = _is->get(); if (c==-1) throw "EOF"; return (uint8_t)c; } uint16_t get16le() { uint8_t lo = get8(); uint8_t hi = get8(); return (hi<<8) | lo; } uint16_t get16be() { uint8_t hi = get8(); uint8_t lo = get8(); return (hi<<8) | lo; } uint32_t get32le() { uint16_t lo = get16le(); uint16_t hi = get16le(); return (hi<<16) | lo; } uint32_t get32be() { uint16_t hi = get16be(); uint16_t lo = get16be(); return (hi<<16) | lo; } uint64_t get64le() { uint32_t lo = get32le(); uint32_t hi = get32le(); return (uint64_t(hi)<<32) | lo; } uint64_t get64be() { uint32_t hi = get32be(); uint32_t lo = get32be(); return (uint64_t(hi)<<32) | lo; } // function used to get the right wordsize for either .i64 or .idb file. uint64_t getword() { if (_wordsize==4) return get32le(); else if (_wordsize==8) return get64le(); throw "unsupported wordsize"; } std::string getdata(int n) { std::string str(n, char(0)); auto m = _is->readsome(&str.front(), n); str.resize(m); dbgprint("getdata -> %b\n", str); return str; } void seekg( std::istream::off_type off, std::ios_base::seekdir dir) { _is->seekg(off, dir); } void seekg( std::istream::pos_type pos ) { _is->seekg(pos); } }; // function for creating a streamhelper. 
template auto makehelper(ISPTR is, int wordsize = 0) { return streamhelper(is, wordsize); } // EndianTools: a collection of static functions for reading/writing // little/big endian integers of various sizes from an iterator range struct EndianTools { template static void set8(P first, P last, uint8_t w) { if (last-first < 1) throw "not enough space"; *first = w; } template static void setbe16(P first, P last, T w) { P p = first; if (last-p < 2) throw "not enough space"; set8(p, last, w>>8); p += 1; set8(p, last, w); } template static void setbe32(P first, P last, T w) { P p = first; if (last-p < 4) throw "not enough space"; setbe16(p, last, w>>16); p += 2; setbe16(p, last, w); } template static void setbe64(P first, P last, T w) { P p = first; if (last-p < 8) throw "not enough space"; setbe32(p, last, w>>32); p += 4; setbe32(p, last, w); } template static void setle16(P first, P last, T w) { P p = first; if (last-p < 2) throw "not enough space"; set8(p, last, w); p += 1; set8(p, last, w>>8); } template static void setle32(P first, P last, T w) { P p = first; if (last-p < 4) throw "not enough space"; setle16(p, last, w); p += 2; setle16(p, last, w>>16); } template static void setle64(P first, P last, T w) { P p = first; if (last-p < 8) throw "not enough space"; setle32(p, last, w); p += 4; setle32(p, last, w>>32); } template static uint8_t get8(P first, P last) { if (first>=last) throw "not enough space"; return *first; } template static uint16_t getbe16(P first, P last) { P p = first; if (last-p < 2) throw "not enough space"; uint8_t hi =get8(p, last); p += 1; uint8_t lo =get8(p, last); return (uint16_t(hi)<<8) | lo; } template static uint32_t getbe32(P first, P last) { P p = first; if (last-p < 4) throw "not enough space"; uint16_t hi =getbe16(p, last); p += 2; uint16_t lo =getbe16(p, last); return (uint32_t(hi)<<16) | lo; } template static uint64_t getbe64(P first, P last) { P p = first; if (last-p < 8) throw "not enough space"; uint32_t hi =getbe32(p, last); p 
+= 4; uint32_t lo =getbe32(p, last); return (uint64_t(hi)<<32) | lo; } template static uint16_t getle16(P first, P last) { P p = first; if (last-p < 2) throw "not enough space"; uint8_t lo =get8(p, last); p += 1; uint8_t hi =get8(p, last); return (uint16_t(hi)<<8) | lo; } template static uint32_t getle32(P first, P last) { P p = first; if (last-p < 4) throw "not enough space"; uint16_t lo =getle16(p, last); p += 2; uint16_t hi =getle16(p, last); return (uint32_t(hi)<<16) | lo; } template static uint64_t getle64(P first, P last) { P p = first; if (last-p < 8) throw "not enough space"; uint32_t lo =getle32(p, last); p += 4; uint32_t hi =getle32(p, last); return (uint64_t(hi)<<32) | lo; } }; // stream buffer for sectionstream // This is the class doing the actual work for sectionstream. // This presents a view of a section of a random access stream. class sectionbuffer : public std::streambuf { stream_ptr _is; std::streamoff _first; std::streamoff _last; std::streampos _curpos; public: sectionbuffer(stream_ptr is, uint64_t first, uint64_t last) : _is(is), _first(first), _last(last), _curpos(0) { _is->seekg(_first); } protected: std::streampos seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) { std::streampos newpos; switch(way) { case std::ios_base::beg: newpos = off; break; case std::ios_base::cur: newpos = _curpos + off; break; case std::ios_base::end: newpos = (_last-_first) + off; break; default: throw std::ios_base::failure("bad seek direction"); } return seekpos(newpos, which); } std::streampos seekpos(std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) { if (sp<0 || sp > (_last-_first)) return -1; _is->seekg(sp+_first); return _curpos = sp; } std::streamsize showmanyc() { return (_last-_first)-_curpos; } std::streamsize xsgetn(char_type* s, std::streamsize n) { if (n<=0 || _curpos >= (_last-_first)) return 0; auto want = 
std::min(std::streamsize(_last-_first - _curpos), n); //auto got = _is->readsome(s, want); _is->read(s, want); auto got = want; _curpos += got; return got; } int_type underflow() { if (_curpos >= (_last-_first)) return traits_type::eof(); int r = _is->peek(); return r; } int_type uflow() { if (_curpos >= (_last-_first)) return traits_type::eof(); int r = _is->get(); if (r==traits_type::eof()) return traits_type::eof(); _curpos+=1; return r; } }; // istream restricted to a section of a seakable stream class sectionstream : public std::istream { sectionbuffer _buf; public: template sectionstream(ISPTR is, uint64_t from, uint64_t size) : std::istream(nullptr), _buf(is, from, from+size) { init(&_buf); } }; /////////////////////////////////////////////////////////////// // read .idb file, returns sectionstreams for sections // // IDBFile knows how to read sections from all types of IDApro databases. // // Compression is not yet supported. class IDBFile { stream_ptr _is; uint32_t _magic; int _fileversion; std::vector _offsets; std::vector _checksums; public: enum { MAGIC_IDA2 = 0x32414449, MAGIC_IDA1 = 0x31414449, MAGIC_IDA0 = 0x30414449, }; IDBFile(stream_ptr is) : _is(is), _magic(0), _fileversion(-1) { readheader(); } uint32_t magic() const { return _magic; } void readheader() { auto s = makehelper(_is); _magic = s.get32le(); /*zero = */ s.get16le(); auto values = getvec(6, [&](){ return s.get32le(); }); if (values[5]!=0xaabbccdd) { _fileversion = 0; for (auto v : values) _offsets.push_back(v); _offsets[5] = 0; _checksums.resize(6); return; } _fileversion = s.get16le(); if (_fileversion < 5) { /*auto unknown =*/ s.get32le(); for (auto v : values) _offsets.push_back(v); _offsets.pop_back(); _checksums = getvec(5, [&](){ return s.get32le(); }); uint32_t idsofs = s.get32le(); uint32_t idscheck = _fileversion==1 ? 
s.get16le() : s.get32le(); _offsets.push_back(idsofs); _checksums.push_back(idscheck); // in filever==4 there is more in the .idb header: // 0x5c, 0, 0, , 128*NUL } else { // ver 5, 6 : 64 bit fileptrs _offsets.push_back((uint64_t(values[1])<<32)|values[0]); _offsets.push_back((uint64_t(values[3])<<32)|values[2]); _offsets.push_back(s.get64le()); _offsets.push_back(s.get64le()); _offsets.push_back(s.get64le()); _checksums = getvec(5, [&](){ return s.get32le(); }); _offsets.push_back(s.get64le()); _checksums.push_back(s.get32le()); // more data in the .idb header: // 0x7c, 0, 0, , 128*NUL } } void dump() { print("IDB v%d, m=%08x\n", _fileversion, _magic); for (unsigned int i=0 ; iseekg(_offsets[i]); auto s = makehelper(_is); auto comp = s.get8(); uint64_t size = _fileversion<5 ? s.get32le() : s.get64le(); uint64_t ofs = _offsets[i] + (_fileversion<5 ? 5 : 9); return std::make_tuple(comp, ofs, size); } stream_ptr getsection(int i) { auto info = getinfo(i); if (std::get<0>(info)) throw "compression not supported"; return std::make_shared(_is, std::get<1>(info), std::get<2>(info)); } }; // search relation enum relation_t { REL_LESS, REL_LESS_EQUAL, REL_EQUAL, REL_GREATER_EQUAL, REL_GREATER, REL_RECURSE, }; // baseclass for Btree Pages // baseclass for Btree database, subclassed by v1.5, v1.6, v2.0 class BasePage { protected: stream_ptr _is; int _pagesize; uint32_t _nr; uint32_t _preceeding; int _count; // item for the entry table class Entry { public: uint32_t pagenr; int indent; int recofs; Entry() : pagenr(0), indent(0), recofs(0) { } Entry(Entry&& e) : pagenr(e.pagenr), indent(e.indent), recofs(e.recofs) { } }; std::vector _index; std::vector _keys; // only for leaf pages // IntIter, used to be able to use upper_bound on `_index` class IntIter : public std::iterator { int _ix; public: IntIter(int x) : _ix(x) { } IntIter() : _ix(0) { } IntIter(const IntIter& i) : _ix(i._ix) { } bool operator==(const IntIter& rhs) {return _ix==rhs._ix;} bool operator!=(const IntIter& 
rhs) {return _ix!=rhs._ix;} int operator*() const {return _ix;} int operator[](int i) {return _ix+i;} IntIter& operator++() {++_ix;return *this;} IntIter operator++(int) {IntIter tmp(*this); operator++(); return tmp;} IntIter& operator--() {--_ix;return *this;} IntIter operator--(int) {IntIter tmp(*this); operator--(); return tmp;} IntIter& operator+=(int n) { _ix += n; return *this; } IntIter& operator-=(int n) { _ix -= n; return *this; } friend IntIter operator+(int n, IntIter p) { return p+=n; } friend IntIter operator+(IntIter p, int n) { return p+=n; } friend IntIter operator-(IntIter p, int n) { return p-=n; } friend int operator-(const IntIter& p, const IntIter& q) { return p._ix-q._ix; } bool operator<(const IntIter& rhs) { return _ix(const IntIter& rhs) { return _ix>rhs._ix; } bool operator>=(const IntIter& rhs) { return _ix>=rhs._ix; } }; // unused, iterator returning Entry's class PageIter : public std::iterator { BasePage* _page; int _ix; public: PageIter(BasePage*page, int ix) : _page(page), _ix(ix) { } PageIter() : _page(nullptr), _ix(0) { } PageIter(const PageIter& i) : _page(i._page), _ix(i._ix) { } bool operator==(const PageIter& rhs) {return _ix==rhs._ix;} bool operator!=(const PageIter& rhs) {return _ix!=rhs._ix;} Entry& operator*() {return _page->getent(_ix);} Entry& operator[](int i) {return _page->getent(_ix+i);} PageIter& operator++() {++_ix;return *this;} PageIter operator++(int) {PageIter tmp(*this); operator++(); return tmp;} PageIter& operator--() {--_ix;return *this;} PageIter operator--(int) {PageIter tmp(*this); operator--(); return tmp;} PageIter& operator+=(int n) { _ix += n; return *this; } PageIter& operator-=(int n) { _ix -= n; return *this; } friend PageIter operator+(int n, PageIter p) { return p+=n; } friend PageIter operator+(PageIter p, int n) { return p+=n; } friend PageIter operator-(PageIter p, int n) { return p-=n; } friend int operator-(const PageIter& p, const PageIter& q) { return p._ix-q._ix; } bool operator<(const 
PageIter& rhs) { return _ix(const PageIter& rhs) { return _ix>rhs._ix; } bool operator>=(const PageIter& rhs) { return _ix>=rhs._ix; } }; public: BasePage(stream_ptr is, uint32_t nr, int pagesize) : _is(is), _pagesize(pagesize), _nr(nr), _preceeding(0), _count(0) { } virtual ~BasePage() {} uint32_t nr() const { return _nr; } bool isindex() const { return _preceeding!=0; } bool isleaf() const { return _preceeding==0; } size_t indexsize() const { return _index.size(); } virtual Entry readent() = 0; void dump() { if (_preceeding) print("prec = %05x\n", _preceeding); for (unsigned int i=0 ; i<_index.size() ; i++) print("%-b = %-b\n", getkey(i), getval(i)); } void readindex() { for (int i=0 ; i<_count ; i++) _index.emplace_back(readent()); //print("got %d entries\n", _index.size()); if (isleaf()) readkeys(); } // for a leafpage, calculate all key values void readkeys() { auto s = makehelper(_is); std::string key; for (auto & ent : _index) { _is->seekg(ent.recofs); int klen = s.get16le(); key.resize(klen+ent.indent); _is->read(&key[ent.indent], klen); dbgprint("key i=%d, l=%d -> %b\n", ent.indent, klen, key); _keys.push_back(key); } } // get the subpage for the item at positon `i` uint32_t getpage(int i) const { if (!isindex()) throw "getpage called on leaf"; if (i<0) return _preceeding; if (i>=_index.size()) { print("#%06x i=%d, max=%d\n", _nr, i, _index.size()); throw "page: i too large"; } return _index[i].pagenr; } // get key for the item at position `i` std::string getkey(int i) { auto& ent = getent(i); if (isindex()) { _is->seekg(ent.recofs); auto s = makehelper(_is); int klen = s.get16le(); dbgprint("indexkey(%d) -> l=%d\n", i, klen); return s.getdata(klen); } else if (isleaf()) { dbgprint("leafkey(%d)\n", i); return _keys[i]; } throw "not a leaf of index"; } // get value for the item at position `i` std::string getval(int i) { auto& ent = getent(i); _is->seekg(ent.recofs); auto s = makehelper(_is); int klen = s.get16le(); _is->seekg(klen, std::ios_base::cur); int 
vlen = s.get16le(); dbgprint("%04x: val(%d), kl=%d, vl=%d\n", ent.recofs, i, klen, vlen); return s.getdata(vlen); } Entry& getent(int i) { if (i<0 || i>=_index.size()) throw "invalid key index"; return _index[i]; } // unused //auto begin() { return PageIter(this, 0); } //auto end() { return PageIter(this, _count); } struct result { relation_t act; int index; bool operator==(const result& rhs) const { return act==rhs.act && index==rhs.index; } bool operator!=(const result& rhs) const { return !(*this==rhs); } friend std::ostream& operator<<(std::ostream& os, const result& res) { os << '{'; switch(res.act) { case REL_LESS: os << "<"; break; case REL_LESS_EQUAL: os << "<="; break; case REL_EQUAL: os << "=="; break; case REL_GREATER_EQUAL: os << ">="; break; case REL_GREATER: os << ">"; break; case REL_RECURSE: os << "r"; break; default: os << "?"; } os << res.index; os << '}'; return os; } }; // search for the key in this page. // getkey(index) ... act ... key result find(const std::string& key) { //auto i = std::upper_bound(begin(), end(), key, [](const std::string& key, const Entry& ent){ return false; }); auto i = std::upper_bound(IntIter(0), IntIter(_count), key, [this](const std::string& key, int ix){ return key < this->getkey(ix); }); if (i==IntIter(0)) { if (isindex()) return {REL_RECURSE, -1}; return {REL_GREATER, 0}; // index[0] > key } --i; int ix = i-IntIter(0); if (getkey(ix) == key) return {REL_EQUAL, ix}; if (isindex()) return {REL_RECURSE, ix}; return {REL_LESS, ix}; // index[ix] < key } }; typedef std::shared_ptr Page_ptr; // baseclass for Btree database, subclassed by v1.5, v1.6, v2.0 class BtreeBase { protected: stream_ptr _is; uint32_t _firstindex; uint32_t _pagesize; uint32_t _firstfree; uint32_t _reccount; uint32_t _pagecount; public: class Cursor { BtreeBase *_bt; struct ent { Page_ptr page; int index; ent(Page_ptr page, int index) : page(page), index(index) { } ent() : index(0) { } bool operator==(const ent& rhs) const { return page==rhs.page && 
index==rhs.index; } bool operator!=(const ent& rhs) const { return !(*this==rhs); } }; std::vector _stack; void dump() const { std::stringstream x; for (auto& ent : _stack) x << stringformat(" %05x:%d", ent.page->nr(), ent.index); std::cout << x.str() << std::endl; } public: Cursor(BtreeBase *bt) : _bt(bt) { } void clear() { _stack.clear(); } void next() { if (eof()) throw "cursor:EOF"; auto ent = _stack.back(); _stack.pop_back(); if (ent.page->isleaf()) { // from leaf move towards root ent.index++; while (!_stack.empty() && ent.index==ent.page->indexsize()) { ent = _stack.back(); _stack.pop_back(); ent.index++; } if (ent.indexindexsize()) { add(ent.page, ent.index); } } else { // from node move towards leaf add(ent.page, ent.index); ent.page = _bt->readpage(ent.page->getpage(ent.index)); while (ent.page->isindex()) { ent.index = -1; add(ent.page, ent.index); ent.page = _bt->readpage(ent.page->getpage(ent.index)); } ent.index = 0; add(ent.page, ent.index); } } void prev() { if (eof()) throw "cursor:EOF"; auto ent = _stack.back(); _stack.pop_back(); ent.index--; if (ent.page->isleaf()) { while (!_stack.empty() && ent.index<0) { ent = _stack.back(); _stack.pop_back(); } if (ent.index>=0) add(ent.page, ent.index); } else { add(ent.page, ent.index); while (ent.page->isindex()) { ent.page = _bt->readpage(ent.page->getpage(ent.index)); ent.index = ent.page->indexsize()-1; add(ent.page, ent.index); } } } bool eof() const { return _stack.empty(); } // for Btree.find to create cursor. 
void add(Page_ptr page, int index) { _stack.emplace_back(page, index); } // getting key/value from cursor pos auto getkey() const { if (eof()) throw "cursor:EOF"; auto ent = _stack.back(); return ent.page->getkey(ent.index); } auto getval() const { if (eof()) throw "cursor:EOF"; auto ent = _stack.back(); return ent.page->getval(ent.index); } bool operator==(const Cursor& rhs) const { return _stack == rhs._stack; } bool operator!=(const Cursor& rhs) const { return !(*this==rhs); } bool operator<(const Cursor& rhs) const { return getkey() < rhs.getkey(); } }; BtreeBase(stream_ptr is) : _is(is) { } virtual ~BtreeBase() { } virtual int version() const = 0; virtual void readheader() = 0; virtual Page_ptr makepage(int nr) = 0; Page_ptr readpage(int nr) { auto page = makepage(nr); page->readindex(); return page; } void dump() { print("btree v%02d ff=%d, pg=%d, root=%05x, #recs=%d #pgs=%d\n", version(), _firstfree, _pagesize, _firstindex, _reccount, _pagecount); dumptree(_firstindex); } void dumptree(int nr) { auto page = readpage(nr); page->dump(); if (page->isindex()) { dumptree(page->getpage(-1)); for (unsigned int i=0 ; iindexsize() ; i++) dumptree(page->getpage(i)); } } stream_ptr pagestream(int nr) { return std::make_shared(_is, nr*_pagesize, _pagesize); } Cursor find(relation_t rel, const std::string& key) { auto page = readpage(_firstindex); Cursor cursor(this); relation_t act; while (true) { auto res = page->find(key); dbgprint("bt.find %d : %d\n", res.act, res.index); cursor.add(page, res.index); if (res.act != REL_RECURSE) { act = res.act; break; } page = readpage(page->getpage(res.index)); } if (act == rel) { dbgprint("same -> pass\n"); // pass } else if (rel==REL_EQUAL && act!=REL_EQUAL) { cursor.clear(); dbgprint("not equal -> empty\n"); } else if ((rel==REL_LESS_EQUAL || rel==REL_GREATER_EQUAL) && act==REL_EQUAL) { dbgprint("want: <=/>=, got: == -> pass\n"); // pass } else if ((rel==REL_GREATER || rel==REL_GREATER_EQUAL) && act==REL_LESS) { dbgprint("want: 
>/>=, got: < -> next\n"); cursor.next(); } else if (rel==REL_GREATER && act==REL_EQUAL) { dbgprint("want: >, got: == -> next\n"); cursor.next(); } else if ((rel==REL_LESS || rel==REL_LESS_EQUAL) && act==REL_GREATER) { dbgprint("want: -> prev\n"); cursor.prev(); } else if (rel==REL_LESS && act==REL_EQUAL) { dbgprint("want: <, got: == -> prev\n"); cursor.prev(); } return cursor; } }; class Page15 : public BasePage { public: Page15(stream_ptr is, uint32_t nr, int pagesize) : BasePage(is, nr, pagesize) { auto s = makehelper(_is); _preceeding = s.get16le(); _count = s.get16le(); } virtual Entry readent() { auto s = makehelper(_is); if (isindex()) { Entry ent; ent.pagenr = s.get16le(); ent.recofs = s.get16le()+1; dbgprint("@%04x: ix ent15 %08x %04x\n", (int)_is->tellg(), ent.pagenr, ent.recofs); return ent; } else if (isleaf()) { Entry ent; ent.indent = s.get8(); /*ent.unknown = */s.get8(); ent.recofs = s.get16le()+1; dbgprint("@%04x: lf ent15 %+4d %04x\n", (int)_is->tellg(), ent.indent, ent.recofs); return ent; } throw "page not a index or leaf"; } }; class Btree15 : public BtreeBase { public: Btree15(stream_ptr is) : BtreeBase(is) { } int version() const { return 15; } void readheader() { _is->seekg(0); auto s = makehelper(_is); _firstfree = s.get16le(); _pagesize = s.get16le(); _firstindex = s.get16le(); _reccount = s.get32le(); _pagecount = s.get16le(); } virtual Page_ptr makepage(int nr) { dbgprint("page15\n"); return std::make_shared(pagestream(nr), nr, _pagesize); } }; class Page16 : public BasePage { public: Page16(stream_ptr is, uint32_t nr, int pagesize) : BasePage(is, nr, pagesize) { auto s = makehelper(_is); _preceeding = s.get32le(); _count = s.get16le(); } virtual Entry readent() { auto s = makehelper(_is); if (isindex()) { Entry ent; ent.pagenr = s.get32le(); ent.recofs = s.get16le()+1; dbgprint("@%04x: ix ent16 %08x %04x\n", (int)_is->tellg(), ent.pagenr, ent.recofs); return ent; } else if (isleaf()) { Entry ent; ent.indent = s.get8(); /*ent.unknown = 
*/s.get8(); /*ent.unknown1 = */s.get16le(); ent.recofs = s.get16le()+1; dbgprint("@%04x: lf ent16 %+4d %04x\n", (int)_is->tellg(), ent.indent, ent.recofs); return ent; } throw "page not a index or leaf"; } }; class Btree16 : public BtreeBase { public: Btree16(stream_ptr is) : BtreeBase(is) { } int version() const { return 16; } void readheader() { _is->seekg(0); auto s = makehelper(_is); _firstfree = s.get32le(); _pagesize = s.get16le(); _firstindex = s.get32le(); _reccount = s.get32le(); _pagecount = s.get32le(); } virtual Page_ptr makepage(int nr) { dbgprint("page16\n"); return std::make_shared(pagestream(nr), nr, _pagesize); } }; // v2 b-tree pages - since idav6.7 class Page20 : public Page16 { public: Page20(stream_ptr is, int nr, int pagesize) : Page16(is, nr, pagesize) { } virtual Entry readent() { auto s = makehelper(_is); if (isindex()) { Entry ent; ent.pagenr = s.get32le(); ent.recofs = s.get16le(); dbgprint("@%04x: ix ent20 %08x %04x\n", (int)_is->tellg(), ent.pagenr, ent.recofs); return ent; } else if (isleaf()) { Entry ent; ent.indent = s.get16le(); /*ent.unknown = */s.get16le(); ent.recofs = s.get16le(); dbgprint("@%04x: lf ent20 %+4d %04x\n", (int)_is->tellg(), ent.indent, ent.recofs); return ent; } throw "page not a index or leaf"; } }; class Btree20 : public Btree16 { public: Btree20(stream_ptr is) : Btree16(is) { } int version() const { return 20; } virtual Page_ptr makepage(int nr) { dbgprint("page20\n"); return std::make_shared(pagestream(nr), nr, _pagesize); } }; // determine which btree type to create for the id0 stream. 
inline std::unique_ptr MakeBTree(stream_ptr is) { std::unique_ptr bt; is->seekg(0); char data[64]; is->read(data, 64); dbgprint("mkbt: %b\n", std::string(data, data+64)); if (std::equal(data+13, data+13+25, "B-tree v 1.5 (C) Pol 1990")) { bt = std::make_unique(is); } else if (std::equal(data+19, data+19+25, "B-tree v 1.6 (C) Pol 1990")) { bt = std::make_unique(is); } else if (std::equal(data+19, data+19+9, "B-tree v2")) { bt = std::make_unique(is); } else { throw "unknown btree version"; } bt->readheader(); return bt; } // NodeKeys is used to create btree keys with the right format // for the current database. class NodeKeys { int _w; public: NodeKeys(int wordsize) : _w(wordsize) { } template void setwordle(P first, P last, uint64_t w) { if (_w==8) EndianTools::setle64(first, last, w); else if (_w==4) EndianTools::setle32(first, last, w); } template void setwordbe(P first, P last, uint64_t w) { if (_w==8) EndianTools::setbe64(first, last, w); else if (_w==4) EndianTools::setbe32(first, last, w); } template int make_name_key(P first, P last, uint64_t id) { if (last-first<1+_w) throw "not enough space"; P p = first; *p++ = 'N'; setwordbe(p, last, id); p += _w; return p-first; } template int make_name_key(P first, P last, const std::string& name) { if (last-first<1+name.size()) throw "not enough space"; P p = first; *p++ = 'N'; std::copy(name.begin(), name.end(), p); p += name.size(); return p-first; } template int make_node_key(P first, P last, uint64_t nodeid) { if (last-first<1+_w) throw "not enough space"; P p = first; *p++ = '.'; setwordbe(p, last, nodeid); p += _w; return p-first; } template int make_node_key(P first, P last, uint64_t nodeid, char tag) { if (last-first<2+_w) throw "not enough space"; P p = first; *p++ = '.'; setwordbe(p, last, nodeid); p += _w; *p++ = tag; return p-first; } template int make_node_key(P first, P last, uint64_t nodeid, char tag, const std::string& hashkey) { if (last-first<2+_w+hashkey.size()) throw "not enough space"; P p = 
first; *p++ = '.'; setwordbe(p, last, nodeid); p += _w; *p++ = tag; // usually 'H' std::copy(hashkey.begin(), hashkey.end(), p); p += hashkey.size(); return p-first; } template int make_node_key(P first, P last, uint64_t nodeid, char tag, T index) { if (last-first<2+2*_w) throw "not enough space"; P p = first; *p++ = '.'; setwordbe(p, last, nodeid); p += _w; *p++ = tag; setwordbe(p, last, index); p += _w; return p-first; } template V make_name_key(uint64_t id) { V key; make_name_key(back_inserter(key), back_inserter(key), id); return key; } template V make_name_key(const std::string& name) { V key; make_name_key(back_inserter(key), back_inserter(key), name); return key; } template V make_node_key(uint64_t nodeid) { V key; make_node_key(back_inserter(key), back_inserter(key), nodeid); return key; } template V make_node_key(uint64_t nodeid, char tag) { V key; make_node_key(back_inserter(key), back_inserter(key), nodeid, tag); return key; } template V make_node_key(uint64_t nodeid, char tag, const std::string& hashkey) { V key; make_node_key(back_inserter(key), back_inserter(key), nodeid, tag, hashkey); return key; } template V make_node_key(uint64_t nodeid, char tag, T index) { V key; make_node_key(back_inserter(key), back_inserter(key), nodeid, tag, index); return key; } }; // convert node values to integer or string. 
struct NodeValues { static uint64_t getuint(const std::string& str) { switch(str.size()) { case 1: return EndianTools::get8(str.begin(), str.end()); case 2: return EndianTools::getle16(str.begin(), str.end()); case 4: return EndianTools::getle32(str.begin(), str.end()); case 8: return EndianTools::getle64(str.begin(), str.end()); } throw "unsupported int type"; } static uint64_t getuintbe(const std::string& str) { switch(str.size()) { case 1: return EndianTools::get8(str.begin(), str.end()); case 2: return EndianTools::getbe16(str.begin(), str.end()); case 4: return EndianTools::getbe32(str.begin(), str.end()); case 8: return EndianTools::getbe64(str.begin(), str.end()); } throw "unsupported int type"; } static int64_t getint(const std::string& str) { return (int64_t)getuint(str); } static std::string getstr(std::string data) { // strip terminating zeroes while (!data.empty() && data.back()==0) data.pop_back(); return data; } }; // provide access to the main part of the IDApro database. // // use 'find', 'node' and 'blob' to access nodes in the database. class ID0File { std::unique_ptr _bt; uint64_t _nodebase; int _wordsize; public: enum { INDEX = 0 }; // argument for idb.getsection() ID0File(IDBFile& idb, stream_ptr is) : _bt(MakeBTree(is)) { if (idb.magic() == IDBFile::MAGIC_IDA2) _wordsize = 8; else _wordsize = 4; _nodebase = uint64_t(0xFF)<<((_wordsize-1)*8); } uint64_t nodebase() const { return _nodebase; } bool is64bit() const { return _wordsize==8; } void dump() { _bt->dump(); } // function for creating a node key for the current database. template std::string makekey(ARGS...args) { NodeKeys nk(_wordsize); return nk.make_node_key(args...); } // function for creating a name key for the current database. template std::string makename(ARGS...args) { NodeKeys nk(_wordsize); return nk.make_name_key(args...); } // search for records in the current database by key. // relation gives the desired relation: // REL_LESS : return records less than the key. 
// // returns a cursor object. auto find(relation_t rel, const std::string& key) { return _bt->find(rel, key); } // search for records in the current database. // relation gives the desired relation: // REL_LESS : return records less than the key. // // returns a cursor object. template auto find(relation_t rel, uint64_t nodeid, ARGS...args) { return _bt->find(rel, makekey(nodeid, args...)); } // return a blob object as a string. std::string blob(uint64_t nodeid, char tag, uint64_t startid = 0, uint64_t lastid = 0xFFFFFFFF) { auto c = _bt->find(REL_GREATER_EQUAL, makekey(nodeid, tag, startid)); auto endkey = makekey(nodeid, tag, lastid); std::string blob; while (c.getkey() <= endkey) { blob += c.getval(); c.next(); } return blob; } // finds the nodeid by name. // // names can be labels like 'sub_1234', but also internal names like '$ structs', or 'Root Name' uint64_t node(const std::string& name) { auto c = _bt->find(REL_EQUAL, makename(name)); if (c.eof()) return 0; return NodeValues::getint(c.getval()); } // callback is called for each nodeid in the list. // examples of lists: '$ structs', '$ enums' template void enumlist(uint64_t nodeid, char tag, CB cb) { auto c = _bt->find(REL_GREATER_EQUAL, makekey(nodeid, tag)); auto endkey = makekey(nodeid, tag+1); while (c.getkey() <= endkey) cb(NodeValues::getint(c.getval())); } // 'easy' interface: return empty when record not found. // otherwise directly return the value. template std::string getdata(ARGS...args) { auto c = find(REL_EQUAL, makekey(args...)); if (c.eof()) return {}; return c.getval(); } template std::string getstr(ARGS...args) { // until ida6.7 strings were stored zero terminated. 
auto c = find(REL_EQUAL, makekey(args...)); if (c.eof()) return {}; return NodeValues::getstr(c.getval()); } template uint64_t getuint(ARGS...args) { auto c = find(REL_EQUAL, makekey(args...)); if (c.eof()) return {}; return NodeValues::getuint(c.getval()); } uint64_t getuint(BtreeBase::Cursor& c) { return NodeValues::getuint(c.getval()); } // returns the node name, resolves long names. std::string getname(uint64_t node) { auto c = find(REL_EQUAL, makekey(node, 'N')); if (c.eof()) return {}; auto val = c.getval(); if (val.empty()) return {}; if (val[0]==0) { val.erase(val.begin()); // bigname uint64_t nameid = NodeValues::getuintbe(val); val = blob(_nodebase, 'S', nameid*256, nameid*256+32); } return NodeValues::getstr(val); } }; #ifndef BADADDR #define BADADDR uint64_t(-1) #endif // the ID1File contains information on segments, and stores the flags for each byte. // basically this is the data for the idc GetFlags(ea) function. class ID1File { struct segment { uint32_t start_ea; uint32_t end_ea; uint32_t id1ofs; }; typedef std::vector segmentlist_t; segmentlist_t _segments; stream_ptr _is; int _wordsize; private: void open() { auto s = makehelper(_is, _wordsize); s.seekg(0); uint32_t magic = s.get32le(); if ((magic&0xFFF0FFFF)==0x306156) { // Va0 .. Va4 uint16_t nsegments = s.get16le(); uint16_t npages = s.get16le(); (void)npages; // value not used _segments.resize(nsegments); for (unsigned i=0 ; i _namedoffsets; mutable bool _namesloaded; stream_ptr _is; int _wordsize; uint32_t _nnames; uint64_t _listofs; public: enum { INDEX = 2 }; // argument for idb.getsection() NAMFile(IDBFile& idb, stream_ptr is) : _namesloaded(false), _is(is) { if (idb.magic() == IDBFile::MAGIC_IDA2) _wordsize = 8; else _wordsize = 4; open(); } void open() { auto s = makehelper(_is, _wordsize); s.seekg(0); uint32_t magic = s.get32le(); if ((magic&0xFFF0FFFF)==0x306156) { // Va0 .. 
Va4 uint16_t npages = s.get16le(); // nr of chunks uint16_t eof = s.get16le(); uint64_t unknown = s.getword(); (void)npages; (void)eof; // value not used _nnames = s.getword(); // nr of names _listofs = s.getword(); // page size dbgprint("nam: np=%d, eof=%d, nn=%d, ofs=%08x\n", npages, eof, _nnames, _listofs); if (unknown) print("!! nam.unknown=%08x\n", unknown); } else if (magic==0x2a4156) { uint32_t unk1 = s.get32le(); // 3 uint32_t npages = s.get32le(); // nr of chunks uint32_t unk2 = s.get32le(); // 0x800 uint32_t eof = s.get32le(); // 0x15 uint64_t unknown = s.getword(); // 0 (void)unk1; (void)npages; (void)unk2; (void)eof; (void)unknown; // values not used _listofs = 0x2000; _nnames = s.getword(); // nr of names dbgprint("nam: np=%d, eof=%d, nn=%d, ofs=%08x\n", npages, eof, _nnames, _listofs); } else { throw "invalid NAM"; } if (_wordsize==8) _nnames /= 2; } void loadoffsets() const { if (_namesloaded) return; _namedoffsets.reserve(_nnames); auto s = makehelper(_is, _wordsize); s.seekg(_listofs); for (unsigned i=0 ; i<_nnames ; i++) _namedoffsets.push_back(s.getword()); _namesloaded = true; } int numnames() const { loadoffsets(); return _namedoffsets.size(); } template void enumerate(FN fn) const { loadoffsets(); std::for_each(_namedoffsets.begin(), _namedoffsets.end(), fn); } // finds nearest named item uint64_t findname(uint64_t ea) const { loadoffsets(); if (_namedoffsets.empty()) return BADADDR; auto i= std::upper_bound(_namedoffsets.begin(), _namedoffsets.end(), ea); if (i==_namedoffsets.begin()) { // address before first: return first named item return *i; } i--; return *i; } uint64_t firstnamed() const { loadoffsets(); if (_namedoffsets.empty()) return BADADDR; return *_namedoffsets.begin(); } }; // packs/unpacks structured data class BaseUnpacker { public: virtual bool eof() const = 0; virtual uint16_t next16() = 0; virtual uint32_t next32() = 0; virtual uint64_t nextword() = 0; virtual ~BaseUnpacker() { } }; template class Unpacker : public 
BaseUnpacker { P _p; P _last; bool _use64; public: Unpacker(P first, P last, bool use64) : _p(first), _last(last), _use64(use64) { } bool eof() const { return _p>=_last; } /* * 7 bit - values 0 .. 0x7f * 14 bit - values 0x80 .. 0x3fff, orred with 0x8000 * 0xFF + 2 byte - any 16 bit val */ uint16_t next16() { if (_p>=_last) throw "unpack: no data"; uint8_t byte = *_p++; if (byte==0xff) { if (_p+2>_last) throw "unpack: no data"; uint16_t value = EndianTools::getbe16(_p, _last); _p += 2; return value; } if (byte<0x80) return byte; _p--; if (_p+2>_last) throw "unpack: no data"; uint16_t value = EndianTools::getbe16(_p, _last) & 0x3FFF; _p += 2; return value; } /* * 7 bit - values 0 .. 0x7f * 14 bit - values 0x80 .. 0x3fff, orred with 0x8000 * 29 bit - values 0x4000 .. 0x1fffffff, orred with 0xc0000000 * * 0xFF + 4 byte - any 32 bit val - in .idb * * in .i64, 64 bit values are encoded as the low 32bit value followed by high * 32 bit value using any of the above encodings. So the byte order is kind of twisted. * */ uint32_t next32() { if (_p>=_last) throw "unpack: no data"; uint8_t byte = *_p++; if (byte==0xff) { if (_p+4>_last) throw "unpack: no data"; uint32_t value = EndianTools::getbe32(_p, _last); _p += 4; return value; } if (byte<0x80) { return byte; } _p--; if (byte<0xc0) { if (_p+2>_last) throw "unpack: no data"; uint32_t value = EndianTools::getbe16(_p, _last) & 0x3FFF; _p += 2; return value; } if (_p+4>_last) throw "unpack: no data"; uint32_t value = EndianTools::getbe32(_p, _last) & 0x1FFFFFFF; _p += 4; return value; } uint64_t nextword() { uint64_t lo = next32(); if (_use64) { uint64_t hi = next32(); return lo|(hi<<32); } return lo; } }; template static void idaunpack(I first, I last, O out) { Unpacker p(first, last, false); while (!p.eof()) *out++ = p.next32(); } template Unpacker

makeunpacker(P first, P last, bool use64) { return Unpacker

(first, last, use64); } typedef std::vector DwordVector; // used mostly in lists, where the stored value is one less than the actually used value. // lists like: $enums, $structs, $scripts, values of enums, masks of bitfields, values of bitmasks // backref of bitfield value to mask. inline uint64_t minusone(uint64_t id) { if (id) return id-1; return 0; } // 'd' xref-from -> points to used type // 'D' xref-to -> points to type users class StructMember { /* * (membernode, N) = struct.member-name * (membernode, A, 3) = structid+1 * (membernode, A, 8) = * (membernode, A, 11) = enumid+1 * (membernode, A, 16) = flag? -- 4:variable length flag? * (membernode, S, 0x3000) = type (set with 'Y') * (membernode, S, 0x3001) = names used in 'type' * (membernode, S, 5) = array type? * (membernode, S, 9) = offset-type * (membernode, D, address) = xref-type * (membernode, d, structid) = xref-type -- for sub-structs */ ID0File& _id0; uint64_t _nodeid; uint64_t _skip; // nr of bytes to skip before this member uint64_t _size; // size in bytes of this member uint32_t _flags; uint32_t _props; uint64_t _ofs; public: StructMember(ID0File& id0, BaseUnpacker& spec) : _id0(id0) { _nodeid = spec.nextword(); _skip = spec.nextword(); _size = spec.nextword(); _flags = spec.next32(); _props = spec.next32(); _ofs = 0; } void setofs(uint64_t ofs) { _ofs = ofs; } uint64_t nodeid() const { return _nodeid + _id0.nodebase(); } uint64_t skip() const { return _skip; } uint64_t size() const { return _size; } uint32_t flags() const { return _flags; } uint32_t props() const { return _props; } uint64_t offset() const { return _ofs; } std::string name() const { return _id0.getname(nodeid()); } uint64_t enumid() const { return minusone(_id0.getuint(nodeid(), 'A', 11)); } uint64_t structid() const { return minusone(_id0.getuint(nodeid(), 'A', 3)); } std::string comment(bool repeatable) const { return _id0.getstr(nodeid(), 'S', repeatable ? 
1 : 0); } std::string ptrinfo() const { return _id0.getdata(nodeid(), 'S', 9); } // types from typeinfo: // 11 00 _BYTE // 32 00 char // 22 00 unsigned __int8 // 02 00 __int8 // ='WORD" WORD // 03 00 short // 07 00 int // 03 00 __int16 // 28 00 _BOOL2 // 10 00 _WORD std::string typeinfo() const { return _id0.getdata(nodeid(), 'S', 0x3000); } }; // access structs and struct members. class Struct { /* * (structnode, N) = structname * (structnode, D, address) = xref-type * (structnode, M, 0) = packed struct info * (structnode, S, 27) = packed value(addr, byte) */ ID0File& _id0; uint64_t _nodeid; uint32_t _flags; std::vector _members; uint32_t _seqnr; uint32_t _size; class Iterator : public std::iterator { const Struct* _s; int _ix; public: Iterator(const Struct*s, int ix) : _s(s), _ix(ix) { } Iterator() : _s(nullptr), _ix(0) { } Iterator(const Iterator& i) : _s(i._s), _ix(i._ix) { } bool operator==(const Iterator& rhs) {return _ix==rhs._ix;} bool operator!=(const Iterator& rhs) {return _ix!=rhs._ix;} const StructMember& operator*() {return _s->member(_ix);} const StructMember& operator[](int i) {return _s->member(_ix+i);} Iterator& operator++() {++_ix;return *this;} Iterator operator++(int) {Iterator tmp(*this); operator++(); return tmp;} Iterator& operator--() {--_ix;return *this;} Iterator operator--(int) {Iterator tmp(*this); operator--(); return tmp;} Iterator& operator+=(int n) { _ix += n; return *this; } Iterator& operator-=(int n) { _ix -= n; return *this; } friend Iterator operator+(int n, Iterator p) { return p+=n; } friend Iterator operator+(Iterator p, int n) { return p+=n; } friend Iterator operator-(Iterator p, int n) { return p-=n; } friend int operator-(const Iterator& p, const Iterator& q) { return p._ix-q._ix; } bool operator<(const Iterator& rhs) { return _ix(const Iterator& rhs) { return _ix>rhs._ix; } bool operator>=(const Iterator& rhs) { return _ix>=rhs._ix; } }; public: Struct(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { auto 
spec = _id0.blob(_nodeid, 'M'); auto p = makeunpacker(spec.begin(), spec.end(), _id0.is64bit()); _flags = p.next32(); uint32_t nmember = p.next32(); uint64_t ofs = 0; while (nmember--) { _members.emplace_back(_id0, p); ofs += _members.back().skip(); _members.back().setofs(ofs); ofs += _members.back().size(); } _size = ofs; if (!p.eof()) _seqnr = p.next32(); else _seqnr = 0; } std::string name() const { return _id0.getname(_nodeid); } std::string comment(bool repeatable) const { return _id0.getstr(_nodeid, 'S', repeatable ? 1 : 0); } int nmembers() const { return _members.size(); } uint32_t flags() const { return _flags; } uint32_t seqnr() const { return _seqnr; } uint32_t size() const { return _size; } Iterator begin() const { return Iterator(this, 0); } Iterator end() const { return Iterator(this, nmembers()); } const StructMember& member(int ix) const { return _members[ix]; } }; class EnumMember { /* * (membernode, N) = membername * (membernode, A, -2) = enumnode + 1 * (membernode, A, -3) = member value */ ID0File& _id0; uint64_t _nodeid; uint64_t _value; public: EnumMember(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { _value = _id0.getuint(_nodeid, 'A', -3); } // 'A', -2 -> points to enum node uint64_t nodeid() const { return _nodeid; } uint64_t value() const { return _value; } std::string name() const { return _id0.getname(nodeid()); } std::string comment(bool repeatable) const { return _id0.getstr(nodeid(), 'S', repeatable ? 1 : 0); } }; // get properties of an enum. // bitfields and enums are both in the '$ enums' list. class Enum { /* * (enumnode, N) = enum-name * (enumnode, A, -1) = nr of values * (enumnode, A, -3) = representation * (enumnode, A, -5) = flags: bitfield, hidden, ... 
* (enumnode, A, -8) = * (enumnode, E, value) = valuenode + 1 */ ID0File& _id0; uint64_t _nodeid; public: Enum(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { } uint64_t nodeid() const { return _nodeid; } uint64_t count() const { return _id0.getuint(_nodeid, 'A', -1); } // >>20 : 0x11=hex, 0x22=dec, 0x77=oct, 0x66=bin, 0x33=char // values: FF_0NUMx|FF_1NUMx x=H,D,O,B,CHAR // // >>16 : 0x2 = signed : FF_SIGN uint32_t representation() const { return _id0.getuint(_nodeid, 'A', -3); } // bit0 = bitfield ENUM_FLAGS_IS_BF // bit1 = hidden ENUM_FLAGS_HIDDEN // bit2 = fromtil ENUM_FLAGS_FROMTIL // bit5-3 = width 0..7 = (0,1,2,4,8,16,32,64) // bit6 = ghost ENUM_FLAGS_GHOST uint32_t flags() const { return _id0.getuint(_nodeid, 'A', -5); } // 'A',-8 -> index in $enums list std::string name() const { return _id0.getname(_nodeid); } std::string comment(bool repeatable) const { return _id0.getstr(_nodeid, 'S', repeatable ? 1 : 0); } auto first() const { return _id0.find(REL_GREATER_EQUAL, _id0.makekey(_nodeid, 'E')); } std::string lastkey() const { return _id0.makekey(_nodeid, 'F'); } EnumMember getvalue(BtreeBase::Cursor& c) const { return EnumMember(_id0, minusone(_id0.getuint(c))); } }; class BitfieldValue { ID0File& _id0; uint64_t _nodeid; uint64_t _value; uint64_t _mask; public: BitfieldValue(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { _value = _id0.getuint(_nodeid, 'A', -3); _mask = _id0.getuint(_nodeid, 'A', -6) - 1; } uint64_t nodeid() const { return _nodeid; } std::string name() const { return _id0.getname(nodeid()); } std::string comment(bool repeatable) const { return _id0.getstr(nodeid(), 'S', repeatable ? 
1 : 0); } uint64_t value() const { return _value; } uint64_t mask() const { return _mask; } // 'A', -2 -> points to enum node // 'A', -6 -> minusone -> maskid from : (enum, 'm', maskid) }; class BitfieldMask { ID0File& _id0; uint64_t _nodeid; uint64_t _mask; public: BitfieldMask(ID0File& id0, uint64_t nodeid, uint64_t mask) : _id0(id0), _nodeid(nodeid), _mask(mask) { } // BitfieldMask(const BitfieldMask& bf) // : _id0(bf._id0), _nodeid(bf._nodeid), _mask(bf._mask) // { // } uint64_t nodeid() const { return _nodeid; } std::string name() const { return _id0.getname(nodeid()); } std::string comment(bool repeatable) const { return _id0.getstr(nodeid(), 'S', repeatable ? 1 : 0); } uint64_t mask() const { return _mask; } auto first() const { return _id0.find(REL_GREATER_EQUAL, _id0.makekey(_nodeid, 'E')); } std::string lastkey() const { return _id0.makekey(_nodeid, 'F'); } BitfieldValue getvalue(BtreeBase::Cursor& c) const { return BitfieldValue(_id0, minusone(_id0.getuint(c))); } }; // get properties of a bitfield. // bitfields and enums are both in the '$ enums' list. class Bitfield { ID0File& _id0; uint64_t _nodeid; public: Bitfield(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { } uint64_t count() const { return _id0.getuint(_nodeid, 'A', -1); } // >>20 : 0x11=hex, 0x22=dec, 0x77=oct, 0x66=bin, 0x33=char // values: FF_0NUMx|FF_1NUMx x=H,D,O,B,CHAR // // >>16 : 0x2 = signed : FF_SIGN uint32_t representation() const { return _id0.getuint(_nodeid, 'A', -3); } // bit0 = bitfield ENUM_FLAGS_IS_BF // bit1 = hidden ENUM_FLAGS_HIDDEN // bit2 = fromtil ENUM_FLAGS_FROMTIL // bit5-3 = width 0..7 = (0,1,2,4,8,16,32,64) // bit6 = ghost ENUM_FLAGS_GHOST uint32_t flags() const { return _id0.getuint(_nodeid, 'A', -5); } // 'A',-8 -> index in $enums list std::string name() const { return _id0.getname(_nodeid); } std::string comment(bool repeatable) const { return _id0.getstr(_nodeid, 'S', repeatable ? 1 : 0); } // for bitmasks there is an extra level: 'm' in between. 
auto first() const { return _id0.find(REL_GREATER_EQUAL, _id0.makekey(_nodeid, 'm')); } std::string lastkey() const { return _id0.makekey(_nodeid, 'n'); } BitfieldMask getmask(BtreeBase::Cursor& c) { auto key = c.getkey(); uint64_t mask; // get the mask from the key index, // ... not really nescesary, since we can also get the mask // from the BitfieldValue : node('A',-6) - 1 if (_id0.is64bit() ) { assert(key.size()==18); mask = EndianTools::getbe64(&key[10], &key[10]+8); } else { assert(key.size()==10); mask = EndianTools::getbe32(&key[6], &key[6]+4); } return BitfieldMask(_id0, minusone(_id0.getuint(c)), mask); } }; // access scripts by nodeid, // use name(), language() and body() to access // the contents of the script. class Script { ID0File& _id0; uint64_t _nodeid; public: Script(ID0File& id0, uint64_t nodeid) : _id0(id0), _nodeid(nodeid) { } std::string name() const { return _id0.getstr(_nodeid, 'S', 0); } std::string language() const { return _id0.getstr(_nodeid, 'S', 1); } std::string body() const { return NodeValues::getstr(_id0.blob(_nodeid, 'X')); } }; template class List { /* * (listnode, 'N') = listname * (listnode, 'A', -1) = list size <-- not for '$ scriptsnippets' * (listnode, 'A', seqnr) = itemnode+1 * * (listnode, 'Y', itemnode) = seqnr <-- only for '$ enums' * * (listnode, 'Y', 0) = list size <-- only for '$ scriptsnippets' * (listnode, 'Y', 1) = ? 
<-- only for '$ scriptsnippets' */ ID0File& _id0; BtreeBase::Cursor _c; std::string _endkey; public: List(ID0File& id0, uint64_t nodeid) : _id0(id0), _c(_id0.find(REL_GREATER, _id0.makekey(nodeid, 'A'))) { _endkey = _id0.makekey(nodeid, 'A', -1); } bool eof() const { return !(_c.getkey() < _endkey); } T next() { uint64_t id = minusone(_id0.getuint(_c)); _c.next(); return T(_id0, id); } }; ================================================ FILE: tests/CMakeLists.txt ================================================ find_package(doctest REQUIRED) file(GLOB UnittestSrc *.cpp) add_executable(idbutil_unittests ${UnittestSrc}) set_property(TARGET idbutil_unittests PROPERTY OUTPUT_NAME unittests) target_link_libraries(idbutil_unittests PRIVATE cpputils idblib doctest::doctest) target_compile_definitions(idbutil_unittests PRIVATE WITH_DOCTEST) include(CTest) include(doctest OPTIONAL RESULT_VARIABLE res) if (res STREQUAL NOTFOUND) add_test(NAME IdbutilTest COMMAND idbutil_unittests) else() doctest_discover_tests(idbutil_unittests) endif() ================================================ FILE: tests/test-idb3.cpp ================================================ #include "unittestframework.h" #include #include std::string CreateTestIndexPage(int pagesize) { std::string page(pagesize, char(0)); auto oi = page.begin(); auto ei = page.begin() + pagesize/2; auto od = page.begin() + pagesize/2; auto ed = page.end(); auto et = EndianTools(); et.setle32(oi, ei, 122); oi += 4; et.setle16(oi, ei, 3); oi += 2; auto addkv = [&](const std::string& key, const std::string& val, int pagenr) { et.setle32(oi, ei, pagenr); oi += 4; et.setle16(oi, ei, od-page.begin()); oi += 2; et.setle16(od, ed, key.size()); od += 2; std::copy(key.begin(), key.end(), od); od += key.size(); et.setle16(od, ed, val.size()); od += 2; std::copy(val.begin(), val.end(), od); od += val.size(); }; addkv("Nabcde", std::string{"\x01\x00\x00\xFF",4}, 123); addkv("Nbcdef", std::string{"\x02\x00\x00\xFF",4}, 125); 
addkv("Ncdef", std::string{"\x03\x00\x00\xFF",4}, 127); return page; } TEST_CASE("TestIndexPage") { auto tstdata = CreateTestIndexPage(2048); auto page = std::make_unique(std::make_shared(tstdata), 1, 2048); page->readindex(); CHECK( page->isindex() == true ); CHECK( page->isleaf() == false ); CHECK( page->getpage(-1) == 122 ); CHECK( page->getpage(0) == 123 ); CHECK( page->getpage(1) == 125 ); CHECK( page->getpage(2) == 127 ); CHECK_THROWS( page->getpage(3) ); CHECK( page->getkey(0) == "Nabcde" ); CHECK( page->getkey(1) == "Nbcdef" ); CHECK( page->getkey(2) == "Ncdef" ); CHECK_THROWS( page->getkey(3) ); CHECK_FALSE(page->getval(0) == std::string{"fail"}); CHECK( page->getval(0) == (std::string{"\x01\x00\x00\xFF",4}) ); CHECK( page->getval(1) == (std::string{"\x02\x00\x00\xFF",4}) ); CHECK( page->getval(2) == (std::string{"\x03\x00\x00\xFF",4}) ); CHECK_FALSE(page->find("fail") == (BasePage::result{REL_EQUAL,2})); CHECK( page->find("N") == (BasePage::result{REL_RECURSE,-1}) ); CHECK( page->find("Nabcde") == (BasePage::result{REL_EQUAL,0}) ); CHECK( page->find("Nbcdef") == (BasePage::result{REL_EQUAL,1}) ); CHECK( page->find("Nbzzzz") == (BasePage::result{REL_RECURSE,1}) ); CHECK( page->find("Ncdef") == (BasePage::result{REL_EQUAL,2}) ); CHECK( page->find("Nzzzz") == (BasePage::result{REL_RECURSE,2}) ); } std::string CreateTestLeafPage(int pagesize) { std::string page(pagesize, char(0)); auto oi = page.begin(); auto ei = page.begin() + pagesize/2; auto od = page.begin() + pagesize/2; auto ed = page.end(); auto et = EndianTools(); et.setle32(oi, ei, 0); oi += 4; et.setle16(oi, ei, 3); oi += 2; auto addkv = [&](const std::string& key, const std::string& val, int indent) { et.setle32(oi, ei, indent); oi += 4; et.setle16(oi, ei, od-page.begin()); oi += 2; et.setle16(od, ed, key.size()-indent); od += 2; std::copy(key.begin()+indent, key.end(), od); od += key.size()-indent; et.setle16(od, ed, val.size()); od += 2; std::copy(val.begin(), val.end(), od); od += val.size(); 
}; addkv("Nabcde", std::string{"\x01\x00\x00\xFF",4}, 0); addkv("Nbcdef", std::string{"\x02\x00\x00\xFF",4}, 1); addkv("Ncdef", std::string{"\x03\x00\x00\xFF",4}, 1); return page; } void TestLeafPage() { auto tstdata = CreateTestLeafPage(2048); auto page = std::make_unique(std::make_shared(tstdata), 1, 2048); page->readindex(); CHECK( page->isindex() == false ); CHECK( page->isleaf() == true ); CHECK_THROWS( page->getpage(0) ); CHECK( page->getkey(0) == "Nabcde" ); CHECK( page->getkey(1) == "Nbcdef" ); CHECK( page->getkey(2) == "Ncdef" ); CHECK_THROWS( page->getkey(3) ); CHECK_FALSE( page->getval(0) == std::string{"fail"} ); CHECK( page->getval(0) == (std::string{"\x01\x00\x00\xFF",4}) ); CHECK( page->getval(1) == (std::string{"\x02\x00\x00\xFF",4}) ); CHECK( page->getval(2) == (std::string{"\x03\x00\x00\xFF",4}) ); CHECK_FALSE( page->find("fail") == (BasePage::result{REL_EQUAL,2}) ); CHECK( page->find("N") == (BasePage::result{REL_GREATER,0}) ); CHECK( page->find("Nabcde") == (BasePage::result{REL_EQUAL,0}) ); CHECK( page->find("Nbcdef") == (BasePage::result{REL_EQUAL,1}) ); CHECK( page->find("Nbzzzz") == (BasePage::result{REL_LESS,1}) ); CHECK( page->find("Ncdef") == (BasePage::result{REL_EQUAL,2}) ); CHECK( page->find("Nzzzz") == (BasePage::result{REL_LESS,2}) ); } /* streamhelper unittest */ TEST_CASE("test_streamhelper") { auto f = makehelper(std::make_shared("3456789a")); // read various chunks of the stream. CHECK( f.getdata(3) == "345" ); CHECK( f.getdata(8) == "6789a" ); CHECK( f.getdata(8) == "" ); f.seekg(-1, std::ios_base::end); CHECK( f.getdata(8) == "a" ); f.seekg(3); CHECK( f.getdata(2) == "67" ); f.seekg(-2,std::ios_base::cur); CHECK( f.getdata(2) == "67" ); f.seekg(2,std::ios_base::cur); CHECK( f.getdata(2) == "a" ); f.seekg(0); CHECK( f.get32le() == 0x36353433 ); CHECK( f.get32be() == 0x37383961 ); // seek to end of stream f.seekg(8); // read at EOF should return an empty string. 
CHECK( f.getdata(1) == "" ); // seek beyond end of stream should throw an exception CHECK_THROWS( f.seekg(9) ); CHECK_THROWS( f.getdata(1) ); } /* unittest for EndianTools */ TEST_CASE("test_EndianTools") { EndianTools et; uint8_t b[64]; int i; for (i=0 ; i<64 ; i++) b[i] = 0xAA; CHECK_THROWS( et.setle64(b, b+7, 0x12345678LL) ); et.setle64(b, b+8, 0x9abcdef12345678LL); uint8_t little_endian_number[9] = { 0x78, 0x56, 0x34, 0x12, 0xef, 0xcd, 0xab, 0x09, 0xAA }; CHECK(std::equal(b, b+9, little_endian_number)); CHECK_THROWS( et.getle64(b, b+7) ); CHECK(et.getle64(b, b+8)==0x9abcdef12345678LL); et.setbe64(b, b+8, 0x9abcdef12345678LL); uint8_t big_endian_number[9] = { 0x09,0xab,0xcd,0xef,0x12,0x34,0x56,0x78,0xAA }; CHECK(std::equal(b, b+9, big_endian_number)); CHECK_THROWS( et.getbe64(b, b+7) ); CHECK(et.getbe64(b, b+8)==0x9abcdef12345678LL); } /* unittest for sectionstream */ TEST_CASE("test_StreamSection") { auto f = makehelper(std::make_shared(std::make_shared("0123456789abcdef"), 3, 8)); CHECK( f.getdata(3) == "345" ); // should return what was asked for. 
CHECK( f.getdata(8) == "6789a" ); // should return less than requested CHECK( f.getdata(8) == "" ); // should return nothing f.seekg(-1, std::ios_base::end); CHECK( f.getdata(8) == "a" ); f.seekg(3); CHECK( f.getdata(2) == "67" ); f.seekg(-2,std::ios_base::cur); CHECK( f.getdata(2) == "67" ); f.seekg(2,std::ios_base::cur); CHECK( f.getdata(2) == "a" ); f.seekg(8); CHECK( f.getdata(1) == "" ); //CHECK_THROWS( f.seekg(9) ); } TEST_CASE("test_NodeValues") { CHECK( NodeValues::getuint("\x12\x34\x45\x56\x67\x78\x89\x9a") == 0x9a89786756453412 ); CHECK( NodeValues::getuint("\x12\x34\x45\x56") == 0x56453412 ); CHECK( NodeValues::getuint("\x12\x34") == 0x3412 ); CHECK( NodeValues::getuint("\x12") == 0x12 ); } TEST_CASE("test_Packer") { std::string val("\x00\x04\x88\xf1\x00\x04\xc0\x20\x00\x04\x01\x88\xf2\x00\x04\xc0\x20\x00\x04\x01\x88\xf3\x00\x04\xc0\x25\x50\x04\x11\x88\xf4\x00\x04\xc0\x25\x50\x04\x11\x02", 39); DwordVector nums; idaunpack(val.begin(), val.end(), std::back_inserter(nums)); DwordVector wrong = { 0x00 }; CHECK( nums != wrong ); CHECK_FALSE( nums == wrong ); DwordVector check = { 0x00,0x04,0x8f1,0x00,0x04,0x0200004,0x01,0x8f2,0x00,0x04,0x0200004,0x01,0x8f3,0x00,0x04,0x0255004,0x11,0x8f4,0x00,0x04,0x0255004,0x11,0x02 }; CHECK(nums == check); } ================================================ FILE: tests/unittestframework.h ================================================ /* * catch is available from: https://github.com/catchorg/Catch2 * doctest is available from https://github.com/onqtam/doctest * * * Doctest is almost a drop-in replacement for Catch. * Though Catch has a few more features, and works without any restrictions, * doctest has much faster compilation times, our 44 unittests build * takes about 13 minutes to build with catch, or about 3.5 minutes when * using doctest. 
* */ #if !defined(USE_CATCH) && !defined(USE_DOCTEST) #define USE_DOCTEST #endif #ifdef USE_CATCH #ifdef UNITTESTMAIN #define CATCH_CONFIG_MAIN #endif //#define CATCH_CONFIG_ENABLE_TUPLE_STRINGMAKER #define CATCH_CONFIG_ENABLE_ALL_STRINGMAKERS #if __has_include() #include #elif __has_include("single_include/catch.hpp") #include "single_include/catch.hpp" #elif __has_include("contrib/catch.hpp") #include "contrib/catch.hpp" #else #error "Could not find catch.hpp" #endif #define SKIPTEST , "[!hide]" // doctest has suites, catch doesn't. #define TEST_SUITE(x) namespace #elif defined(USE_DOCTEST) #ifdef UNITTESTMAIN #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN #endif #include #if __has_include() #include #elif __has_include("contrib/doctest.h") #include "contrib/doctest.h" #elif __has_include("single_include/doctest.h") #include "single_include/doctest.h" #else #error "Could not find doctest.h" #endif #define SECTION(...) SUBCASE(__VA_ARGS__) #define SKIPTEST * doctest::skip(true) #define CHECK_THAT(a, b) #else #error define either USE_CATCH or USE_DOCTEST #endif #define IN_UNITTEST 1 ================================================ FILE: tests/unittests.cpp ================================================ #define UNITTESTMAIN #include "unittestframework.h" // including all here again, so we will catch linking errors. #include TEST_CASE("main") { CHECK(true); } ================================================ FILE: tools/CMakeLists.txt ================================================ add_executable(idbtool idbtool.cpp) target_link_libraries(idbtool PRIVATE idasdk idblib cpputils) if (TARGET gmp) target_link_libraries(idbtool PRIVATE gmp) endif() ================================================ FILE: tools/idbtool.cpp ================================================ /* * idbtool: a tool for viewing idb files without running ida. 
* * Author: Willem Hengeveld * */ #include #include #include #include #include #include #include #ifdef HAVE_LIBGMP #include #endif #include #include int verbose = 0; #ifdef HAVE_LIBGMP /* * decode license info from idb */ // defines for making the intention of our mpz_import parameters clear to the reader. #define ORDER_LS_FIRST -1 #define ORDER_MS_FIRST 1 // function for converting a range of bytes to a gmp bignum object. inline mpz_class lstompz(const uint8_t *first, const uint8_t *last) { mpz_class m; int endian= 0; // endianness does not matter for bytes int nails= 0; // all bits used int order= ORDER_LS_FIRST; mpz_import(m.get_mpz_t(), last-first, order, sizeof(uint8_t), endian, nails, first); return m; } // function for converting a list of bytes to a gmp bignum object. inline mpz_class lstompz(const std::string& v) { return lstompz((uint8_t*)&v[0], (uint8_t*)&v[0]+v.size()); } // function for converting a gmp bignum object to a list of bytes. inline std::string mpztoms(int requiredbytes, const mpz_class& m) { int endian= 0; // endianness does not matter for bytes int nails= 0; // all bits used int order= ORDER_MS_FIRST; if (m<0) print("PROBLEM: can't convert negative mpz to bytes\n"); size_t n= mpz_sizeinbase(m.get_mpz_t(), 256); if (requiredbytes==0) requiredbytes= n; std::string v(requiredbytes, char(0)); mpz_export(&v[v.size()-n], &n, order, sizeof(uint8_t), endian, nails, m.get_mpz_t()); return v; } // decrypt the contents of the '$ original user' node. 
std::string decryptuser(const std::string& encvector) { mpz_class exp(0x13); uint8_t modbytes[]= { 0xED,0xFD,0x42,0x5C,0xF9,0x78,0x54,0x6E,0x89,0x11,0x22,0x58,0x84,0x43,0x6C,0x57, 0x14,0x05,0x25,0x65,0x0B,0xCF,0x6E,0xBF,0xE8,0x0E,0xDB,0xC5,0xFB,0x1D,0xE6,0x8F, 0x4C,0x66,0xC2,0x9C,0xB2,0x2E,0xB6,0x68,0x78,0x8A,0xFC,0xB0,0xAB,0xBB,0x71,0x80, 0x44,0x58,0x4B,0x81,0x0F,0x89,0x70,0xCD,0xDF,0x22,0x73,0x85,0xF7,0x5D,0x5D,0xDD, 0xD9,0x1D,0x4F,0x18,0x93,0x7A,0x08,0xAA,0x83,0xB2,0x8C,0x49,0xD1,0x2D,0xC9,0x2E, 0x75,0x05,0xBB,0x38,0x80,0x9E,0x91,0xBD,0x0F,0xBD,0x2F,0x2E,0x6A,0xB1,0xD2,0xE3, 0x3C,0x0C,0x55,0xD5,0xBD,0xDD,0x47,0x8E,0xE8,0xBF,0x84,0x5F,0xCE,0xF3,0xC8,0x2B, 0x9D,0x29,0x29,0xEC,0xB7,0x1F,0x4D,0x1B,0x3D,0xB9,0x6E,0x3A,0x8E,0x7A,0xAF,0x93, }; mpz_class mod= lstompz(modbytes, modbytes+sizeof(modbytes)); mpz_class val= lstompz(encvector); mpz_class res; mpz_powm(res.get_mpz_t(), val.get_mpz_t(), exp.get_mpz_t(), mod.get_mpz_t()); return mpztoms(sizeof(modbytes)-1, res); } #endif /* * dump structs + unions */ void dumpstructmember(const StructMember& mem) { print(" %02x %02x %08x %02x: %-40s", mem.skip(), mem.size(), mem.flags(), mem.props(), mem.name()); uint64_t enumid = mem.enumid(); if (enumid) print(" enum %08x", enumid); uint64_t structid = mem.structid(); if (structid) print(" struct %08x", structid); auto ptrinfo = mem.ptrinfo(); if (!ptrinfo.empty()) print(" ptr %b", ptrinfo); auto type= mem.typeinfo(); if (!type.empty()) print(" type %b", type); print("\n"); return; } void dumpstruct(const Struct& s) { print("struct %s, 0x%x, 0x%x\n", s.name(), s.flags(), s.seqnr()); for (const auto& mem : s) dumpstructmember(mem); } /* * dump bitfields */ void dumpbfvalue(const BitfieldValue& val) { print(" %16x %s\n", val.value(), val.name()); } void dumpbfmask(const BitfieldMask& msk) { print(" mask %x", msk.mask()); auto name = msk.name(); if (!name.empty()) print(" - %s", name); print("\n"); auto c = msk.first(); while (c.getkey() < msk.lastkey()) { 
dumpbfvalue(msk.getvalue(c)); c.next(); } } void dumpbitfield(ID0File & id0, uint64_t bfnode) { Bitfield e(id0, bfnode); print("bitfield %s, 0x%x, 0x%x, 0x%x\n", e.name(), e.count(), e.representation(), e.flags()); auto c = e.first(); while (c.getkey() < e.lastkey()) { dumpbfmask(e.getmask(c)); c.next(); } } /* * dump enums */ void dumpenummember(const EnumMember& e) { print(" %08x %s\n", e.value(), e.name()); } void dumpenum(ID0File& id0, const Enum& e) { if (e.flags()&1) { dumpbitfield(id0, e.nodeid()); return; } print("enum %s, 0x%x, 0x%x, 0x%x\n", e.name(), e.count(), e.representation(), e.flags()); auto c = e.first(); while (c.getkey() < e.lastkey()) { dumpenummember(e.getvalue(c)); c.next(); } } /* * print list of structs / enums */ void printidbstructs(ID0File& id0) { auto list = List(id0, id0.node("$ structs")); while (!list.eof()) try { dumpstruct(list.next()); } catch(const char*msg) { print("struct entry with error found\n"); } } void printidbenums(ID0File& id0) { auto list = List(id0, id0.node("$ enums")); while (!list.eof()) dumpenum(id0, list.next()); } void printcomments(ID0File& id0) { // todo // tool which lists all comments found in the database /* -- nalt.hpp: NSUP_CMT = 0 .{0X00001ddd 53 supvals 0} => "normal comment" 00 -- nalt.hpp: NSUP_REPCMT = 1 .{0X00001dd7 53 supvals 1} => "repeatable comment" 00 -- lines.hpp: E_PREV = 1000 .{0X00001ddd 53 supvals 1000} => "anterior comment" 00 .{0X00001ddd 53 supvals 1001} => "more" 00 .{0X00001ddd 53 supvals 1002} => "and another anterior" 00 -- lines.hpp: E_NEXT = 2000 .{0X00001ddd 53 supvals 2000} => "posterior comment" 00 .{0X00001ddd 53 supvals 2001} => "more" 00 .{0X00001ddd 53 supvals 2002} => "and another posterior" 00 */ } /* .{0X00001dd9 name} => "globallabel" 00 N:"globallabel" => 0x00001dd9 .. 
func prop -- nalt.hpp: NSUP_LLABEL .{0X00001bb0 53 supvals 0x5000} => {82 27 0a:"locallabel"} */ void printnames(ID0File& id0, ID1File& id1, NAMFile& nam, bool listall) { nam.enumerate([&](uint64_t ea){ uint64_t f= id1.GetFlags(ea); std::string name= id0.getname(ea); if (listall || !(f&0x8000)) print("%08x: [%08x] %s\n", ea, f, name); // todo: filter out nullsub, jpt_XXX, thunks (j_...) }); } // print each address in the form: