Execution transfert event
Introduction
One limitation of QBDI is it shares the heap and some library with the instrumented code. With this design, the user may use any shared library and doesn’t need to statically link all their dependencies with their code. However, some method must not be instrumented in QBDI:
The heap allocator method (malloc, free, …).
Any no reentrant method shared between the target code and QBDI itself.
Any no reentrant method shared between the target code and user callbacks.
When the target code calls one of these methods, QBDI restores the native execution. The return address is changed in order to catch the return and to continue the instrumentation of the code. Two events allow the user to detect this mechanism:
EXEC_TRANSFER_CALL
: called before restoring native execution for the method.EXEC_TRANSFER_RETURN
: called after the execution of the method.
These two events can be used to retrieve the method and its parameters before the call and its return value after.
EXEC_TRANSFER_CALL
can also be used to emulate a method call.
Get native call symbols
When QBDI needs to restore the native execution, the user may retrieve the name of the calling method
based on the current address. The associated symbol can be found with dladdr
(on Linux and OSX) or SymFromAddr
(on Windows).
We recommend forcing the linker to resolve all symbols before running the VM. This can be achieved with:
LD_BIND_NOW=1
on LinuxDYLD_BIND_AT_LAUNCH=1
on OSX
With dladdr
dladdr
may not find the symbol associated with an address if it’s not an exported symbol.
If several symbols are associated, only one is returned.
static VMAction transfertcbk(VMInstanceRef vm, const VMState *vmState, GPRState *gprState, FPRState *fprState, void *data) {
Dl_info info = {0};
dladdr((void*)gprState->rip, &info);
if (info.dli_sname != NULL) {
printf("Call %s (addr: 0x%" PRIRWORD ")\n", info.dli_sname, gprState->rip);
} else {
printf("Call addr: 0x%" PRIRWORD "\n", gprState->rip);
}
return QBDI_CONTINUE;
}
qbdi_addVMEventCB(vm, QBDI_EXEC_TRANSFER_CALL, transfertcbk, NULL);
import ctypes
import ctypes.util
class Dl_info(ctypes.Structure):
_fields_ = [('dli_fname', ctypes.c_char_p),
('dli_fbase', ctypes.c_void_p),
('dli_sname', ctypes.c_char_p),
('dli_saddr', ctypes.c_void_p)]
libdl_path = ctypes.util.find_library('dl')
assert libdl_path != None
libdl = ctypes.cdll.LoadLibrary(libdl_path)
libdl.dladdr.argtypes = (ctypes.c_void_p, ctypes.POINTER(Dl_info))
def dladdr(addr):
res = Dl_info()
result = libdl.dladdr(ctypes.cast(addr, ctypes.c_void_p), ctypes.byref(res))
return res.dli_sname
def transfertcbk(vm, vmState, gpr, fpr, data):
print("Call {} (addr: 0x{:x})".format(
dladdr(gpr.rip),
gpr.rip))
return pyqbdi.CONTINUE
vm.addVMEventCB(pyqbdi.EXEC_TRANSFER_CALL, transfertcbk, None)
With lief
Lief is a C, C++ and python library
that aims to parse ELF, PE and MachO file formats. This library can
extract all the symbols associated with an address, including the non-exported one.
This solution can resolve more addresses, but could be slower than dladdr
.
For ELF binary, the following code prints for each EXEC_TRANSFER_CALL
event, the symbols associated with the target address. For PE library, the user may
need to parse the PDB file of the library to get the symbol associated with the
target address.
#include <LIEF/LIEF.hpp>
class Module {
public:
std::string path;
QBDI::Range<QBDI::rword> range;
Module(const QBDI::MemoryMap& m) : path(m.name), range(m.range) {}
void append(const QBDI::MemoryMap& m) {
if (m.range.start() < range.start()) {
range.setStart(m.range.start());
}
if (m.range.end() > range.end()) {
range.setEnd(m.range.end());
}
}
};
class AddrResolver {
private:
std::vector<Module> modules;
std::unordered_set<std::string> loaded_path;
std::unordered_map<QBDI::rword, std::unordered_set<std::string>> resolv_cache;
void cacheModules();
const Module* getModule(QBDI::rword addr, bool reload = true);
void loadModule(const Module& m);
public:
AddrResolver() {
cacheModules();
}
const std::unordered_set<std::string>& resolve(QBDI::rword addr);
};
void AddrResolver::cacheModules() {
modules.clear();
for (const auto& map : QBDI::getCurrentProcessMaps(true)) {
auto r = std::find_if(std::begin(modules), std::end(modules),
[&](const Module& m){return m.path == map.name;});
if (r != std::end(modules)) {
r->append(map);
} else if (map.name.find("/") != std::string::npos) {
modules.emplace_back(map);
}
}
}
const Module* AddrResolver::getModule(QBDI::rword addr, bool reload) {
const auto r = std::find_if(std::begin(modules), std::end(modules),
[&](const Module& m){return m.range.contains(addr);});
if (r != std::end(modules)) {
return &*r;
} else if (reload) {
cacheModules();
return getModule(addr, false);
} else {
return nullptr;
}
}
void AddrResolver::loadModule(const Module& m) {
std::cout << "Load Module " << m.path << std::endl;
if (loaded_path.find(m.path) != loaded_path.end()) {
return;
}
std::unique_ptr<LIEF::ELF::Binary> externlib = LIEF::ELF::Parser::parse(m.path);
if (not externlib) {
return;
}
for (const auto& s: externlib->symbols()) {
QBDI::rword addr = s.value() + m.range.start();
resolv_cache[addr].emplace(s.demangled_name());
}
loaded_path.emplace(m.path);
}
const std::unordered_set<std::string>& AddrResolver::resolve(QBDI::rword addr) {
const auto & symnames = resolv_cache[addr];
if (!symnames.empty()) {
return symnames;
}
std::cout << std::setbase(16) << "Fail to found 0x" << addr << std::endl;
const Module* m = getModule(addr);
if (m != nullptr) {
loadModule(*m);
}
return symnames;
}
QBDI::VMAction transfertCBK(QBDI::VMInstanceRef vm, const QBDI::VMState* vmState, QBDI::GPRState* gprState, QBDI::FPRState* fprState, void* data) {
const std::unordered_set<std::string>& r = static_cast<AddrResolver*>(data)->resolve(gprState->rip);
if (r.empty()) {
std::cout << std::setbase(16) << "Call addr: 0x" << gprState->rip << std::endl;
} else {
std::cout << "Call ";
for (const auto& s: r) {
std::cout << s << " ";
}
std::cout << std::setbase(16) << "(addr: 0x" << gprState->rip << ")" << std::endl;
}
return QBDI::CONTINUE;
}
AddrResolver data;
vm->addVMEventCB(QBDI::EXEC_TRANSFER_CALL, transfertCBK, &data);
import lief
import pyqbdi
class Module:
def __init__(self, module):
self.name = module.name
self.range = pyqbdi.Range(module.range.start, module.range.end)
def append(self, module):
assert module.name == self.name
if module.range.start < self.range.start:
self.range.start = module.range.start
if self.range.end < module.range.end:
self.range.end = module.range.end
class AddrResolver:
def __init__(self):
self.lib_cache = []
self.resolv_cache = {}
self.map_cache = self.get_exec_maps()
def get_exec_maps(self):
maps = {}
for m in pyqbdi.getCurrentProcessMaps(True):
if m.name in maps:
maps[m.name].append(m)
elif '/' in m.name:
maps[m.name] = Module(m)
return maps
def get_addr_maps(self, addr):
for _, m in self.map_cache.items():
if addr in m.range:
return m
self.map_cache = self.get_exec_maps()
for _, m in self.map_cache.items():
if addr in m.range:
return m
return None
def load_lib(self, maps):
if maps.name in self.lib_cache:
return
# use lief.PE or lief.MACO if not ELF file
lib = lief.ELF.parse(maps.name)
if lib is None:
return
for s in lib.symbols:
addr = s.value + maps.range.start
if addr in self.resolv_cache:
if s.name not in self.resolv_cache[addr]:
self.resolv_cache[addr].append(s.name)
else:
self.resolv_cache[addr] = [s.name]
self.lib_cache.append(maps.name)
def get_names(self, addr):
if addr in self.resolv_cache:
return self.resolv_cache[addr]
maps = self.get_addr_maps(addr)
if maps == None:
return []
self.load_lib(maps)
if addr in self.resolv_cache:
return self.resolv_cache[addr]
self.resolv_cache[addr] = []
return []
def transfertcbk(vm, vmState, gpr, fpr, data):
f_names = data['resolver'].get_names(gpr.rip)
if f_names != []:
print("Call {} (addr: 0x{:x})".format(f_names, gpr.rip))
else:
print("Call addr: 0x{:x}".format(gpr.rip))
return pyqbdi.CONTINUE
ctx = {
"resolver": AddrResolver(),
}
vm.addVMEventCB(pyqbdi.EXEC_TRANSFER_CALL, transfertcbk, ctx)
Using this snippet with PyQBDIPreload prints the libc calls.
$ python -m pyqbdipreload test.py ls
Call ['__strrchr_avx2'] (addr: 0x7f2aed2a8330)
Call ['setlocale', '__GI_setlocale'] (addr: 0x7f2aed17a7f0)
Call ['bindtextdomain', '__bindtextdomain'] (addr: 0x7f2aed17e000)
Call ['textdomain', '__textdomain'] (addr: 0x7f2aed1815f0)
Call ['__cxa_atexit', '__GI___cxa_atexit'] (addr: 0x7f2aed1879b0)
Call ['getopt_long'] (addr: 0x7f2aed22d3f0)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['isatty', '__isatty'] (addr: 0x7f2aed239250)
Call ['ioctl', '__ioctl', '__GI_ioctl', '__GI___ioctl'] (addr: 0x7f2aed23d590)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
Call ['__errno_location', '__GI___errno_location'] (addr: 0x7f2aed16fde0)
Call ['__libc_malloc', 'malloc', '__GI___libc_malloc', '__malloc'] (addr: 0x7f2aed1d3320)
Call ['__memcpy_avx_unaligned', '__memmove_avx_unaligned'] (addr: 0x7f2aed2ab4a0)
Call ['__errno_location', '__GI___errno_location'] (addr: 0x7f2aed16fde0)
Call ['__libc_malloc', 'malloc', '__GI___libc_malloc', '__malloc'] (addr: 0x7f2aed1d3320)
Call ['__memcpy_avx_unaligned', '__memmove_avx_unaligned'] (addr: 0x7f2aed2ab4a0)
Call ['getenv', '__GI_getenv'] (addr: 0x7f2aed186b20)
....