From a8022aa8d573d52de9343b17b073feae833afa24 Mon Sep 17 00:00:00 2001 From: Clement Rouault Date: Thu, 7 Aug 2014 20:36:26 -0400 Subject: [PATCH] Working on data / fixing inconsistency in code Xref --- __init__.py | 11 +++-- data.py | 109 ++++++++++++++++++++++++++++++++++++++++++++- elt.py | 119 +++++++++++++++++++++++++++++++++++++++++++++++--- functions.py | 105 ++++++++++++++++++++++++++++++++++++-------- ida_import.py | 6 +-- idb.py | 4 +- xref.py | 14 +++++- 7 files changed, 331 insertions(+), 37 deletions(-) diff --git a/__init__.py b/__init__.py index 9825ec7..564aeed 100644 --- a/__init__.py +++ b/__init__.py @@ -8,8 +8,9 @@ import xref import ida_import import idb +import data -all_submodules_name = ['elt', 'functions', 'xref', 'ida_import', 'idb'] +all_submodules_name = ['elt', 'functions', 'xref', 'ida_import', 'idb', 'data'] def get_full_submodule_name(name): @@ -21,12 +22,12 @@ def get_full_submodule_name(name): #TODO : You know what for problem with kernel32.dll #TODO data.py -> Bytes / Word / Dword / String / cstr +# ?? : Undef Data et (Undef code : exist now) ? # helper : reload(MIDAP); MIDAP.reload(); g = MIDAP.functions.MFunctions(); f = MIDAP.fhere(); i = MIDAP.ihere() # TODO: structures ? -# Entry point: list(Entries()) #TODO : CLEAR ON RELOAD @@ -65,10 +66,14 @@ def bhere(): def fhere(): "Typed here(): return current Function" return functions.IDAFunction.get_func(idc.here()) + +def dhere(): + "Typed here(): return current Data" + return data.Data.new_data_by_type(idc.here()) fixup_late_import() -self = idb.IDB() +self = idb.current diff --git a/data.py b/data.py index 38ea0cc..0a6b53e 100644 --- a/data.py +++ b/data.py @@ -1,2 +1,107 @@ -class Data(object): - pass \ No newline at end of file +import elt + +import idc + +# why not: automatic subclassing :) +class Data(elt.IDANamedSizedElt): + + # size == 0 doesn't make sens: need automatic subclassing :D + size = 0 + match = (lambda *args : False) + # addr -> bool + # staticmethod + _get_value_func = None + + def value(self): + """ + [property] [get | set] + The value of the Data object. + DataByte.value = 0x42 will patch the byte at DataByte.addr + """ + return self._get_value_func(self.addr) + + def set_value(self, value, litte_endian=True): + # TODO: ask endianness to IDB + initial_value = value + bytes = [] + for i in range(self.size): + bytes.append(value & 255) + value = value >> 8 + if value != 0: + raise ValueError("value {0} is too big for {1}".format(hex(initial_value), self.__class__.__name__)) + if not litte_endian: + bytes.reverse() + self.patch(bytes, False) + + value = property(value, set_value, None) + + + def __init__(self, addr): + super(Data, self).__init__(addr, addr + self.size) + + @property + def is_byte(self): + return idc.isByte(self.flags) + + @property + def is_word(self): + return idc.isWord(self.flags) + + @property + def is_dword(self): + return idc.isDwrd(self.flags) + + @property + def is_qword(self): + return idc.isQwrd(self.flags) + + def __IDA_repr__(self): + if self._get_value_func is None: + return "" + return hex(self.value) + + # @property + # def next(self): + # return Data.new_data_by_type( + + @classmethod + def new_data_by_size(cls, addr, size): + for subcls in cls.__subclasses__(): + if subcls.size == size: + return subcls(addr) + raise ValueError("Don't know how to handle size = {0}".format(size)) + + @classmethod + #Name new_data ? (most general use) + def new_data_by_type(cls, addr): + data = Data(addr) + for subcls in cls.__subclasses__(): + if subcls.match(data): + return subcls(addr) + raise ValueError("Don't know how to handle addr = {0}".format(hex(addr))) + + + +class ByteData(Data): + size = 1 + _get_value_func = staticmethod(idc.Byte) + match = staticmethod(Data.is_byte.fget) + +class WordData(Data): + size = 2 + _get_value_func = staticmethod(idc.Word) + match = staticmethod(Data.is_word.fget) + +class DwordData(Data): + size = 4 + _get_value_func = staticmethod(idc.Dword) + match = staticmethod(Data.is_dword.fget) + + +class DwordData(Data): + size = 8 + _get_value_func = staticmethod(idc.Qword) + match = staticmethod(Data.is_qword.fget) + + + \ No newline at end of file diff --git a/elt.py b/elt.py index 34a5023..8af002c 100644 --- a/elt.py +++ b/elt.py @@ -1,7 +1,7 @@ import idc import idautils -late_import = ['xref'] +late_import = ['xref', 'data'] class IDAElt(object): @@ -13,6 +13,10 @@ def get_addr(self): addr = property(get_addr, None, None, 'ea of the object') + @classmethod + def get_all(cls): + return [cls(x) for x in idautils.Heads()] + #property ? def goto(self): idc.Jump(self.addr) @@ -21,21 +25,84 @@ def __int__(self): return self.addr def __repr__(self): - return "<{0}>".format(self.__IDA_repr__() + " ".format(hex(self.addr))) + + return "<{cls} {ida_repr} >".format( + cls=self.__class__.__name__, + ida_repr=self.__IDA_repr__(), + addr=hex(self.addr)) def __IDA_repr__(self): - return self.__class__.__name__ + return "" @property def xfrom(self): + """ List of all XrefsFrom the element """ return [xref.Xref(x) for x in idautils.XrefsFrom(self.addr, False)] @property def xto(self): + """ List of all XrefsTo the element """ return [xref.Xref(x) for x in idautils.XrefsTo(self.addr, False)] - #Create data xref ? - + @property + def flags(self): + return idc.GetFlags(self.addr) + + # do LineA and LineB ? for comments ? + + # Flags + + @property + def is_code(self): + return idc.isCode(self.flags) + + @property + def is_data(self): + return idc.isData(self.flags) + + @property + def is_unknow(self): + return idc.isUnknown(self.flags) + + @property + def is_head(self): + return idc.isHead(self.flags) + + @property + def is_tail(self): + return idc.isTail(self.flags) + + # useful ? here ? + @property + def is_var(self): + return idc.isVar(self.flags) + + @property + def has_extra_comment(self): + """ + Does this address has extra prev ou next line comments ? + - see LineA and LineB + """ + return idc.isExtra(self.flags) + + @property + def has_ref(self): + return idc.isRef(self.flags) + + @property + def has_value(self): + return idc.hasValue(self.flags) + + # comments: property ? for normal and repeteable ? + def set_comment(self, comment, repeteable=True): + if repeteable: + idc.MakeRptCmt(self.addr, comment) + else: + return idc.MakeComm(self.addr, comment) + + def get_comment(self, repeteable=True): + return idc.CommentEx(self.addr, repeteable) + class IDANamedElt(IDAElt): """ Real base class : looks like everything can have a name """ @@ -52,11 +119,25 @@ def set_name(self, name): def __IDA_repr__(self): if self.name is not "": - return self.__class__.__name__ + " " + self.name - return super(IDANamedElt, self).__IDA_repr__() + return self.name + return "{no name}" + + # Do not use the Has*Name from idc because these have no sens + @property + def has_user_name(self): + return bool(self.flags & idc.FF_NAME) + + @property + def has_dummy_name(self): + return bool(self.flags & idc.FF_LABL) + + @property + def has_name(self): + return bool(self.flags & idc.FF_ANYNAME) class IDASizedElt(IDAElt): + # always use NextHead to get endaddr ? def __init__(self, addr, endaddr, nb_elt=None): """ endaddr: first addr not part of the element """ super(IDASizedElt, self).__init__(addr, endaddr, nb_elt) @@ -69,6 +150,30 @@ def __init__(self, addr, endaddr, nb_elt=None): def __contains__(self, value): return self.addr <= value < self.endADDR + def patch(self, patch, fill_nop=True): + print("PATCH ASKED at <{0}| size {1}> with {2}".format(self.addr, self.size, patch)) + nop = 0x90 #<- need to adapt to other platform + if self.size < len(patch): + raise ValueError("Patch if too big for {0}".format(self)) + if self.size != len(patch) and not fill_nop: + raise Value("Patch is too small for {0} and no fill_patch (better idea than raise ?)".format(self)) + + full_patch = list(patch) + [nop] * (self.size - len(patch)) + for addr, byte in zip(range(self.addr, self.addr + self.size), full_patch): + if idc.Byte(addr) == byte: + print("NOPATCH BYTE : SAME VALUE") + continue + if not idc.PatchByte(addr, byte): + print("PATCH addr {0} with byte {1} failed".format(hex(addr), hex(byte))) + + def replace(self, value): + return self.patch([value] * self.size) + + @property + def bytes(self): + return [data.ByteData(addr) for addr in range(self.addr, self.addr + self.size)] + + class IDANamedSizedElt(IDASizedElt, IDANamedElt): pass \ No newline at end of file diff --git a/functions.py b/functions.py index 47626f1..13ea729 100644 --- a/functions.py +++ b/functions.py @@ -16,11 +16,20 @@ # TODO : real name for constructor get_func and get_block -# TODO : real name for MFunctions -class IDAFunction(elt.IDANamedSizedElt): + + +class IDACodeElt(elt.IDANamedSizedElt): + def nop(self): + self.patch("", True) + + # undefine ? + # reanalyse ? + + +class IDAFunction(IDACodeElt): # Constructors @classmethod @@ -59,9 +68,12 @@ def get_comment(self, repeteable=True): def set_comment(self, comment, repeteable=True): return idc.SetFunctionCmt(self.addr, comment, repeteable) + + # Noping a complete fonction is not good for analyser + # Maybe leave the last instr ? -class IDABlock(elt.IDANamedSizedElt): +class IDABlock(IDACodeElt): #Constructors @classmethod @@ -106,13 +118,52 @@ def func(self): except ValueError: return None -# TODO: use DecodeInstruction and insn_t ? -class IDAInstr(elt.IDASizedElt): +# TODO: use DecodeInstruction and insn_t ? <- yep, later for instr in instr + + + +class IDAUndefInstr(elt.IDANamedElt): + def __init__(self, addr): + super(IDAUndefInstr, self).__init__(addr) + self.size = 0 + self.mnemo = "" + self.operands = [] + + property_ret_none = property(lambda self: None) + # auto lookup for import ? IDAImportInstr ? + func = property_ret_none + block = property_ret_none + next = property_ret_none + prev = property_ret_none + jump = property_ret_none + switch = property(lambda self: []) + data = property_ret_none + is_flow = property(lambda self: False) + + + + +class IDAImportInstr(IDAUndefInstr): + def __init__(self, addr, imp): + super(IDAImportInstr, self).__init__(addr) + self.imp = imp + + @property + def func(self): + return self.imp + + +# TODO : UndefInstr for call to undef place and call to exterieur (IAT) +class IDAInstr(IDACodeElt): def __init__(self, addr, block=None): end_addr = idc.NextHead(addr) super(IDAInstr, self).__init__(addr, end_addr) - self.mnemo = idc.GetMnem(addr) + + #Get Operend may disass unknow Bytes so put it before GetMnem (do we need to accept this behaviour ?) self.operands = [idc.GetOpnd(addr , i) for i in range(idaapi.UA_MAXOP) if idc.GetOpnd(addr , i) is not ""] + self.mnemo = idc.GetMnem(addr) + if self.mnemo == "": + raise ValueError("address <{0}> is not an instruction".format(hex(self.addr))) self.completeinstr = "{0} {1}".format(self.mnemo, ",".join(self.operands)) self._block = block @@ -148,14 +199,31 @@ def next(self): return None return normal_next[0] + @property + def prev(self): + if not self.has_flow_prev: + return None + return IDAInstr(idc.PrevHead(self.addr)) + + + def _get_instr_jumps(self): + return [x for x in self._gen_code_xfrom(True) if x.is_code and not x.is_nflow] + @property def jump(self): - jump_next = [x for x in self._gen_code_xfrom(True) if x.is_code and not x.is_nflow] - if len(jump_next) > 1: - raise ValueError("Instruction {0} has more that one jump flow xrefFrom".format(self)) - if not jump_next: + jump_next = self._get_instr_jumps() + if len(jump_next) != 1: + # This is not a simple call / jmp + # THIS IS A SWITCH (see switch property) return None return jump_next[0] + + @property + def switch(self): + jump_next = self._get_instr_jumps() + if len(jump_next) <= 1: + return None + return jump_next #Todo : rename @@ -168,17 +236,16 @@ def data(self): return None return datas[0] - def set_comment(self, comment, repeteable=True): - if repeteable: - idc.MakeRptCmt(self.addr, comment) - else: - return idc.MakeComm(self.addr, comment) - def get_comment(self, repeteable=True): - return idc.CommentEx(self.addr, repeteable) + @property + def is_flow(self): + return idc.isFlow(self.flags) + + has_flow_prev = is_flow def __IDA_repr__(self): - return self.completeinstr + return "{" + self.completeinstr + "}" - \ No newline at end of file + + \ No newline at end of file diff --git a/ida_import.py b/ida_import.py index ebbf882..024148d 100644 --- a/ida_import.py +++ b/ida_import.py @@ -47,7 +47,7 @@ def __init__(self): self.exports_by_name[name] = x #Entry point : doesnt works for ELF.. # TODO: find real entry point method - if b == c: + if addr == ordinal: self.entry_point = x # No more default dict ! self.exports_by_addr.default_factory = None @@ -91,8 +91,8 @@ def __IDA_repr__(self): descr = "ord={0}".format(self.ord) if self.name is not "": descr = "name={0}".format(self.name) - module = ", module={0}".format(self.module_name) if self.module_name else "" - return "{0}({1}{2})".format(self.__class__.__name__,descr, self.module_name) + module = ", module={0}".format(self.module) if self.module else "" + return "({1}{2})".format(self.__class__.__name__,descr, module) class IDAImportList(object): # Really need import by name ? diff --git a/idb.py b/idb.py index 7e7604b..ae2a44e 100644 --- a/idb.py +++ b/idb.py @@ -25,10 +25,9 @@ def __init__(self): self.format = "ELF" else: raise ValueError("Unknow format <{0}>".format(filetype)) - self.init = True - self.imports = ida_import.IDAImportList() self.exports = ida_import.IDAExportList() + self.init = True @@ -47,3 +46,4 @@ def Functions(self): return functions.IDAFunction.get_all() +current = IDB() \ No newline at end of file diff --git a/xref.py b/xref.py index 2de28b5..00e4bd3 100644 --- a/xref.py +++ b/xref.py @@ -1,6 +1,6 @@ import idc -late_import = ['elt', 'functions'] +late_import = ['elt', 'functions', 'data', 'idb'] @@ -52,6 +52,14 @@ def frm(self): @property def to(self): + # This is a code xref dans to is not code: import + # If someone found another case: tell me please ! + dst = elt.IDAElt(self.xref.to) + if not dst.is_code: + if dst in idb.current.imports: + return functions.IDAImportInstr(self.xref.to, idb.current.imports[dst]) + else: + return functions.IDAUndefInstr(self.xref.to) return functions.IDAInstr(self.xref.to) @property @@ -73,6 +81,10 @@ def __init__(self, xref): super(DataXref, self).__init__(xref) if (self.xref.type & 0x1f) in (idc.dr_I,): print("DEBUG strange xref with dr_I") + + @property + def to(self): + return data.Data.new_data_by_type(self.xref.to) @property def is_offset(self):