看开源代码如何解析ELF文件:工具ROPgadget
ROPgadget能够识别并分析多种文件结构,这次主要借助这个功能来分析ELF文件格式。
分析的文件为libc.so。
下面上代码:
class Binary:
    """Load a binary from disk and wrap it in the parser matching its format.

    The parser is chosen from the file's leading magic bytes, unless the
    caller forces raw mode by setting both ``options.rawArch`` and
    ``options.rawMode``.
    """

    def __init__(self, options):
        """Read ``options.binary`` and select a format-specific parser.

        Args:
            options: object exposing ``binary`` (path of the file to load),
                ``rawArch`` and ``rawMode``.

        If the file cannot be read, or its format is not recognised, an
        error is printed and no parser is stored on the instance.
        """
        self.__fileName = options.binary
        self.__rawBinary = None
        self.__binary = None

        try:
            # The context manager closes the descriptor even when read() fails.
            with open(self.__fileName, "rb") as fd:
                self.__rawBinary = fd.read()
        except (IOError, OSError, TypeError, ValueError):
            # Missing/unreadable file, or an unusable path value.
            print("[Error] Can't open the binary or binary not found")
            return None

        raw = self.__rawBinary
        if options.rawArch and options.rawMode:
            # Explicit raw mode takes precedence over magic-byte detection.
            self.__binary = Raw(raw, options.rawArch, options.rawMode)
        elif raw[:4] == unhexlify(b"7f454c46"):
            self.__binary = ELF(raw)
        elif raw[:2] == unhexlify(b"4d5a"):
            self.__binary = PE(raw)
        elif raw[:4] == unhexlify(b"cafebabe"):
            self.__binary = UNIVERSAL(raw)
        elif raw[:4] in (unhexlify(b"cefaedfe"), unhexlify(b"cffaedfe")):
            self.__binary = MACHO(raw)
        else:
            print("[Error] Binary format not supported")
            return None
在binary.py中进行文件类型判定:ELF文件最开始的四个字节为‘7f454c46’。
class ELFFlags:
    """Numeric constants used while decoding an ELF header."""

    # Values of the class byte: 32-bit or 64-bit file.
    ELFCLASS32 = 0x01
    ELFCLASS64 = 0x02

    # Indexes into the e_ident block at the start of the file.
    EI_CLASS = 0x04
    EI_DATA = 0x05

    # Values of the data byte: little-endian or big-endian encoding.
    ELFDATA2LSB = 0x01
    ELFDATA2MSB = 0x02

    # Values of the header's e_machine field (target CPU).
    EM_386 = 0x03
    EM_X86_64 = 0x3e
    EM_ARM = 0x28
    EM_MIPS = 0x08
    EM_SPARCv8p = 0x12
    EM_PowerPC = 0x14
    EM_ARM64 = 0xb7
class ELF:
    """Parsed view of an ELF image: file header plus section/program headers."""

    def __init__(self, binary):
        """Copy *binary* into a byte buffer and parse its header tables."""
        self.__binary = bytearray(binary)
        self.__ElfHeader = None
        self.__shdr_l = []
        self.__phdr_l = []

        # The file header goes first: the two table parsers read their
        # offsets and entry counts from it.
        self.__tHeaderElf()
        self.__tShdr()
        self.__tPhdr()
以上是ELF文件类的初始化。
def __tHeaderElf(self):
    """Parse the ELF file header and store it in ``self.__ElfHeader``.

    The class byte (32/64-bit) and data byte (endianness) of the
    identification block decide which header structure is used to decode
    the start of the buffer. If either byte holds an unknown value, an
    error is printed and ``self.__ElfHeader`` is left untouched.
    """
    e_ident = self.__binary[:16]  # the 16-byte ELF identification block
    ei_class = e_ident[ELFFlags.EI_CLASS]
    ei_data = e_ident[ELFFlags.EI_DATA]

    if ei_class not in (ELFFlags.ELFCLASS32, ELFFlags.ELFCLASS64):
        print("[Error] ELF.__tHeaderElf() - Bad Arch size")
        return None

    if ei_data not in (ELFFlags.ELFDATA2LSB, ELFFlags.ELFDATA2MSB):
        print("[Error] ELF.__tHeaderElf() - Bad architecture endian")
        return None

    # Both bytes are validated above, so one of the four cases always matches.
    if ei_class == ELFFlags.ELFCLASS32:
        if ei_data == ELFFlags.ELFDATA2LSB:
            self.__ElfHeader = Elf32_Ehdr_LSB.from_buffer_copy(self.__binary)
        else:
            self.__ElfHeader = Elf32_Ehdr_MSB.from_buffer_copy(self.__binary)
    else:
        if ei_data == ELFFlags.ELFDATA2LSB:
            self.__ElfHeader = Elf64_Ehdr_LSB.from_buffer_copy(self.__binary)
        else:
            self.__ElfHeader = Elf64_Ehdr_MSB.from_buffer_copy(self.__binary)
设置ELF文件头
ELF前16个字节称为魔数
其中前四字节之前已经说过了,第一个字节是ASCII字符中的DEL控制符,后三个是“ELF”三个字母的ASCII码
第5个字节为Class位,0为无效文件,1为32位文件,2为64位文件
第6个字节指定字节序(Data),有以下取值:
0 无效格式
1 小端格式
2 大端格式
根据第5和第6字节的信息选择相应的拷贝方式(如32位小端等)
def getArch(self):
    """Map the header's ``e_machine`` value to a ``CS_ARCH_*`` constant.

    Returns:
        The matching ``CS_ARCH_*`` value, or ``None`` (after printing an
        error) when the machine type is not one of the handled ones.
    """
    machine = self.__ElfHeader.e_machine

    # 32-bit and 64-bit x86 map to the same architecture constant.
    if machine == ELFFlags.EM_386 or machine == ELFFlags.EM_X86_64:
        return CS_ARCH_X86
    elif machine == ELFFlags.EM_ARM:
        return CS_ARCH_ARM
    elif machine == ELFFlags.EM_ARM64:
        return CS_ARCH_ARM64
    elif machine == ELFFlags.EM_MIPS:
        return CS_ARCH_MIPS
    elif machine == ELFFlags.EM_PowerPC:
        return CS_ARCH_PPC
    elif machine == ELFFlags.EM_SPARCv8p:
        return CS_ARCH_SPARC
    else:
        print("[Error] Arch() - Architecture not supported")
        return None
e_machine是一个双字节(第19、20字节)的成员,表示CPU平台属性
之后执行函数
def __tShdr(self):  # set up the section headers
    """Parse every section header into ``self.__shdr_l`` and name each one.

    Entries are decoded from the section header table, which starts at
    ``e_shoff`` and holds ``e_shnum`` entries of ``e_shentsize`` bytes.
    When ``e_shstrndx`` is non-zero, each parsed header also receives a
    ``str_name`` attribute read from the section-name string table.
    """
    header = self.__ElfHeader
    shdr_num = header.e_shnum  # number of sections
    ei_data = self.__binary[ELFFlags.EI_DATA]

    # NOTE(review): the extracted text reads ``ArchMode() == CS_MODE_32:``;
    # restored here as a call to self.getArchMode(), which is not part of
    # this excerpt -- confirm against the full class.
    mode = self.getArchMode()

    for index in range(shdr_num):
        # Decode in place at the entry's offset instead of re-copying the
        # remainder of the buffer for every entry.
        offset = header.e_shoff + index * header.e_shentsize
        if mode == CS_MODE_32:
            if ei_data == ELFFlags.ELFDATA2LSB:  # 32-bit little-endian
                shdr = Elf32_Shdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                shdr = Elf32_Shdr_MSB.from_buffer_copy(self.__binary, offset)
        elif mode == CS_MODE_64:
            if ei_data == ELFFlags.ELFDATA2LSB:
                shdr = Elf64_Shdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                shdr = Elf64_Shdr_MSB.from_buffer_copy(self.__binary, offset)
        self.__shdr_l.append(shdr)

    # setup name from the strings table
    if header.e_shstrndx != 0:
        strtab_offset = self.__shdr_l[header.e_shstrndx].sh_offset
        # Keep the table as bytes: str() of a bytearray yields its repr on
        # Python 3, which would shift every name offset.
        string_table = bytes(self.__binary[strtab_offset:])
        for index in range(shdr_num):
            entry = self.__shdr_l[index]
            raw_name = string_table[entry.sh_name:].split(b"\0", 1)[0]
            # latin-1 maps each byte to one character and cannot fail.
            entry.str_name = raw_name.decode("latin-1")
该函数处理段头部
第一行self.__ElfHeader.e_shnum表示段数量,e_shnum也是一个双字节成员(49,50字节),在本机的实际运行中可以看到libc.so的段数量为32(相当多)
第二行self.__ElfHeader.e_shoff代表段表在文件中的偏移,32位版本中为4字节(33,34,35,36字节),实际运行值为0x4b88f
之后同样按照32位小端格式从段表中拷贝并添加到self.__shdr_l
self.__ElfHeader.e_shentsize指段表描述符大小,双字节(47,48字节),实际运行值40
self.__ElfHeader.e_shstrndx指段表字符串表所在段在段表中的下标,双字节(51,52字节),实际运行值为31,也就是说段表中最后一个段是段表字符串表所在段(好拗口)。根据这个值找到字符串表所在段,然后依次把名字分给各段
下一个函数
def __tPhdr(self):
    """Parse every program header into ``self.__phdr_l``.

    Entries are decoded from the program header table, which starts at
    ``e_phoff`` and holds ``e_phnum`` entries of ``e_phentsize`` bytes.
    """
    header = self.__ElfHeader
    phdr_num = header.e_phnum
    ei_data = self.__binary[ELFFlags.EI_DATA]

    # NOTE(review): the extracted text reads ``ArchMode() == CS_MODE_32:``;
    # restored here as a call to self.getArchMode(), which is not part of
    # this excerpt -- confirm against the full class.
    mode = self.getArchMode()

    for index in range(phdr_num):
        # Decode in place at the entry's offset instead of re-copying the
        # remainder of the buffer for every entry.
        offset = header.e_phoff + index * header.e_phentsize
        if mode == CS_MODE_32:
            if ei_data == ELFFlags.ELFDATA2LSB:
                phdr = Elf32_Phdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                phdr = Elf32_Phdr_MSB.from_buffer_copy(self.__binary, offset)
        elif mode == CS_MODE_64:
            if ei_data == ELFFlags.ELFDATA2LSB:
                phdr = Elf64_Phdr_LSB.from_buffer_copy(self.__binary, offset)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                phdr = Elf64_Phdr_MSB.from_buffer_copy(self.__binary, offset)
        self.__phdr_l.append(phdr)
self.__ElfHeader.e_phnum是ELF执行视图中Segment的个数,双字节(45,46字节),实际结果9
self.__ElfHeader.e_phoff是Segment表(程序头表)在文件中的偏移,32位版本中为4字节(29,30,31,32字节)
之后和段表一样,放入self.__phdr_l