-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsymbolize_pdb.py
149 lines (121 loc) · 4.94 KB
/
symbolize_pdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import sys
import re
import subprocess
from trie import Trie
def multireplace(string, replacements, ignore_case=False):
    """
    Replace every hexadecimal number in ``string`` that has an entry in ``replacements``.

    A number in the text is looked up by its canonical form
    ``"{:x}".format(value)`` (lowercase, no ``0x`` prefix, no leading zeros),
    so the keys of ``replacements`` are expected to be in that form. An
    optional ``0x`` prefix and any leading zeros in the text are consumed as
    part of the match and therefore replaced together with the number.

    :param str string: string to execute replacements on
    :param dict replacements: replacement dictionary {value to find: value to replace}
    :param bool ignore_case: kept for backward compatibility; matching is always
        case insensitive because hex digits may appear in either case
    :rtype: str
    """
    if not replacements:
        # Edge case that'd produce a funny regex and cause a KeyError
        return string

    # Trie comes from the project-local ``trie`` module; its pattern() is
    # assumed to return a regex fragment matching exactly the added keys
    # (TODO confirm against trie.py).
    trie = Trie()
    for key in replacements:
        trie.add(key)

    # * The lookbehind/lookahead stop a key from matching in the middle of a
    #   longer run of hex digits, which would be a different number.
    # * The prefix is "0x" as a unit: a lone "x" must not become part of the
    #   match, otherwise int(..., 16) below raises ValueError.
    pattern = re.compile(
        r"(?<![0-9a-f])(?:0x)?0*" + trie.pattern() + r"(?![0-9a-f])",
        re.IGNORECASE,
    )

    def substitute(match):
        matched = match.group(0)
        # int() accepts the optional 0x prefix and leading zeros with base 16.
        canonical = "{:x}".format(int(matched, 16))
        # Leave the text untouched if the key is not in canonical form.
        return replacements.get(canonical, matched)

    return pattern.sub(substitute, string)
# Command line: <layout file> <symbol base directory> <text file to symbolize>
if len(sys.argv) < 4:
    sys.exit("usage: symbolize_pdb.py <layout> <symbol_base> <input>")

layout = sys.argv[1]
symbol_base = sys.argv[2]

with open(layout, 'r') as f:
    lines = f.readlines()

loaded = set()  # tracks which modules have had their symbols loaded so far
mod2pdb = {}    # lowercase module name without extension -> PDB path under symbol_base
ranges = {}     # (start, end) -> [module name, PDB path, list of (address, symbol)]

for l in lines:
    fields = l.split()
    if ".pdb" in l:
        # The last field is a backslash-separated PDB path; its final three
        # components are re-rooted under symbol_base.
        pdb_parts = fields[-1].split("\\")
        module_name = ".".join(pdb_parts[-1].split(".")[:-1]).lower()
        pdbpath = symbol_base + "/" + "/".join(pdb_parts[-3:])
        mod2pdb[module_name] = pdbpath
    elif (".exe" in l or ".dll" in l) and "`" not in l:
        # Parenthesised so the backtick filter applies to .exe lines as well
        # as .dll lines (``and`` binds tighter than ``or``).
        # Field 2 is the hex start address, field 3 the hex size and field 4
        # the module file name.
        name = ".".join(fields[4].split(".")[:-1]).lower()
        start = int(fields[2], 16)
        end = start + int(fields[3], 16)
        # Only modules whose PDB line was seen earlier in the file are kept.
        if name in mod2pdb:
            ranges[(start, end)] = [name, mod2pdb[name], []]
def loadsyms(r):
    """
    Load the symbols of the module mapped at range ``r`` into ``ranges[r][2]``.

    Runs ``llvm-pdbutil dump -all`` on the module's PDB (``ranges[r][1]``),
    collects ``(address, name)`` pairs from ``S_LPROC32`` / ``S_PUB32``
    records, passes the names through ``demumble`` and appends the resulting
    ``(address, demangled name)`` pairs to ``ranges[r][2]``, which is left
    sorted by descending address.

    :param tuple r: ``(start, end)`` key into the module-level ``ranges`` dict
    """
    result = subprocess.run(
        ['llvm-pdbutil', 'dump', '-all', ranges[r][1]],
        check=False,
        stdout=subprocess.PIPE,
    )

    name = None
    newsyms = []
    for l in result.stdout.decode('utf-8').splitlines():
        if "S_LPROC32" in l or "S_PUB32" in l:
            # The symbol name is whatever sits between the first and the last
            # backtick of the record line.
            name = "".join(l.split("`")[1:-1])
            continue
        if name is None or "addr = " not in l:
            continue
        # The address is written as two decimal numbers "<segment>:<offset>".
        m = re.match(r".*addr = ([0-9]+):([0-9]+)", l)
        if m is None:
            # Malformed address; keep waiting for a usable "addr = " line.
            continue
        segment = int(m.group(1))
        offset = int(m.group(2))
        if segment == 1:
            # NOTE(review): presumably segment 1 is mapped 0x1000 bytes past
            # the module base - confirm. Symbols in any other segment keep
            # their raw segment-relative offset.
            offset += 0x1000
        newsyms.append((offset, name))
        name = None

    # Demangle all names in one demumble invocation, one name per line.
    mangled = "".join(sym_name + "\n" for _, sym_name in newsyms)
    demangled = subprocess.run(
        ['demumble'],
        input=mangled.encode(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Output lines are paired with the input symbols by position.
    for (addr, _), line in zip(newsyms, demangled.stdout.splitlines()):
        ranges[r][2].append((addr, line.decode("utf-8")))

    # Stable ascending sort followed by reverse(): highest address first, and
    # among equal addresses the most recently appended symbol first.
    ranges[r][2].sort(key=lambda x: x[0])
    ranges[r][2].reverse()
# Maps the canonical hex text of an address ("{:x}") to its symbolized form.
reps = {}

with open(sys.argv[3], 'r') as file:
    contents = file.read()

# Every run of hex digits is a candidate address; parsing to int collapses
# spellings that differ only in case or leading zeros.
addresses = {int(token, 16) for token in re.findall(r'[0-9A-Fa-f]+', contents)}

for n in addresses:
    for r in ranges:
        if not (r[0] <= n < r[1]):
            continue
        # Symbols are loaded lazily, once per mapped range, so a module that
        # appears at more than one range gets symbols for each of them.
        if r not in loaded:
            loadsyms(r)
            loaded.add(r)
        offset = n - r[0]
        # ranges[r][2] is sorted by descending address, so the first entry at
        # or below the offset is the closest preceding symbol.
        for s in ranges[r][2]:
            if offset >= s[0]:
                rep = "({:x})<{}>{} +{}".format(n, ranges[r][0], s[1], hex(offset - s[0]))
                reps["{:x}".format(n)] = rep
                break

contents = multireplace(contents, reps, True)
print(contents)