-
Notifications
You must be signed in to change notification settings - Fork 81
/
Copy pathppc_pdf2oplist.rb
192 lines (176 loc) · 5.93 KB
/
ppc_pdf2oplist.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# This file is part of Metasm, the Ruby assembly manipulation suite
# Copyright (C) 2006-2009 Yoann GUILLOT
#
# Licence is LGPL, see LICENCE in the top-level directory
#
# parses the PPC specification PDF to generate the opcode list
#
require 'pdfparse'
# field name (Symbol) => unshifted bit mask of that field; filled in by make_instr
$field_mask = {}
# field name (Symbol) => left shift of that field inside the 32-bit opcode word; filled in by make_instr
$field_shift = {}
# one [opname, binary value, argument list] entry per instruction emitted by make_instr
$opcodes = []
# Turns one encoding row of the specification into opcode entries.
#
# bins:: the encoding cells as strings: a decimal constant, a field name,
#        or '/', '//', '///' for reserved bits
# bits:: the first bit index of each cell (index 0 ends up at the top of
#        the 32-bit word, see the shift computation below)
# text:: the assembler syntax lines sharing this encoding,
#        e.g. "fnabs FRT,FRB (Rc=0)"
#
# Side effects: registers every named field in $field_mask / $field_shift,
# appends one [opname, binary, args] entry per syntax line to $opcodes and
# prints the matching "addop" line on stdout. The strings in +text+ may be
# patched in place (fdiv. typo fix).
#
# Raises RuntimeError when a "(name=value)" suffix is malformed, names a
# field the encoding does not have, or when an operand cannot be mapped
# onto the remaining fields.
def make_instr(bins, bits, text)
  # a cell ends where the next one starts; the last one ends at bit 32
  widths = (bits + [32]).each_cons(2).map { |from, to| to - from }

  # opcode value with all variable fields left at zero
  base = 0
  names = []

  bins.zip(bits, widths).each { |cell, start, width|
    shift = 32 - (start + width)
    mask = (1 << width) - 1
    case cell
    when '/', '//', '///'
      # reserved field, value unspecified
    when /^\d+$/
      # constant field
      base |= cell.to_i << shift
    when /^[A-Za-z]+$/
      sym = cell.downcase.to_sym
      # a name already known with another width/position gets '_' appended
      # until it is either unused or describes this very field
      while $field_mask[sym] and ($field_mask[sym] != mask or $field_shift[sym] != shift)
        sym = "#{sym}_".to_sym
      end
      names << sym
      $field_mask[sym] ||= mask
      $field_shift[sym] ||= shift
    end
  }

  text.each { |txt|
    word = base
    pending = names.dup

    # typo in the spec: the fdiv. line has no '(' before Rc=1
    txt.sub!(' Rc=1)', ' (Rc=1)') if txt.include? 'fdiv.'

    # a trailing "(Rc=0)"-style suffix hardwires fields into the opcode value
    syntax = txt
    if (m = txt.match(/(.*\S)\s*\((\w+=.*)\)/))
      syntax = m[1]
      m[2].split.each { |setting|
        pair = setting.match(/(\w+)=(\d+)/)
        raise setting unless pair
        bitname = pair[1].downcase
        bitval = pair[2].to_i
        fld = pending.find { |f| f.to_s.delete('_') == bitname }
        raise "bad bit #{bitname} in #{syntax}" unless fld
        pending.delete fld
        word |= bitval << $field_shift[fld]
      }
    end

    # map each textual operand onto a still-unused field (ignoring '_' suffixes)
    opname, operands = syntax.split(/\s+/, 2)
    args = operands.to_s.downcase.split(/\s*,\s*/).map { |operand|
      hit = pending.find { |f| f.to_s.delete('_') == operand }
      pending.delete hit
      hit
    }

    if args.include?(nil)
      # an unmatched operand with exactly :ra plus one displacement field
      # left over is folded into a single combined argument
      combined = nil
      if pending.length == 2
        combined = [[:d, :ra_i16], [:ds, :ra_i16s], [:dq, :ra_i16q]].find { |disp, _| (pending - [:ra, disp]).empty? }
      end
      if combined
        args[args.index(nil)] = combined.last
        pending.clear
      elsif pending.length == 1
        # only one candidate left: it has to be the unmatched operand
        args[args.index(nil)] = pending.shift
      end
    end
    raise "bad args #{args.inspect} (#{pending.inspect}) in #{syntax}" if args.include?(nil)

    $opcodes << [opname, word, args]
    line = "\taddop " + "#{opname.inspect},".ljust(10) + format('0x%08X', word)
    line << ', ' unless args.empty?
    line << args.map { |a| a.inspect }.join(', ')
    puts line
  }
end
# handle instruction aliases
# NOT WORKING
# should be implemented in the parser/displayer instead of opcode list
# manual work needed for e.g. conditional jumps
# Reports an extended mnemonic (alias) of an already known instruction.
#
# newop, newargs:: the alias mnemonic and its operand strings
# oldop, oldargs:: the real instruction it stands for and its operand strings
#
# The most recently added $opcodes entry named +oldop+ is looked up and a
# copy of it is specialised with every purely numeric operand of +oldargs+.
# The copy is not stored anywhere: the only visible result is a commented
# "alias" line on stdout.
#
# Raises RuntimeError when +oldop+ is not present in $opcodes.
def make_alias(newop, newargs, oldop, oldargs)
  base = $opcodes.reverse.find { |entry| entry[0] == oldop }
  raise "unknown alias #{newop} => #{oldop}" unless base

  aliased = base.dup
  aliased[0] = newop
  oldargs.each_with_index { |operand, idx|
    # only decimal / hex literals are folded into the binary value
    # XXX an alias with fewer operands than the real instruction
    # (bcctr 4, 6 -> bcctr 4, 6, 0) is not handled properly
    numeric = operand =~ /^[0-9]+$/ || operand =~ /^0x[0-9a-f]+$/i
    next unless numeric
    aliased[1] |= Integer(operand) << $field_shift[base[2][idx]]
  }

  puts "#\talias #{newop} #{newargs.join(', ')} -> #{oldop} #{oldargs.join(', ')}".downcase
end
require 'enumerator'
# Prints the collected bitfield tables on stdout as Ruby hash literals:
# first @field_shift (from $field_shift), then @field_mask (from $field_mask).
#
# Entries are sorted by field name and laid out six per row. Mask values
# above 1000 are printed in hexadecimal, everything else in decimal.
def epilog
  # Enumerable#enum_slice no longer exists (removed in Ruby 1.9);
  # each_slice without a block yields the same groups of six.
  puts "\n\t@field_shift = {"
  puts $field_shift.sort_by { |k, _| k.to_s }.each_slice(6).map { |slc|
    "\t\t" + slc.map { |k, v| "#{k.inspect} => #{v}" }.join(', ')
  }.join(",\n")
  puts "\t}"

  puts "\n\t@field_mask = {"
  puts $field_mask.sort_by { |k, _| k.to_s }.each_slice(6).map { |slc|
    "\t\t" + slc.map { |k, v| "#{k.inspect} => #{v > 1000 ? '0x%X' % v : v}" }.join(', ')
  }.join(",\n")
  puts "\t}"
end
# set by parse_page once a bit-index (encoding) line has been handled and cleared again at the next
# 'Special Registers Altered' line; parse_page raises 'nofoundop' if that line is reached while this is false
$foundop = false
# Scans the text lines of one page column for instruction definitions.
#
# lines:: text fragments; each must respond to str, x, y, fontx and fonty
#
# Every encoding found is handed to make_instr together with the syntax
# lines buffered just before it; rows following an
# "Extended: ... Equivalent to:" header are handed to make_alias.
# Fragments sharing a y coordinate are merged, which appends to the str of
# the fragment that is kept. Reads and writes $foundop, reads $opcodes.
# Raises 'nofoundop' when a 'Special Registers Altered' line is reached
# although no encoding was recognised since the previous one.
def parse_page(lines)
# all instr defining pages include this
return unless lines.find { |l| l.str =~ /Special Registers Altered|Memory Barrier Instructions|Data Cache Instructions/ } # sync L/dcbt
ilist = [] # line buffer
extended = false
# concat lines with same y
# (sorted by descending y, then ascending x - presumably top-to-bottom, left-to-right; confirm against pdfparse)
lines = lines.sort_by { |l| [-l.y, l.x] }
lastline = nil
# a fragment is appended to the previously kept one (and dropped) when both share the same y
# and either the same font metrics or the fragment is whitespace-only
lines.delete_if { |l|
if lastline and lastline.y == l.y and ([lastline.fontx, lastline.fonty] == [l.fontx, l.fonty] or l.str =~ /^\s*$/)
lastline.str << ' ' << l.str
true
else
lastline = l
false
end
}
lines.each { |l|
# search for the bit indices list
# (small font, "0 ... 31", with as many tokens as the buffered line before it, or one more)
# NOTE(review): ilist.last is nil while the buffer is empty, so a bit-index-looking line
# arriving first would raise NoMethodError here
if l.fonty < 7 and l.str =~ /^0 [\d ]+ 31\s*$/ and (ilist.last.str.split.length == l.str.split.length or ilist.last.str.split.length == l.str.split.length-1)
$foundop = true
bitindices = l.str.split.map { |i| i.to_i }
# previous line is the binary encoding
encoding = ilist.pop.str.split
# drop the trailing index when there is one more index than encoding cells
bitindices.pop if encoding.length < bitindices.length
# previous line is the instruction text format
# (an optional "[POWER mnemonics: ...]" line in between is discarded)
ilist.pop if ilist.last.str =~ /\[POWER2? mnemonics?: (.*)\]/
text = []
# walk the buffer backwards, keeping the first line unconditionally and then every line that
# contains a ',' or ends with ')'; this reuses l, which from here on holds a String (or nil/false)
text.unshift l while l = ilist.pop and l = l.str and (l =~ /,|\)$/ or text.empty?)
# whatever is left in the buffer does not belong to this instruction
ilist = []
make_instr(encoding, bitindices, text)
elsif l.str.include? 'Special Registers Altered'
if not $foundop
# no encoding was recognised for this instruction: dump what was buffered, then abort
puts ilist.map { |l_| "(#{l_.y}) #{l_.str}" }
puts lines.map { |l_| "(#{l_.y}) #{l_.str}" } if ilist.empty?
raise 'nofoundop'
else
$foundop = false
end
elsif l.str =~ /Extended:\s+Equivalent to:/
# header of an extended mnemonics table
extended = true
elsif extended
# table row: four whitespace separated parts containing a ',', the third naming a known opcode
if l.str.include? ',' and l.str =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)/ and $opcodes.find { |op| op[0] == $3 }
newop, newargs, exop, exargs = $1, $2, $3, $4
make_alias(newop, newargs.split(','), exop, exargs.split(','))
# any other line ends the table (and is not buffered)
else extended = false
end
else ilist << l
end
}
end
# PowerPC Architecture v2.02:
# 1 - User Instruction Set
# 2 - Virtual Environment
# 3 - Operating Environment
# Process every specification book found in the current directory, in name
# order, then dump the collected field tables.
Dir['PPC_Vers202_Book?_public.pdf'].sort.each do |book_path|
  $stderr.puts book_path if $stderr.tty?

  document = PDF.read(book_path)
  total_pages = document.trailer['Root']['Pages']['Count'] || 0
  page_no = 0

  document.each_page do |page|
    # progress display; the counter only advances when stderr is a terminal
    if $stderr.tty?
      page_no += 1
      $stderr.print "#{page_no}/#{total_pages} \r"
    end

    page.clip_lines(50, 740)

    # split columns: everything left of x = 288 is parsed first, then the rest
    left_column, right_column = page.lines.flatten.partition { |frag| frag.x < 288 }
    parse_page(left_column)
    parse_page(right_column)
  end

  # overwrite the progress display
  $stderr.print " \r" if $stderr.tty?
end

epilog