Skip to content

Commit

Permalink
ia32: fix some sse instrs, minor decoder optimization
Browse files: browse the repository at this point in the history
  • Loading branch information
jjyg committed Nov 21, 2020
1 parent cf6e3cf commit 5c3969c
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 33 deletions.
31 changes: 22 additions & 9 deletions metasm/cpu/ia32/decode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,14 @@ def decode_findopcode(edata)
# fetch the relevant bytes from edata
bseq = edata.data[edata.ptr, op.bin.length].unpack('C*')

pfx_ = pfx
if fld = op.fields[:vex_w] and ((bseq[fld[0]] >> fld[1]) & @fields_mask[:vex_w]) == 1
# rex_w is used by opsz but only decoded in decode_instr_op
# add it here for checks with op.props[:opsz] (eg vmovd VS vmovq)
pfx_ = pfx.dup
pfx_[:rex_w] = true
end

# check against full opcode mask
op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } and
# check special cases
Expand All @@ -155,8 +163,8 @@ def decode_findopcode(edata)
(op.props[:modrmA] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 == 0xC0) or
(op.props[:modrmR] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 != 0xC0) or
(fld = op.fields[:vex_vvvv] and @size != 64 and (bseq[fld[0]] >> fld[1]) & @fields_mask[:vex_vvvv] < 8) or
(sz = op.props[:opsz] and opsz(di, op) != sz) or
(sz = op.props[:adsz] and adsz(di, op) != sz) or
(op.props[:opsz] and opsz(di, op, pfx_) != op.props[:opsz]) or
(op.props[:adsz] and adsz(di, op) != op.props[:adsz]) or
(ndpfx = op.props[:needpfx] and not pfx[:list].to_a.include? ndpfx) or
(pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size) or
# return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb
Expand Down Expand Up @@ -198,13 +206,22 @@ def decode_instr_op(edata, di)
}

opsz = opsz(di)
opsz = op.props[:argsz] if op.props[:argsz] and op.props[:needpfx] != 0x66
opsz = op.props[:argsz] if op.props[:argsz]
adsz = (pfx[:adsz] ? 48 - @size : @size)

mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64)
op.args.each { |a|
di.instruction.args << case a
when :reg; Reg.new field_val[a], opsz
when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg)
when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)]
when :i;
if op.props[:unsigned_imm]
type = { 8 => :a8, 16 => :a16, 32 => :a32, 64 => :a64 }[opsz]
else
type = { 8 => :i8, 16 => :i16, 32 => :i32, 64 => :i64 }[opsz]
end
Expression[edata.decode_imm(type, @endianness)]
when :eeec; CtrlReg.new field_val[a]
when :eeed; DbgReg.new field_val[a]
when :eeet; TstReg.new field_val[a]
Expand All @@ -215,11 +232,7 @@ def decode_instr_op(edata, di)
when :regymm; SimdReg.new field_val[a], 256

when :farptr; Farptr.decode edata, @endianness, opsz
when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)]
when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym, @endianness)]

when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 6 : 5), @endianness, adsz, opsz, pfx.delete(:seg)
when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg)
when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz]
when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]
when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex]
Expand Down Expand Up @@ -324,8 +337,8 @@ def decode_cc_to_expr(cc)
end
end

def opsz(di, op=nil)
if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size
# Operand size, in bits, to use for the instruction being decoded.
#
# di  - decoded instruction; may be nil, in which case @size is returned
# op  - opcode whose props are consulted instead of di.opcode (optional)
# pfx - prefix hash to consult; defaults to di.instruction.prefix.
#       The default is nil-safe so that opsz(nil) reaches the guard in
#       the body instead of raising NoMethodError while the default
#       argument is being evaluated.
#
# Returns 48-@size when pfx[:opsz] is set and the opcode does not list
# 0x66 as its own required prefix (props[:needpfx]); @size otherwise.
def opsz(di, op=nil, pfx=(di && di.instruction.prefix))
  if di && pfx && pfx[:opsz] && (op || di.opcode).props[:needpfx] != 0x66
    # 48 - 32 => 16, 48 - 16 => 32: toggles between the two sizes
    48 - @size
  else
    @size
  end
end
Expand Down
24 changes: 13 additions & 11 deletions metasm/cpu/ia32/opcodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,9 @@ def init_sse2_only
addop('movdqu', [0x0F, 0x6F], :mrmxmm, {:d => [1, 4]}) { |o| o.props[:needpfx] = 0xF3 }
addop('movq2dq', [0x0F, 0xD6], :mrmxmm, :modrmR) { |o| o.args[o.args.index(:modrmxmm)] = :modrmmmx ; o.props[:needpfx] = 0xF3 }
addop('movdq2q', [0x0F, 0xD6], :mrmmmx, :modrmR) { |o| o.args[o.args.index(:modrmmmx)] = :modrmxmm ; o.props[:needpfx] = 0xF2 }
addop('movd', [0x0F, 0x6E], :mrmxmm, {:d => [1, 4]}) { |o| o.args = [:regxmm, :modrm] ; o.props[:needpfx] = 0x66 ; o.props[:argsz] = 128 }
addop('movq', [0x0F, 0x7E], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 ; o.props[:argsz] = 128 }
addop('movq', [0x0F, 0xD6], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 ; o.props[:argsz] = 128 }
addop('movd', [0x0F, 0x6E], :mrmxmm, {:d => [1, 4]}) { |o| o.args = [:regxmm, :modrm] ; o.props[:needpfx] = 0x66 ; o.props[:opsz] = o.props[:argsz] = 32 }
addop('movq', [0x0F, 0x7E], :mrmxmm) { |o| o.props[:needpfx] = 0xF3 ; o.props[:argsz] = 64 }
addop('movq', [0x0F, 0xD6], :mrmxmm) { |o| o.args.reverse! ; o.props[:needpfx] = 0x66 ; o.props[:argsz] = 64 }

addop 'paddq', [0x0F, 0xD4], :mrmmmx, :xmmx
addop 'pmuludq', [0x0F, 0xF4], :mrmmmx, :xmmx
Expand Down Expand Up @@ -675,9 +675,9 @@ def init_sse41_only
addop('pblendvb', [0x0F, 0x38, 0x10], :mrmxmm) { |o| o.props[:needpfx] = 0x66 }
addop('pblendw', [0x0F, 0x3A, 0x1E], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66 }
addop('pcmpeqq', [0x0F, 0x38, 0x29], :mrmxmm) { |o| o.props[:needpfx] = 0x66 }
addop('pextrb', [0x0F, 0x3A, 0x14], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.index(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 8 }
addop('pextrw', [0x0F, 0x3A, 0x15], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.index(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 16 }
addop('pextrd', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.index(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 32 }
addop('pextrb', [0x0F, 0x3A, 0x14], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.delete(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 8 }
addop('pextrw', [0x0F, 0x3A, 0x15], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.delete(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 16 }
addop('pextrd', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.delete(:modrmxmm); o.args.unshift(:modrm); o.props[:argsz] = 32 }
addop('pinsrb', [0x0F, 0x3A, 0x20], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 8 }
addop('pinsrw', [0x0F, 0x3A, 0x21], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 16 }
addop('pinsrd', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:argsz] = 32 }
Expand Down Expand Up @@ -752,8 +752,7 @@ def init_avx_only
pclmulqdq punpcklbw punpcklwd punpckldq punpckhbw punpckhwd
punpckhdq punpcklqdq punpckhqdq].each { |n| add128[n] = true }

%w[movups movupd movddup movsldup
unpcklps unpcklpd unpckhps unpckhpd
%w[unpcklps unpcklpd unpckhps unpckhpd
movaps movshdup movapd movntps movntpd movmskps movmskpd
sqrtps sqrtpd rsqrtps rcpps andps andpd andnps andnpd
orps orpd xorps xorpd addps addpd mulps mulpd
Expand All @@ -771,14 +770,17 @@ def init_avx_only
pmovzxbw pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq
aesimc aeskeygenassist lddqu maskmovdqu movapd movaps
pcmpestri pcmpestrm pcmpistri pcmpistrm phminposuw
cvtpd2dq cvttpd2dq cvtdq2pd cvtps2pd cvtpd2ps cvtdq2ps cvtps2dq
cvttps2dq movd movq movddup movdqa movdqu movmskps movmskpd
cvtpd2dq cvttpd2dq cvtdq2pd cvtps2pd cvtpd2ps cvtdq2ps
cvtps2dq cvttps2dq movddup movdqa movdqu movmskps movmskpd
movntdq movntps movntpd movshdup movsldup movups movupd
pextrb pextrw pextrd pextrq ptest rcpps roundps roundpd
extractps sqrtps sqrtpd comiss comisd ucomiss ucomisd
cvttss2si cvttsd2si cvtss2si cvtsd2si
cvttss2si cvttsd2si cvtss2si cvtsd2si movd movq
].each { |n| add128[n] = true ; varg[n] = nil }

%w[movups movupd movddup movsldup
].each { |n| add128[n] = add256[n] = true ; varg[n] = nil }

cvtarg128 = { :regmmx => :regxmm, :modrmmmx => :modrmxmm }
cvtarg256 = { :regmmx => :regymm, :modrmmmx => :modrmymm,
:regxmm => :regymm, :modrmxmm => :modrmymm }
Expand Down
30 changes: 18 additions & 12 deletions metasm/cpu/x86_64/decode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,27 @@ def decode_instr_op(edata, di)
end

opsz = opsz(di)
opsz = op.props[:argsz] if op.props[:argsz] and op.props[:needpfx] != 0x66
opsz = op.props[:argsz] if op.props[:argsz]
adsz = pfx[:adsz] ? 32 : 64
mmxsz = (op.props[:xmmx] && pfx[:opsz]) ? 128 : 64

op.args.each { |a|
di.instruction.args << case a
when :reg; Reg.new field_val_r[a], opsz
when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg), Reg, pfx
when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; Expression[edata.decode_imm(a, @endianness)]
when :i # 64bit constants are sign-extended from :i32
if opsz == 64
type = op.props[:imm64] ? :a64 : :i32
elsif op.props[:unsigned_imm]
type = { 8 => :a8, 16 => :a16, 32 => :a32, 64 => :a64 }[opsz]
else
type = { 8 => :i8, 16 => :i16, 32 => :i32, 64 => :i64 }[opsz]
end
v = edata.decode_imm(type, @endianness)
v &= 0xffff_ffff_ffff_ffff if opsz == 64 and op.props[:unsigned_imm] and v.kind_of?(::Integer)
Expression[v]

when :eeec; CtrlReg.new field_val_r[a]
when :eeed; DbgReg.new field_val_r[a]
when :eeet; TstReg.new field_val_r[a]
Expand All @@ -145,15 +159,7 @@ def decode_instr_op(edata, di)
when :regymm; SimdReg.new field_val_r[a], 256

when :farptr; Farptr.decode edata, @endianness, opsz
when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64; Expression[edata.decode_imm(a, @endianness)]
when :i # 64bit constants are sign-extended from :i32
type = (opsz == 64 ? op.props[:imm64] ? :a64 : :i32 : "#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym )
v = edata.decode_imm(type, @endianness)
v &= 0xffff_ffff_ffff_ffff if opsz == 64 and op.props[:unsigned_imm] and v.kind_of? Integer
Expression[v]

when :mrm_imm; ModRM.new(adsz, opsz, nil, nil, nil, Expression[edata.decode_imm("a#{adsz}".to_sym, @endianness)], pfx.delete(:seg))
when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg), Reg, pfx
when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz])
when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex])
when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, pfx.merge(:argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex])
Expand Down Expand Up @@ -232,9 +238,9 @@ def decode_instr_interpret(di, addr)
di
end

def opsz(di, op=nil)
if di and di.instruction.prefix and di.instruction.prefix[:rex_w]; 64
elsif di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 16
# Operand size, in bits, to use for the instruction being decoded.
#
# di  - decoded instruction; may be nil, in which case 32 is returned
# op  - opcode whose props are consulted instead of di.opcode (optional)
# pfx - prefix hash to consult; defaults to di.instruction.prefix.
#       The default is nil-safe so that opsz(nil) falls through to 32
#       instead of raising NoMethodError while the default argument is
#       being evaluated.
#
# Checked in order:
#   pfx[:rex_w] set                                  -> 64
#   pfx[:opsz] set and opcode's :needpfx is not 0x66 -> 16
#   opcode has the :auto64 prop                      -> 64
#   otherwise                                        -> 32
def opsz(di, op=nil, pfx=(di && di.instruction.prefix))
  return 32 unless di
  return 64 if pfx && pfx[:rex_w]

  opcode = op || di.opcode
  return 16 if pfx && pfx[:opsz] && opcode.props[:needpfx] != 0x66

  opcode.props[:auto64] ? 64 : 32
end
Expand Down
2 changes: 1 addition & 1 deletion metasm/cpu/x86_64/opcodes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def init_sse3

# Runs the parent class's init_sse41_only (super), then registers the
# pextrq / pinsrq opcodes, both with :opsz and :argsz set to 64 and a
# required 0x66 prefix (:needpfx).
def init_sse41_only
super()
# NOTE(review): this line and the next both register 'pextrq'. The hunk header
# for this file says "1 addition & 1 deletion", so this first one is presumably
# the removed pre-commit line: it swaps :modrmxmm for :modrm in place.
addop('pextrq', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 }
# Replacement form: removes :modrmxmm from the arg list and puts :modrm first.
addop('pextrq', [0x0F, 0x3A, 0x16], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args.delete(:modrmxmm); o.args.unshift(:modrm); o.props[:opsz] = o.props[:argsz] = 64 }
# pinsrq keeps the in-place substitution of :modrmxmm by :modrm.
addop('pinsrq', [0x0F, 0x3A, 0x22], :mrmxmm, :u8) { |o| o.props[:needpfx] = 0x66; o.args[o.args.index(:modrmxmm)] = :modrm; o.props[:opsz] = o.props[:argsz] = 64 }
end

Expand Down

0 comments on commit 5c3969c

Please sign in to comment.