cmd/compile,test: combine byte loads and stores on ppc64le
CL 74410 added rules for ppc64le to combine consecutive byte loads and
stores when the bytes are in little endian order. This is the
corresponding change for bytes that are in big endian order. All of
these rules are intended for a little endian target arch.
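
For illustration, the shape of Go source the new rules target looks like
the following (a minimal sketch; the function name is made up here, but
the body mirrors encoding/binary's BigEndian.Uint32):

	// beUint32 assembles a 32-bit value from bytes in big endian order.
	// With this change, on ppc64le the four byte loads and the shift/OR
	// tree combine into a single byte-reversed load (MOVWBRload, i.e. lwbrx).
	func beUint32(b []byte) uint32 {
		_ = b[3] // bounds check hint to the compiler
		return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
	}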

This adds new testcases in test/codegen/memcombine.go.
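
The codegen tests check the assembly emitted for small functions; a sketch
in that style (inside a package that imports encoding/binary; the function
name and checked opcode pattern are illustrative, not copied from the new
file):

	func load_be32(b []byte) uint32 {
		// ppc64le:`MOVWBR`
		return binary.BigEndian.Uint32(b)
	}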

Fixes golang#22496
Updates golang#24242

Benchmark improvement for encoding/binary:
name                      old time/op    new time/op    delta
ReadSlice1000Int32s-16      11.0µs ± 0%     9.0µs ± 0%  -17.47%  (p=0.029 n=4+4)
ReadStruct-16               2.47µs ± 1%    2.48µs ± 0%   +0.67%  (p=0.114 n=4+4)
ReadInts-16                  642ns ± 1%     630ns ± 1%   -2.02%  (p=0.029 n=4+4)
WriteInts-16                 654ns ± 0%     653ns ± 1%   -0.08%  (p=0.629 n=4+4)
WriteSlice1000Int32s-16     8.75µs ± 0%    8.20µs ± 0%   -6.19%  (p=0.029 n=4+4)
PutUint16-16                1.16ns ± 0%    0.93ns ± 0%  -19.83%  (p=0.029 n=4+4)
PutUint32-16                1.16ns ± 0%    0.93ns ± 0%  -19.83%  (p=0.029 n=4+4)
PutUint64-16                1.85ns ± 0%    0.93ns ± 0%  -49.73%  (p=0.029 n=4+4)
LittleEndianPutUint16-16    1.03ns ± 0%    0.93ns ± 0%   -9.71%  (p=0.029 n=4+4)
LittleEndianPutUint32-16    0.93ns ± 0%    0.93ns ± 0%     ~     (all equal)
LittleEndianPutUint64-16    0.93ns ± 0%    0.93ns ± 0%     ~     (all equal)
PutUvarint32-16             43.0ns ± 0%    43.1ns ± 0%   +0.12%  (p=0.429 n=4+4)
PutUvarint64-16              174ns ± 0%     175ns ± 0%   +0.29%  (p=0.429 n=4+4)
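
The large PutUintXX deltas come from the new big endian store rules: those
benchmarks exercise the BigEndian byte order (note the separate
LittleEndianPutUintXX rows). A sketch of the store pattern, mirroring
binary.BigEndian.PutUint64, which now compiles to a single byte-reversed
store (MOVDBRstore, i.e. stdbrx) on ppc64le:

	func bePutUint64(b []byte, v uint64) {
		_ = b[7] // bounds check hint to the compiler
		b[0] = byte(v >> 56)
		b[1] = byte(v >> 48)
		b[2] = byte(v >> 40)
		b[3] = byte(v >> 32)
		b[4] = byte(v >> 24)
		b[5] = byte(v >> 16)
		b[6] = byte(v >> 8)
		b[7] = byte(v)
	}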

Functions in gcm.go were updated to enable this matching. An existing
testcase prevents those functions from being replaced by the ones in
encoding/binary, due to import dependencies.
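
A sketch of the form those gcm.go helpers take after the change (helper
names and exact bodies are assumed here, not quoted from the diff; the
point is that the shift/OR and byte-store shapes are the ones the new
rules match):

	func getUint64(data []byte) uint64 {
		_ = data[7] // bounds check hint to the compiler
		return uint64(data[0])<<56 | uint64(data[1])<<48 |
			uint64(data[2])<<40 | uint64(data[3])<<32 |
			uint64(data[4])<<24 | uint64(data[5])<<16 |
			uint64(data[6])<<8 | uint64(data[7])
	}

	func putUint64(out []byte, v uint64) {
		_ = out[7] // bounds check hint to the compiler
		out[0] = byte(v >> 56)
		out[1] = byte(v >> 48)
		out[2] = byte(v >> 40)
		out[3] = byte(v >> 32)
		out[4] = byte(v >> 24)
		out[5] = byte(v >> 16)
		out[6] = byte(v >> 8)
		out[7] = byte(v)
	}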

Change-Id: Idb3bd1e6e7b12d86cd828fb29cb095848a3e485a
Reviewed-on: https://go-review.googlesource.com/98136
Run-TryBot: Lynn Boger <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
laboger committed May 8, 2018
1 parent f31a18d commit 28edaf4
Showing 9 changed files with 11,707 additions and 810 deletions.
62 changes: 36 additions & 26 deletions src/cmd/compile/internal/ppc64/ssa.go
@@ -619,35 +619,31 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = ppc64.REGTMP // discard result

 	case ssa.OpPPC64MOVDaddr:
-		p := s.Prog(ppc64.AMOVD)
-		p.From.Type = obj.TYPE_ADDR
-		p.From.Reg = v.Args[0].Reg()
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = v.Reg()
-
-		var wantreg string
-		// Suspect comment, copied from ARM code
-		// MOVD $sym+off(base), R
-		// the assembler expands it as the following:
-		// - base is SP: add constant offset to SP
-		//   when constant is large, tmp register (R11) may be used
-		// - base is SB: load external address from constant pool (use relocation)
 		switch v.Aux.(type) {
 		default:
-			v.Fatalf("aux is of unknown type %T", v.Aux)
-		case *obj.LSym:
-			wantreg = "SB"
-			gc.AddAux(&p.From, v)
-		case *gc.Node:
-			wantreg = "SP"
-			gc.AddAux(&p.From, v)
+			v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
 		case nil:
-			// No sym, just MOVD $off(SP), R
-			wantreg = "SP"
-			p.From.Offset = v.AuxInt
-		}
-		if reg := v.Args[0].RegName(); reg != wantreg {
-			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
+			// If aux offset and aux int are both 0, and the same
+			// input and output regs are used, no instruction
+			// needs to be generated, since it would just be
+			// addi rx, rx, 0.
+			if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
+				p := s.Prog(ppc64.AMOVD)
+				p.From.Type = obj.TYPE_ADDR
+				p.From.Reg = v.Args[0].Reg()
+				p.From.Offset = v.AuxInt
+				p.To.Type = obj.TYPE_REG
+				p.To.Reg = v.Reg()
+			}
+
+		case *obj.LSym, *gc.Node:
+			p := s.Prog(ppc64.AMOVD)
+			p.From.Type = obj.TYPE_ADDR
+			p.From.Reg = v.Args[0].Reg()
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Reg()
+			gc.AddAux(&p.From, v)
+
 		}

 	case ssa.OpPPC64MOVDconst:
@@ -729,6 +725,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()

+	case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+
+	case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
+		p := s.Prog(v.Op.Asm())
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+
 	case ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_MEM
232 changes: 221 additions & 11 deletions src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -678,6 +678,7 @@

(MOVHZreg y:(MOVHZreg _)) -> y // repeat
(MOVHZreg y:(MOVBZreg _)) -> y // wide of narrow
(MOVHZreg y:(MOVHBRload _ _)) -> y

(MOVHreg y:(MOVHreg _)) -> y // repeat
(MOVHreg y:(MOVBreg _)) -> y // wide of narrow
@@ -690,6 +691,8 @@
(MOVWZreg y:(MOVWZreg _)) -> y // repeat
(MOVWZreg y:(MOVHZreg _)) -> y // wide of narrow
(MOVWZreg y:(MOVBZreg _)) -> y // wide of narrow
(MOVWZreg y:(MOVHBRload _ _)) -> y
(MOVWZreg y:(MOVWBRload _ _)) -> y

(MOVWreg y:(MOVWreg _)) -> y // repeat
(MOVWreg y:(MOVHreg _)) -> y // wide of narrow
@@ -870,6 +873,8 @@
(MOVWstore [off] {sym} ptr (MOV(W|WZ)reg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 -> (MOVBstore [off] {sym} ptr (SRWconst <typ.UInt32> x [c]) mem)
(MOVBstore [off] {sym} ptr (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 -> (MOVBstore [off] {sym} ptr (SRWconst <typ.UInt32> x [c]) mem)
(MOVHBRstore {sym} ptr (MOV(H|HZ|W|WZ)reg x) mem) -> (MOVHBRstore {sym} ptr x mem)
(MOVWBRstore {sym} ptr (MOV(W|WZ)reg x) mem) -> (MOVWBRstore {sym} ptr x mem)

// Lose W-widening ops fed to compare-W
(CMPW x (MOVWreg y)) -> (CMPW x y)
@@ -902,13 +907,14 @@
(FSUBS (FMULS x y) z) -> (FMSUBS x y z)


-// The following rules are intended to match statements as are found in encoding/binary
-// functions UintXX (load) and PutUintXX (store), combining multi-byte loads and stores
-// into wider loads and stores.
-// Initial implementation handles only little endian loads and stores on little endian
-// targets.
-// TODO implement big endian loads and stores for little endian machines (using byte reverse
-// loads and stores).
+// The following rules match the statements found in encoding/binary functions
+// UintXX (load) and PutUintXX (store), converting multiple single byte loads
+// or stores in those functions into the single largest possible load or store.
+// Some rules are marked big or little endian based on the order in which the
+// bytes are loaded or stored, not on the endianness of the machine; all are
+// intended for little endian machines. To implement them for big endian
+// machines, most rules would have to be duplicated with the result reversed,
+// i.e., MOVHZload on little endian would become MOVHBRload on big endian, and
+// vice versa.
// b[0] | b[1]<<8 -> load 16-bit Little endian
(OR <t> x0:(MOVBZload [i0] {s} p mem)
o1:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [8]))
@@ -920,9 +926,39 @@
&& clobber(x0) && clobber(x1) && clobber(o1)
-> @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)

// b[0]<<8 | b[1] -> load 16-bit Big endian on Little endian arch.
// Use byte-reverse indexed load for 2 bytes.
(OR <t> x0:(MOVBZload [i1] {s} p mem)
o1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [8]))
&& !config.BigEndian
&& i1 == i0+1
&& x0.Uses == 1 && x1.Uses == 1
&& o1.Uses == 1
&& mergePoint(b, x0, x1) != nil
&& clobber(x0) && clobber(x1) && clobber(o1)
-> @mergePoint(b,x0,x1) (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<(n+8) | b[1]<<n -> load 16-bit Big endian (where n%8 == 0)
// Use byte-reverse indexed load for 2 bytes,
// then shift left to the correct position. Used to match subrules
// from longer rules.
(OR <t> s0:(SL(W|D)const x0:(MOVBZload [i1] {s} p mem) [n1])
s1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [n2]))
&& !config.BigEndian
&& i1 == i0+1
&& n1%8 == 0
&& n2 == n1+8
&& x0.Uses == 1 && x1.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1
&& mergePoint(b, x0, x1) != nil
&& clobber(x0) && clobber(x1) && clobber(s0) && clobber(s1)
-> @mergePoint(b,x0,x1) (SLDconst <t> (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [n1])

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit Little endian
// Use a single 4-byte load.
(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i3] {s} p mem) [24])
-	o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [16]) x0:(MOVHZload [i0] {s} p mem)))
+	o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [16])
+	x0:(MOVHZload [i0] {s} p mem)))
&& !config.BigEndian
&& i2 == i0+2
&& i3 == i0+3
@@ -935,9 +971,81 @@
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)

// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Could be used to match subrules of a longer rule.
(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i0] {s} p mem) [24])
o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [16])
x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem)))
&& !config.BigEndian
&& i1 == i0+1
&& i2 == i0+2
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& o0.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1
&& mergePoint(b, x0, x1, x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(s0) && clobber(s1)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Could be used to match subrules of a longer rule.
(OR <t> x0:(MOVBZload [i3] {s} p mem)
o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [8])
s1:(SL(W|D)const x2:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [16])))
&& !config.BigEndian
&& i2 == i0+2
&& i3 == i0+3
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& o0.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1
&& mergePoint(b, x0, x1, x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(s0) && clobber(s1)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 -> load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Used to match longer rules.
(OR <t> s2:(SLDconst x2:(MOVBZload [i3] {s} p mem) [32])
o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i2] {s} p mem) [40])
s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [48])))
&& !config.BigEndian
&& i2 == i0+2
&& i3 == i0+3
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& o0.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
&& mergePoint(b, x0, x1, x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(s0) && clobber(s1) && clobber(s2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])

// b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with constant address.
// Used to match longer rules.
(OR <t> s2:(SLDconst x2:(MOVBZload [i0] {s} p mem) [56])
o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem) [32])))
&& !config.BigEndian
&& i1 == i0+1
&& i2 == i0+2
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& o0.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
&& mergePoint(b, x0, x1, x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(s0) && clobber(s1) && clobber(s2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4] <<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit Little endian
-// Note: long rules with commutative ops will result in very large functions in rewritePPC64,
-// so shorter rules which make use of previously defined rules are preferred.
+// Rules with commutative ops and many operands will result in extremely large functions in rewritePPC64,
+// so matching shorter previously defined subrules is important.
+// Offset must be multiple of 4 for MOVD
(OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56])
o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48])
@@ -959,10 +1067,56 @@
&& clobber(o3) && clobber(o4) && clobber(o5)
-> @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)

// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit Big endian ordered bytes on Little endian arch
// Use byte-reverse indexed load of 8 bytes.
// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
// so matching shorter previously defined subrules is important.
(OR <t> s0:(SLDconst x0:(MOVBZload [i0] {s} p mem) [56])
o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
o1:(OR <t> s2:(SLDconst x2:(MOVBZload [i2] {s} p mem) [40])
o2:(OR <t> s3:(SLDconst x3:(MOVBZload [i3] {s} p mem) [32])
x4:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i4] p) mem)))))
&& !config.BigEndian
&& i1 == i0+1
&& i2 == i0+2
&& i3 == i0+3
&& i4 == i0+4
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
&& mergePoint(b, x0, x1, x2, x3, x4) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(o0) && clobber(o1) && clobber(o2)
&& clobber(s0) && clobber(s1) && clobber(s2) && clobber(s3)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit Big endian ordered bytes on Little endian arch
// Use byte-reverse indexed load of 8 bytes.
// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
// so matching shorter previously defined subrules is important.
(OR <t> x7:(MOVBZload [i7] {s} p mem)
o5:(OR <t> s6:(SLDconst x6:(MOVBZload [i6] {s} p mem) [8])
o4:(OR <t> s5:(SLDconst x5:(MOVBZload [i5] {s} p mem) [16])
o3:(OR <t> s4:(SLDconst x4:(MOVBZload [i4] {s} p mem) [24])
s0:(SL(W|D)const x3:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])))))
&& !config.BigEndian
&& i4 == i0+4
&& i5 == i0+5
&& i6 == i0+6
&& i7 == i0+7
&& x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
&& o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
&& s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
&& mergePoint(b, x3, x4, x5, x6, x7) != nil
&& clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(o3) && clobber(o4) && clobber(o5)
&& clobber(s0) && clobber(s4) && clobber(s5) && clobber(s6)
-> @mergePoint(b,x3,x4,x5,x6,x7) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// 2 byte store Little endian as in:
// b[0] = byte(v >> 16)
// b[1] = byte(v >> 24)
-// Added mainly to use when matching longer rules below
+// Added for use in matching longer rules.
(MOVBstore [i1] {s} p (SR(W|D)const w [24])
x0:(MOVBstore [i0] {s} p (SR(W|D)const w [16]) mem))
&& !config.BigEndian
@@ -993,6 +1147,38 @@
&& clobber(x0)
-> (MOVWstore [i0] {s} p w mem)

// 4 byte store Big endian as in:
// b[0] = byte(v >> 24)
// b[1] = byte(v >> 16)
// b[2] = byte(v >> 8)
// b[3] = byte(v)
// Use byte-reverse indexed 4 byte store.
(MOVBstore [i3] {s} p w
x0:(MOVBstore [i2] {s} p (SRWconst w [8])
x1:(MOVBstore [i1] {s} p (SRWconst w [16])
x2:(MOVBstore [i0] {s} p (SRWconst w [24]) mem))))
&& !config.BigEndian
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& i1 == i0+1 && i2 == i0+2 && i3 == i0+3
&& clobber(x0) && clobber(x1) && clobber(x2)
-> (MOVWBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)

// The 2 byte store appears after the 4 byte store so that the
// match for the 2 byte store is not done first.
// If the 4 byte store is based on the 2 byte store then there are
// variations on the MOVDaddr subrule that would require additional
// rules to be written.

// 2 byte store Big endian as in:
// b[0] = byte(v >> 8)
// b[1] = byte(v)
(MOVBstore [i1] {s} p w x0:(MOVBstore [i0] {s} p (SRWconst w [8]) mem))
&& !config.BigEndian
&& x0.Uses == 1
&& i1 == i0+1
&& clobber(x0)
-> (MOVHBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)

// 8 byte store Little endian as in:
// b[0] = byte(v)
// b[1] = byte(v >> 8)
@@ -1015,3 +1201,27 @@
&& i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
-> (MOVDstore [i0] {s} p w mem)

// 8 byte store Big endian as in:
// b[0] = byte(v >> 56)
// b[1] = byte(v >> 48)
// b[2] = byte(v >> 40)
// b[3] = byte(v >> 32)
// b[4] = byte(v >> 24)
// b[5] = byte(v >> 16)
// b[6] = byte(v >> 8)
// b[7] = byte(v)
// Use byte-reverse indexed 8 byte store.
(MOVBstore [i7] {s} p w
x0:(MOVBstore [i6] {s} p (SRDconst w [8])
x1:(MOVBstore [i5] {s} p (SRDconst w [16])
x2:(MOVBstore [i4] {s} p (SRDconst w [24])
x3:(MOVBstore [i3] {s} p (SRDconst w [32])
x4:(MOVBstore [i2] {s} p (SRDconst w [40])
x5:(MOVBstore [i1] {s} p (SRDconst w [48])
x6:(MOVBstore [i0] {s} p (SRDconst w [56]) mem))))))))
&& !config.BigEndian
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1
&& i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-> (MOVDBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)