Skip to content

Commit

Permalink
cmd/compile: eliminate unnecessary type conversions in TrailingZeros(…
Browse files Browse the repository at this point in the history
…16|8) for arm

This follows CL 156999 which did the same for arm64.

name               old time/op  new time/op  delta
TrailingZeros-4    7.30ns ± 1%  7.30ns ± 0%     ~     (p=0.413 n=9+9)
TrailingZeros8-4   8.32ns ± 0%  7.17ns ± 0%  -13.77%  (p=0.000 n=10+9)
TrailingZeros16-4  8.30ns ± 0%  7.18ns ± 0%  -13.50%  (p=0.000 n=9+10)
TrailingZeros32-4  6.46ns ± 1%  6.47ns ± 1%     ~     (p=0.325 n=10+10)
TrailingZeros64-4  16.3ns ± 0%  16.2ns ± 0%   -0.61%  (p=0.000 n=7+10)

Change-Id: I7e9e1abf7e30d811aa474d272b2824ec7cbbaa98
Reviewed-on: https://go-review.googlesource.com/c/go/+/167797
Run-TryBot: Tobias Klauser <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
  • Loading branch information
tklauser committed Mar 15, 2019
1 parent d9db9e3 commit 156c830
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 5 deletions.
8 changes: 4 additions & 4 deletions src/cmd/compile/internal/gc/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -3288,12 +3288,12 @@ func init() {
y := s.newValue2(ssa.OpOr32, types.Types[TUINT32], x, c)
return s.newValue1(ssa.OpCtz32, types.Types[TINT], y)
},
sys.ARM, sys.MIPS)
sys.MIPS)
addF("math/bits", "TrailingZeros16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
addF("math/bits", "TrailingZeros16",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
Expand All @@ -3309,12 +3309,12 @@ func init() {
y := s.newValue2(ssa.OpOr32, types.Types[TUINT32], x, c)
return s.newValue1(ssa.OpCtz32, types.Types[TINT], y)
},
sys.ARM, sys.MIPS)
sys.MIPS)
addF("math/bits", "TrailingZeros8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz8, types.Types[TINT], args[0])
},
sys.AMD64, sys.ARM64, sys.Wasm)
sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
addF("math/bits", "TrailingZeros8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
Expand Down
11 changes: 10 additions & 1 deletion src/cmd/compile/internal/ssa/gen/ARM.rules
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,22 @@

// TODO: optimize this for ARMv5 and ARMv6
(Ctz32NonZero x) -> (Ctz32 x)
(Ctz16NonZero x) -> (Ctz32 x)
(Ctz8NonZero x) -> (Ctz32 x)

// count trailing zero for ARMv5 and ARMv6
// 32 - CLZ(x&-x - 1)
(Ctz32 <t> x) && objabi.GOARM<=6 -> (RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1])))
(Ctz32 <t> x) && objabi.GOARM<=6 ->
(RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1])))
(Ctz16 <t> x) && objabi.GOARM<=6 ->
(RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x10000] x))) [1])))
(Ctz8 <t> x) && objabi.GOARM<=6 ->
(RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x100] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x100] x))) [1])))

// count trailing zero for ARMv7
(Ctz32 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <t> x))
(Ctz16 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
(Ctz8 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))

// bit length
(BitLen32 <t> x) -> (RSBconst [32] (CLZ <t> x))
Expand Down
140 changes: 140 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteARM.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions test/codegen/mathbits.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {

func TrailingZeros(n uint) int {
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
// arm:"CLZ"
// arm64:"RBIT","CLZ"
// s390x:"FLOGR"
// ppc64:"ANDN","POPCNTD"
Expand All @@ -278,6 +279,7 @@ func TrailingZeros64(n uint64) int {

func TrailingZeros32(n uint32) int {
// amd64:"BTSQ\\t\\$32","BSFQ"
// arm:"CLZ"
// arm64:"RBITW","CLZW"
// s390x:"FLOGR","MOVWZ"
// ppc64:"ANDN","POPCNTW"
Expand All @@ -288,6 +290,7 @@ func TrailingZeros32(n uint32) int {

func TrailingZeros16(n uint16) int {
// amd64:"BSFL","BTSL\\t\\$16"
// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
// s390x:"FLOGR","OR\t\\$65536"
// ppc64:"POPCNTD","OR\\t\\$65536"
Expand All @@ -298,6 +301,7 @@ func TrailingZeros16(n uint16) int {

func TrailingZeros8(n uint8) int {
// amd64:"BSFL","BTSL\\t\\$8"
// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
// s390x:"FLOGR","OR\t\\$256"
// wasm:"I64Ctz"
Expand Down

0 comments on commit 156c830

Please sign in to comment.